From 540c8cb6a576f34a9a0b04467f46bb6e67a1f852 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 2 Mar 2011 19:51:41 -0500
Subject: gss:krb5 only include enctype numbers in gm_upcall_enctypes

Make the value in gm_upcall_enctypes just the enctype values.
This allows the values to be used more easily elsewhere.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/auth_gss/auth_gss.c      | 2 +-
 net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 45dbf1521b9a..f3914d0c5079 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		gss_msg->msg.len += len;
 	}
 	if (mech->gm_upcall_enctypes) {
-		len = sprintf(p, mech->gm_upcall_enctypes);
+		len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes);
 		p += len;
 		gss_msg->msg.len += len;
 	}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index f375decc024b..9022f0a6503e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -750,7 +750,7 @@ static struct gss_api_mech gss_kerberos_mech = {
 	.gm_ops		= &gss_kerberos_ops,
 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
 	.gm_pfs		= gss_kerberos_pfs,
-	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
+	.gm_upcall_enctypes = "18,17,16,23,3,1,2",
 };
 
 static int __init init_kerberos_module(void)
-- 
cgit v1.2.3


From 8b3e07ac908d005bb791410f594cce8744f6806a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 7 Mar 2011 22:50:47 -0500
Subject: svcrpc: fix rare race on unix_domain creation

Note that "new" here is not yet fully initialized; auth_domain_put
should be called only on auth_domains that have actually been added to
the hash.

Before this fix, two attempts to add the same domain at once could
cause the hlist_del in auth_domain_put to fail.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcauth_unix.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 30916b06c12b..d100bf2b4e81 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -38,6 +38,14 @@ struct unix_domain {
 
 extern struct auth_ops svcauth_unix;
 
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+	struct unix_domain *ud = container_of(dom, struct unix_domain, h);
+
+	kfree(dom->name);
+	kfree(ud);
+}
+
 struct auth_domain *unix_domain_find(char *name)
 {
 	struct auth_domain *rv;
@@ -47,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
 	while(1) {
 		if (rv) {
 			if (new && rv != &new->h)
-				auth_domain_put(&new->h);
+				svcauth_unix_domain_release(new);
 
 			if (rv->flavour != &svcauth_unix) {
 				auth_domain_put(rv);
@@ -74,14 +82,6 @@ struct auth_domain *unix_domain_find(char *name)
 }
 EXPORT_SYMBOL_GPL(unix_domain_find);
 
-static void svcauth_unix_domain_release(struct auth_domain *dom)
-{
-	struct unix_domain *ud = container_of(dom, struct unix_domain, h);
-
-	kfree(dom->name);
-	kfree(ud);
-}
-
 
 /**************************************************
  * cache for IP address to unix_domain
-- 
cgit v1.2.3


From 352b5d13c0684ba8cd103aa20cb74f105334562a Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 9 Mar 2011 22:40:30 -0500
Subject: svcrpc: fix bad argument in unix_domain_find

"After merging the nfsd tree, today's linux-next build (powerpc
ppc64_defconfig) produced this warning:

net/sunrpc/svcauth_unix.c: In function 'unix_domain_find':
net/sunrpc/svcauth_unix.c:58: warning: passing argument 1 of
+'svcauth_unix_domain_release' from incompatible pointer type
net/sunrpc/svcauth_unix.c:41: note: expected 'struct auth_domain *' but
argument
+is of type 'struct unix_domain *'

Introduced by commit 8b3e07ac908d ("svcrpc: fix rare race on unix_domain
creation")."

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcauth_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index d100bf2b4e81..c8e10216c113 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -55,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name)
 	while(1) {
 		if (rv) {
 			if (new && rv != &new->h)
-				svcauth_unix_domain_release(new);
+				svcauth_unix_domain_release(&new->h);
 
 			if (rv->flavour != &svcauth_unix) {
 				auth_domain_put(rv);
-- 
cgit v1.2.3


From b09734b1f4abd86e046777f0f268215b4ef1b523 Mon Sep 17 00:00:00 2001
From: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Date: Wed, 2 Feb 2011 11:39:32 -0800
Subject: libceph: Fix base64-decoding when input ends in newline.

It used to return -EINVAL because it thought the end was not aligned
to 4 bytes.

Clean up superfluous src < end test in if, the while itself guarantees
that.

Signed-off-by: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/armor.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index eb2a666b0be7..1fc1ee11dfa2 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end)
 	while (src < end) {
 		int a, b, c, d;
 
-		if (src < end && src[0] == '\n')
+		if (src[0] == '\n') {
 			src++;
+			continue;
+		}
 		if (src + 4 > end)
 			return -EINVAL;
 		a = decode_bits(src[0]);
-- 
cgit v1.2.3


From 4be34b9d69c97211ff4eb00d79078f3c1593804d Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sat, 22 Jan 2011 22:40:20 +0100
Subject: SUNRPC: Remove resource leak in svc_rdma_send_error()

We leak the memory allocated to 'ctxt' when we return after
'ib_dma_mapping_error()' returns !=0.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
 					    p, 0, length, DMA_FROM_DEVICE);
 	if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
 		put_page(p);
+		svc_rdma_put_context(ctxt, 1);
 		return;
 	}
 	atomic_inc(&xprt->sc_dma_used);
-- 
cgit v1.2.3


From 67c5c6cb8129c595f21e88254a3fc6b3b841ae8e Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Thu, 17 Mar 2011 01:40:10 +0000
Subject: econet: 4 byte infoleak to the network

struct aunhdr has 4 padding bytes between 'pad' and 'handle' fields on
x86_64.  These bytes are not initialized in the variable 'ah' before
sending 'ah' to the network.  This leads to 4 bytes kernel stack
infoleak.

This bug was introduced before the git epoch.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Acked-by: Phil Blundell <philb@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 0c2826337919..116d3fd3d669 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		udpdest.sin_addr.s_addr = htonl(network | addr.station);
 	}
 
+	memset(&ah, 0, sizeof(ah));
 	ah.port = port;
 	ah.cb = cb & 0x7f;
 	ah.code = 2;		/* magic */
-	ah.pad = 0;
 
 	/* tack our header on the front of the iovec */
 	size = sizeof(struct aunhdr);
-- 
cgit v1.2.3


From 5e5069b41d5b82bcadc1dbf73f48476b428c102f Mon Sep 17 00:00:00 2001
From: Roger Luethi <rl@hellgate.ch>
Date: Thu, 17 Mar 2011 06:37:21 +0000
Subject: ethtool: __ethtool_set_sg: check for function pointer before using it

__ethtool_set_sg does not check if dev->ethtool_ops->set_sg is defined
which can result in a NULL pointer dereference when ethtool is used to
change SG settings for drivers without SG support.

Signed-off-by: Roger Luethi <rl@hellgate.ch>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c1a71bb738da..a1086fb0c0c7 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1457,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
 {
 	int err;
 
+	if (!dev->ethtool_ops->set_sg)
+		return -EOPNOTSUPP;
+
 	if (data && !(dev->features & NETIF_F_ALL_CSUM))
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 3a7da39d165e0c363c294feec119db1427032afd Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 17 Mar 2011 07:34:32 +0000
Subject: ethtool: Compat handling for struct ethtool_rxnfc

This structure was accidentally defined such that its layout can
differ between 32-bit and 64-bit processes.  Add compat structure
definitions and an ioctl wrapper function.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Cc: stable@kernel.org [2.6.30+]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 107 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 937d0fcf74bc..5212447c86e7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2588,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 
 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 {
+	struct compat_ethtool_rxnfc __user *compat_rxnfc;
+	bool convert_in = false, convert_out = false;
+	size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
+	struct ethtool_rxnfc __user *rxnfc;
 	struct ifreq __user *ifr;
+	u32 rule_cnt = 0, actual_rule_cnt;
+	u32 ethcmd;
 	u32 data;
-	void __user *datap;
+	int ret;
+
+	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+		return -EFAULT;
 
-	ifr = compat_alloc_user_space(sizeof(*ifr));
+	compat_rxnfc = compat_ptr(data);
 
-	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+	if (get_user(ethcmd, &compat_rxnfc->cmd))
 		return -EFAULT;
 
-	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
+	/* Most ethtool structures are defined without padding.
+	 * Unfortunately struct ethtool_rxnfc is an exception.
+	 */
+	switch (ethcmd) {
+	default:
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		/* Buffer size is variable */
+		if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
+			return -EFAULT;
+		if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
+			return -ENOMEM;
+		buf_size += rule_cnt * sizeof(u32);
+		/* fall through */
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+		convert_out = true;
+		/* fall through */
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		buf_size += sizeof(struct ethtool_rxnfc);
+		convert_in = true;
+		break;
+	}
+
+	ifr = compat_alloc_user_space(buf_size);
+	rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
+
+	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
 		return -EFAULT;
 
-	datap = compat_ptr(data);
-	if (put_user(datap, &ifr->ifr_ifru.ifru_data))
+	if (put_user(convert_in ? rxnfc : compat_ptr(data),
+		     &ifr->ifr_ifru.ifru_data))
 		return -EFAULT;
 
-	return dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (convert_in) {
+		/* We expect there to be holes between fs.m_u and
+		 * fs.ring_cookie and at the end of fs, but nowhere else.
+		 */
+		BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) +
+			     sizeof(compat_rxnfc->fs.m_u) !=
+			     offsetof(struct ethtool_rxnfc, fs.m_u) +
+			     sizeof(rxnfc->fs.m_u));
+		BUILD_BUG_ON(
+			offsetof(struct compat_ethtool_rxnfc, fs.location) -
+			offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+			offsetof(struct ethtool_rxnfc, fs.location) -
+			offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+		if (copy_in_user(rxnfc, compat_rxnfc,
+				 (void *)(&rxnfc->fs.m_u + 1) -
+				 (void *)rxnfc) ||
+		    copy_in_user(&rxnfc->fs.ring_cookie,
+				 &compat_rxnfc->fs.ring_cookie,
+				 (void *)(&rxnfc->fs.location + 1) -
+				 (void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+	}
+
+	ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+	if (ret)
+		return ret;
+
+	if (convert_out) {
+		if (copy_in_user(compat_rxnfc, rxnfc,
+				 (const void *)(&rxnfc->fs.m_u + 1) -
+				 (const void *)rxnfc) ||
+		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
+				 &rxnfc->fs.ring_cookie,
+				 (const void *)(&rxnfc->fs.location + 1) -
+				 (const void *)&rxnfc->fs.ring_cookie) ||
+		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
+				 sizeof(rxnfc->rule_cnt)))
+			return -EFAULT;
+
+		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+			/* As an optimisation, we only copy the actual
+			 * number of rules that the underlying
+			 * function returned.  Since Mallory might
+			 * change the rule count in user memory, we
+			 * check that it is less than the rule count
+			 * originally given (as the user buffer size),
+			 * which has been range-checked.
+			 */
+			if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
+				return -EFAULT;
+			if (actual_rule_cnt < rule_cnt)
+				rule_cnt = actual_rule_cnt;
+			if (copy_in_user(&compat_rxnfc->rule_locs[0],
+					 &rxnfc->rule_locs[0],
+					 rule_cnt * sizeof(u32)))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
 }
 
 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
-- 
cgit v1.2.3


From d870bfb9d366c5d466c0f5419a4ec95a3f71ea8a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 18 Mar 2011 00:27:27 +0000
Subject: vlan: should take into account needed_headroom

Commit c95b819ad7 (gre: Use needed_headroom)
made gre use needed_headroom instead of hard_header_len

This uncover a bug in vlan code.

We should make sure vlan devices take into account their
real_dev->needed_headroom or we risk a crash in ipgre_header(), because
we dont have enough room to push IP header in skb.

Reported-by: Diddi Oscarsson <diddi@diddi.se>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ae610f046de5..e34ea9e5e28b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -720,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev)
 	dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid;
 #endif
 
+	dev->needed_headroom = real_dev->needed_headroom;
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
-- 
cgit v1.2.3


From 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Mar 2011 05:27:28 +0000
Subject: bridge: Reset IPCB when entering IP stack on NF_FORWARD

Whenever we enter the IP stack proper from bridge netfilter we
need to ensure that the skb is in a form the IP stack expects
it to be in.

The entry point on NF_FORWARD did not meet the requirements of
the IP stack, therefore leading to potential crashes/panics.

This patch fixes the problem.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f97af5590ba1..008ff6c4eecf 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -739,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
+	if (br_parse_ip_options(skb))
+		return NF_DROP;
+
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
-- 
cgit v1.2.3


From 5e0c1eb7e6b61998c7ecd39b7f69a15773d894d4 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sun, 20 Mar 2011 15:33:26 +0100
Subject: netfilter: ipset: fix address ranges at hash:*port* types

The hash:*port* types with IPv4 silently ignored when address ranges
with non TCP/UDP were added/deleted from the set and used the first
address from the range only.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_hash_ipport.c    | 34 +++++++++--------------------
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 34 +++++++++--------------------
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 34 +++++++++--------------------
 net/netfilter/ipset/ip_set_hash_netport.c   | 30 +++++++------------------
 4 files changed, 38 insertions(+), 94 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index adbe787ea5dc..b9214145d357 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipport4_elem data = { };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipport6_elem data = { };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 22e23abb86c6..4642872df6e1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportip4_elem data = { };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportip6_elem data = { };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 6033e8b54bbd..2cb84a54b7ad 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK };
 	u32 ip, ip_to, p, port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	}
 
 	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
 		ret = adtfn(set, &data, timeout);
@@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else
 		ip_to = ip;
 
-	port = ntohs(data.port);
-	if (tb[IPSET_ATTR_PORT_TO]) {
+	port_to = port = ntohs(data.port);
+	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
 			swap(port, port_to);
-	} else
-		port_to = port;
+	}
 
 	for (; !before(ip_to, ip); ip++)
 		for (p = port; p <= port_to; p++) {
@@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_ipportnet6_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
@@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 34a165626ee9..8598676f2a05 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netport4_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMP:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMP))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
@@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	struct hash_netport6_elem data = { .cidr = HOST_MASK };
 	u32 port, port_to;
 	u32 timeout = h->timeout;
+	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_PROTO]) {
 		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(data.proto);
 
 		if (data.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	switch (data.proto) {
-	case IPPROTO_UDP:
-	case IPPROTO_TCP:
-	case IPPROTO_ICMPV6:
-		break;
-	default:
+	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
 		data.port = 0;
-		break;
-	}
 
 	if (tb[IPSET_ATTR_TIMEOUT]) {
 		if (!with_timeout(h->timeout))
@@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
 
-	if (adt == IPSET_TEST ||
-	    !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) ||
-	    !tb[IPSET_ATTR_PORT_TO]) {
+	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
 		ret = adtfn(set, &data, timeout);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
-- 
cgit v1.2.3


From 5c1aba467828bf0574ec5754c84884d573f590af Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sun, 20 Mar 2011 15:35:01 +0100
Subject: netfilter: ipset: fix checking the type revision at create command

The revision of the set type was not checked at the create command: if the
userspace sent a valid set type but with not supported revision number,
it'd create a loop.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 618a615acc9d..d6b48230a540 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -94,16 +94,28 @@ static int
 find_set_type_get(const char *name, u8 family, u8 revision,
 		  struct ip_set_type **found)
 {
+	struct ip_set_type *type;
+	int err;
+
 	rcu_read_lock();
 	*found = find_set_type(name, family, revision);
 	if (*found) {
-		int err = !try_module_get((*found)->me);
-		rcu_read_unlock();
-		return err ? -EFAULT : 0;
+		err = !try_module_get((*found)->me) ? -EFAULT : 0;
+		goto unlock;
 	}
+	/* Make sure the type is loaded but we don't support the revision */
+	list_for_each_entry_rcu(type, &ip_set_type_list, list)
+		if (STREQ(type->name, name)) {
+			err = -IPSET_ERR_FIND_TYPE;
+			goto unlock;
+		}
 	rcu_read_unlock();
 
 	return try_to_load_type(name);
+
+unlock:
+	rcu_read_unlock();
+	return err;
 }
 
 /* Find a given set type by name and family.
@@ -116,7 +128,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
 	struct ip_set_type *type;
 	bool found = false;
 
-	*min = *max = 0;
+	*min = 255; *max = 0;
 	rcu_read_lock();
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STREQ(type->name, name) &&
@@ -124,7 +136,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)
 			found = true;
 			if (type->revision < *min)
 				*min = type->revision;
-			else if (type->revision > *max)
+			if (type->revision > *max)
 				*max = type->revision;
 		}
 	rcu_read_unlock();
-- 
cgit v1.2.3


From db856674ac69e31946e56085239757cca3f7655f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 20 Mar 2011 15:40:06 +0100
Subject: netfilter: xtables: fix reentrancy

commit f3c5c1bfd4308 (make ip_tables reentrant) introduced a race in
handling the stackptr restore, at the end of ipt_do_table()

We should do it before the call to xt_info_rdunlock_bh(), or we allow
cpu preemption and another cpu overwrites stackptr of original one.

A second fix is to change the underflow test to check the origptr value
instead of 0 to detect underflow, or else we allow a jump from different
hooks.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c  | 4 ++--
 net/ipv6/netfilter/ip6_tables.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b09ed0d080f9..ffcea0d1678e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				if (*stackptr == 0) {
+				if (*stackptr <= origptr) {
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 					pr_debug("Underflow (this is normal) "
@@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb,
 			/* Verdict */
 			break;
 	} while (!acpar.hotdrop);
-	xt_info_rdunlock_bh();
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
 	*stackptr = origptr;
+	xt_info_rdunlock_bh();
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c9598a9067d7..0b2af9b85cec 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				if (*stackptr == 0)
+				if (*stackptr <= origptr)
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 				else
@@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!acpar.hotdrop);
 
-	xt_info_rdunlock_bh();
 	*stackptr = origptr;
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
-- 
cgit v1.2.3


From 961ed183a9fd080cf306c659b8736007e44065a5 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Sun, 20 Mar 2011 15:42:52 +0100
Subject: netfilter: ipt_CLUSTERIP: fix buffer overflow

'buffer' string is copied from userspace.  It is not checked whether it is
zero terminated.  This may lead to overflow inside of simple_strtoul().
Changli Gao suggested to copy not more than user supplied 'size' bytes.

It was introduced before the git epoch.  Files "ipt_CLUSTERIP/*" are
root writable only by default, however, on some setups permissions might be
relaxed to e.g. network admin user.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 403ca57f6011..d609ac3cb9a4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 	char buffer[PROC_WRITELEN+1];
 	unsigned long nodenum;
 
-	if (copy_from_user(buffer, input, PROC_WRITELEN))
+	if (size > PROC_WRITELEN)
+		return -EIO;
+	if (copy_from_user(buffer, input, size))
 		return -EFAULT;
+	buffer[size] = 0;
 
 	if (*buffer == '+') {
 		nodenum = simple_strtoul(buffer+1, NULL, 10);
-- 
cgit v1.2.3


From 8bc8aecdc5e26cfda12dbd6867af4aa67836da6a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 21 Mar 2011 20:01:00 +0100
Subject: mac80211: initialize sta->last_rx in sta_info_alloc

This field is used to determine the inactivity time. When in AP mode,
hostapd uses it for kicking out inactive clients after a while. Without this
patch, hostapd immediately deauthenticates a new client if it checks the
inactivity time before the client sends its first data frame.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 5a11078827ab..d0311a322ddd 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -243,6 +243,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	memcpy(sta->sta.addr, addr, ETH_ALEN);
 	sta->local = local;
 	sta->sdata = sdata;
+	sta->last_rx = jiffies;
 
 	ewma_init(&sta->avg_signal, 1024, 8);
 
-- 
cgit v1.2.3


From 6f6c7006755b667f9f6c1f3b6f08cd65f75cc471 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 17 Jan 2011 20:34:08 -0800
Subject: libceph: fix osd request queuing on osdmap updates

If we send a request to osd A, and the request's pg remaps to osd B and
then back to A in quick succession, we need to resend the request to A. The
old code was only calling kick_requests after processing all incremental
maps in a message, so it was very possible to not resend a request that
needed to be resent.  This would make the osd eventually time out (at least
with the current default of osd timeouts enabled).

The correct approach is to scan requests on every map incremental.  This
patch refactors the kick code in a few ways:
 - all requests are either on req_lru (in flight), req_unsent (ready to
   send), or req_notarget (currently map to no up osd)
 - mapping always done by map_request (previous map_osds)
 - if the mapping changes, we requeue.  requests are resent only after all
   map incrementals are processed.
 - some osd reset code is moved out of kick_requests into a separate
   function
 - the "kick this osd" functionality is moved to kick_osd_requests, as it
   is unrelated to scanning for request->pg->osd mapping changes

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 255 ++++++++++++++++++++++++--------------------------
 1 file changed, 122 insertions(+), 133 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3e20a122ffa2..b85ed5a5503d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -22,10 +22,9 @@
 #define OSD_OPREPLY_FRONT_LEN	512
 
 static const struct ceph_connection_operations osd_con_ops;
-static int __kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd);
 
-static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void send_queued(struct ceph_osd_client *osdc);
+static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
 
 static int op_needs_trail(int op)
 {
@@ -529,6 +528,35 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
 	return NULL;
 }
 
+/*
+ * Resubmit requests pending on the given osd.
+ */
+static void __kick_osd_requests(struct ceph_osd_client *osdc,
+				struct ceph_osd *osd)
+{
+	struct ceph_osd_request *req;
+	int err;
+
+	dout("__kick_osd_requests osd%d\n", osd->o_osd);
+	err = __reset_osd(osdc, osd);
+	if (err == -EAGAIN)
+		return;
+
+	list_for_each_entry(req, &osd->o_requests, r_osd_item) {
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
+		     osd->o_osd);
+		req->r_flags |= CEPH_OSD_FLAG_RETRY;
+	}
+}
+
+static void kick_osd_requests(struct ceph_osd_client *osdc,
+			      struct ceph_osd *kickosd)
+{
+	mutex_lock(&osdc->request_mutex);
+	__kick_osd_requests(osdc, kickosd);
+	mutex_unlock(&osdc->request_mutex);
+}
 
 /*
  * If the osd connection drops, we need to resubmit all requests.
@@ -543,7 +571,8 @@ static void osd_reset(struct ceph_connection *con)
 	dout("osd_reset osd%d\n", osd->o_osd);
 	osdc = osd->o_osdc;
 	down_read(&osdc->map_sem);
-	kick_requests(osdc, osd);
+	kick_osd_requests(osdc, osd);
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
 
@@ -781,20 +810,20 @@ static void __cancel_request(struct ceph_osd_request *req)
 		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
 		req->r_sent = 0;
 	}
-	list_del_init(&req->r_req_lru_item);
 }
 
 /*
  * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
  * (as needed), and set the request r_osd appropriately.  If there is
- * no up osd, set r_osd to NULL.
+ * no up osd, set r_osd to NULL.  Move the request to the appropiate list
+ * (unsent, homeless) or leave on in-flight lru.
  *
  * Return 0 if unchanged, 1 if changed, or negative on error.
  *
  * Caller should hold map_sem for read and request_mutex.
  */
-static int __map_osds(struct ceph_osd_client *osdc,
-		      struct ceph_osd_request *req)
+static int __map_request(struct ceph_osd_client *osdc,
+			 struct ceph_osd_request *req)
 {
 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
 	struct ceph_pg pgid;
@@ -802,11 +831,13 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	int o = -1, num = 0;
 	int err;
 
-	dout("map_osds %p tid %lld\n", req, req->r_tid);
+	dout("map_request %p tid %lld\n", req, req->r_tid);
 	err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
 				      &req->r_file_layout, osdc->osdmap);
-	if (err)
+	if (err) {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 		return err;
+	}
 	pgid = reqhead->layout.ol_pgid;
 	req->r_pgid = pgid;
 
@@ -823,7 +854,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	    (req->r_osd == NULL && o == -1))
 		return 0;  /* no change */
 
-	dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
+	dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
 	     req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
 	     req->r_osd ? req->r_osd->o_osd : -1);
 
@@ -841,10 +872,12 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	if (!req->r_osd && o >= 0) {
 		err = -ENOMEM;
 		req->r_osd = create_osd(osdc);
-		if (!req->r_osd)
+		if (!req->r_osd) {
+			list_move(&req->r_req_lru_item, &osdc->req_notarget);
 			goto out;
+		}
 
-		dout("map_osds osd %p is osd%d\n", req->r_osd, o);
+		dout("map_request osd %p is osd%d\n", req->r_osd, o);
 		req->r_osd->o_osd = o;
 		req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
 		__insert_osd(osdc, req->r_osd);
@@ -855,6 +888,9 @@ static int __map_osds(struct ceph_osd_client *osdc,
 	if (req->r_osd) {
 		__remove_osd_from_lru(req->r_osd);
 		list_add(&req->r_osd_item, &req->r_osd->o_requests);
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+	} else {
+		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 	}
 	err = 1;   /* osd or pg changed */
 
@@ -869,16 +905,6 @@ static int __send_request(struct ceph_osd_client *osdc,
 			  struct ceph_osd_request *req)
 {
 	struct ceph_osd_request_head *reqhead;
-	int err;
-
-	err = __map_osds(osdc, req);
-	if (err < 0)
-		return err;
-	if (req->r_osd == NULL) {
-		dout("send_request %p no up osds in pg\n", req);
-		ceph_monc_request_next_osdmap(&osdc->client->monc);
-		return 0;
-	}
 
 	dout("send_request %p tid %llu to osd%d flags %d\n",
 	     req, req->r_tid, req->r_osd->o_osd, req->r_flags);
@@ -897,6 +923,21 @@ static int __send_request(struct ceph_osd_client *osdc,
 	return 0;
 }
 
+/*
+ * Send any requests in the queue (req_unsent).
+ */
+static void send_queued(struct ceph_osd_client *osdc)
+{
+	struct ceph_osd_request *req, *tmp;
+
+	dout("send_queued\n");
+	mutex_lock(&osdc->request_mutex);
+	list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
+		__send_request(osdc, req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+
 /*
  * Timeout callback, called every N seconds when 1 or more osd
  * requests has been active for more than N seconds.  When this
@@ -916,7 +957,6 @@ static void handle_timeout(struct work_struct *work)
 	unsigned long keepalive =
 		osdc->client->options->osd_keepalive_timeout * HZ;
 	unsigned long last_stamp = 0;
-	struct rb_node *p;
 	struct list_head slow_osds;
 
 	dout("timeout\n");
@@ -925,21 +965,6 @@ static void handle_timeout(struct work_struct *work)
 	ceph_monc_request_next_osdmap(&osdc->client->monc);
 
 	mutex_lock(&osdc->request_mutex);
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		if (req->r_resend) {
-			int err;
-
-			dout("osdc resending prev failed %lld\n", req->r_tid);
-			err = __send_request(osdc, req);
-			if (err)
-				dout("osdc failed again on %lld\n", req->r_tid);
-			else
-				req->r_resend = false;
-			continue;
-		}
-	}
 
 	/*
 	 * reset osds that appear to be _really_ unresponsive.  this
@@ -963,7 +988,7 @@ static void handle_timeout(struct work_struct *work)
 		BUG_ON(!osd);
 		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
 			   req->r_tid, osd->o_osd);
-		__kick_requests(osdc, osd);
+		__kick_osd_requests(osdc, osd);
 	}
 
 	/*
@@ -991,7 +1016,7 @@ static void handle_timeout(struct work_struct *work)
 
 	__schedule_osd_timeout(osdc);
 	mutex_unlock(&osdc->request_mutex);
-
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
 
@@ -1109,108 +1134,61 @@ bad:
 	ceph_msg_dump(msg);
 }
 
-
-static int __kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd)
+static void reset_changed_osds(struct ceph_osd_client *osdc)
 {
-	struct ceph_osd_request *req;
 	struct rb_node *p, *n;
-	int needmap = 0;
-	int err;
-
-	dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
-	if (kickosd) {
-		err = __reset_osd(osdc, kickosd);
-		if (err == -EAGAIN)
-			return 1;
-	} else {
-		for (p = rb_first(&osdc->osds); p; p = n) {
-			struct ceph_osd *osd =
-				rb_entry(p, struct ceph_osd, o_node);
-
-			n = rb_next(p);
-			if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
-			    memcmp(&osd->o_con.peer_addr,
-				   ceph_osd_addr(osdc->osdmap,
-						 osd->o_osd),
-				   sizeof(struct ceph_entity_addr)) != 0)
-				__reset_osd(osdc, osd);
-		}
-	}
-
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-
-		if (req->r_resend) {
-			dout(" r_resend set on tid %llu\n", req->r_tid);
-			__cancel_request(req);
-			goto kick;
-		}
-		if (req->r_osd && kickosd == req->r_osd) {
-			__cancel_request(req);
-			goto kick;
-		}
 
-		err = __map_osds(osdc, req);
-		if (err == 0)
-			continue;  /* no change */
-		if (err < 0) {
-			/*
-			 * FIXME: really, we should set the request
-			 * error and fail if this isn't a 'nofail'
-			 * request, but that's a fair bit more
-			 * complicated to do.  So retry!
-			 */
-			dout(" setting r_resend on %llu\n", req->r_tid);
-			req->r_resend = true;
-			continue;
-		}
-		if (req->r_osd == NULL) {
-			dout("tid %llu maps to no valid osd\n", req->r_tid);
-			needmap++;  /* request a newer map */
-			continue;
-		}
+	for (p = rb_first(&osdc->osds); p; p = n) {
+		struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node);
 
-kick:
-		dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
-		     req->r_osd ? req->r_osd->o_osd : -1);
-		req->r_flags |= CEPH_OSD_FLAG_RETRY;
-		err = __send_request(osdc, req);
-		if (err) {
-			dout(" setting r_resend on %llu\n", req->r_tid);
-			req->r_resend = true;
-		}
+		n = rb_next(p);
+		if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
+		    memcmp(&osd->o_con.peer_addr,
+			   ceph_osd_addr(osdc->osdmap,
+					 osd->o_osd),
+			   sizeof(struct ceph_entity_addr)) != 0)
+			__reset_osd(osdc, osd);
 	}
-
-	return needmap;
 }
 
 /*
- * Resubmit osd requests whose osd or osd address has changed.  Request
- * a new osd map if osds are down, or we are otherwise unable to determine
- * how to direct a request.
- *
- * Close connections to down osds.
- *
- * If @who is specified, resubmit requests for that specific osd.
+ * Requeue requests whose mapping to an OSD has changed.  If requests map to
+ * no osd, request a new map.
  *
  * Caller should hold map_sem for read and request_mutex.
  */
-static void kick_requests(struct ceph_osd_client *osdc,
-			  struct ceph_osd *kickosd)
+static void kick_requests(struct ceph_osd_client *osdc)
 {
-	int needmap;
+	struct ceph_osd_request *req;
+	struct rb_node *p;
+	int needmap = 0;
+	int err;
 
+	dout("kick_requests\n");
 	mutex_lock(&osdc->request_mutex);
-	needmap = __kick_requests(osdc, kickosd);
+	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+		req = rb_entry(p, struct ceph_osd_request, r_node);
+		err = __map_request(osdc, req);
+		if (err < 0)
+			continue;  /* error */
+		if (req->r_osd == NULL) {
+			dout("%p tid %llu maps to no osd\n", req, req->r_tid);
+			needmap++;  /* request a newer map */
+		} else if (err > 0) {
+			dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
+			     req->r_osd ? req->r_osd->o_osd : -1);
+			req->r_flags |= CEPH_OSD_FLAG_RETRY;
+		}
+	}
 	mutex_unlock(&osdc->request_mutex);
 
 	if (needmap) {
 		dout("%d requests for down osds, need new map\n", needmap);
 		ceph_monc_request_next_osdmap(&osdc->client->monc);
 	}
-
 }
+
+
 /*
  * Process updated osd map.
  *
@@ -1263,6 +1241,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 				ceph_osdmap_destroy(osdc->osdmap);
 				osdc->osdmap = newmap;
 			}
+			kick_requests(osdc);
+			reset_changed_osds(osdc);
 		} else {
 			dout("ignoring incremental map %u len %d\n",
 			     epoch, maplen);
@@ -1300,6 +1280,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 			osdc->osdmap = newmap;
 			if (oldmap)
 				ceph_osdmap_destroy(oldmap);
+			kick_requests(osdc);
 		}
 		p += maplen;
 		nr_maps--;
@@ -1308,8 +1289,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 done:
 	downgrade_write(&osdc->map_sem);
 	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
-	if (newmap)
-		kick_requests(osdc, NULL);
+	send_queued(osdc);
 	up_read(&osdc->map_sem);
 	wake_up_all(&osdc->client->auth_wq);
 	return;
@@ -1347,15 +1327,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	 * the request still han't been touched yet.
 	 */
 	if (req->r_sent == 0) {
-		rc = __send_request(osdc, req);
-		if (rc) {
-			if (nofail) {
-				dout("osdc_start_request failed send, "
-				     " marking %lld\n", req->r_tid);
-				req->r_resend = true;
-				rc = 0;
-			} else {
-				__unregister_request(osdc, req);
+		rc = __map_request(osdc, req);
+		if (rc < 0)
+			return rc;
+		if (req->r_osd == NULL) {
+			dout("send_request %p no up osds in pg\n", req);
+			ceph_monc_request_next_osdmap(&osdc->client->monc);
+		} else {
+			rc = __send_request(osdc, req);
+			if (rc) {
+				if (nofail) {
+					dout("osdc_start_request failed send, "
+					     " will retry %lld\n", req->r_tid);
+					rc = 0;
+				} else {
+					__unregister_request(osdc, req);
+				}
 			}
 		}
 	}
@@ -1441,6 +1428,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	INIT_LIST_HEAD(&osdc->osd_lru);
 	osdc->requests = RB_ROOT;
 	INIT_LIST_HEAD(&osdc->req_lru);
+	INIT_LIST_HEAD(&osdc->req_unsent);
+	INIT_LIST_HEAD(&osdc->req_notarget);
 	osdc->num_requests = 0;
 	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
-- 
cgit v1.2.3


From a454f0ccefbfdbfc0e1aa8a5f8010af5e48b8845 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 21 Mar 2011 18:08:28 -0700
Subject: xfrm: Fix initialize repl field of struct xfrm_state

Commit 'xfrm: Move IPsec replay detection functions to a separate file'
  (9fdc4883d92d20842c5acea77a4a21bb1574b495)
introduce repl field to struct xfrm_state, and only initialize it
under SA's netlink create path, the other path, such as pf_key,
ipcomp/ipcomp6 etc, the repl field remaining uninitialize. So if
the SA is created by pf_key, any input packet with SA's encryption
algorithm will cause panic.

    int xfrm_input()
    {
        ...
        x->repl->advance(x, seq);
        ...
    }

This patch fixed it by introduce new function __xfrm_init_state().

Pid: 0, comm: swapper Not tainted 2.6.38-next+ #14 Bochs Bochs
EIP: 0060:[<c078e5d5>] EFLAGS: 00010206 CPU: 0
EIP is at xfrm_input+0x31c/0x4cc
EAX: dd839c00 EBX: 00000084 ECX: 00000000 EDX: 01000000
ESI: dd839c00 EDI: de3a0780 EBP: dec1de88 ESP: dec1de64
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process swapper (pid: 0, ti=dec1c000 task=c09c0f20 task.ti=c0992000)
Stack:
 00000000 00000000 00000002 c0ba27c0 00100000 01000000 de3a0798 c0ba27c0
 00000033 dec1de98 c0786848 00000000 de3a0780 dec1dea4 c0786868 00000000
 dec1debc c074ee56 e1da6b8c de3a0780 c074ed44 de3a07a8 dec1decc c074ef32
Call Trace:
 [<c0786848>] xfrm4_rcv_encap+0x22/0x27
 [<c0786868>] xfrm4_rcv+0x1b/0x1d
 [<c074ee56>] ip_local_deliver_finish+0x112/0x1b1
 [<c074ed44>] ? ip_local_deliver_finish+0x0/0x1b1
 [<c074ef32>] NF_HOOK.clone.1+0x3d/0x44
 [<c074ef77>] ip_local_deliver+0x3e/0x44
 [<c074ed44>] ? ip_local_deliver_finish+0x0/0x1b1
 [<c074ec03>] ip_rcv_finish+0x30a/0x332
 [<c074e8f9>] ? ip_rcv_finish+0x0/0x332
 [<c074ef32>] NF_HOOK.clone.1+0x3d/0x44
 [<c074f188>] ip_rcv+0x20b/0x247
 [<c074e8f9>] ? ip_rcv_finish+0x0/0x332
 [<c072797d>] __netif_receive_skb+0x373/0x399
 [<c0727bc1>] netif_receive_skb+0x4b/0x51
 [<e0817e2a>] cp_rx_poll+0x210/0x2c4 [8139cp]
 [<c072818f>] net_rx_action+0x9a/0x17d
 [<c0445b5c>] __do_softirq+0xa1/0x149
 [<c0445abb>] ? __do_softirq+0x0/0x149

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 15 ++++++++++++++-
 net/xfrm/xfrm_user.c  |  2 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d575f0534868..f83a3d1da81b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1907,7 +1907,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	return res;
 }
 
-int xfrm_init_state(struct xfrm_state *x)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
 {
 	struct xfrm_state_afinfo *afinfo;
 	struct xfrm_mode *inner_mode;
@@ -1980,12 +1980,25 @@ int xfrm_init_state(struct xfrm_state *x)
 	if (x->outer_mode == NULL)
 		goto error;
 
+	if (init_replay) {
+		err = xfrm_init_replay(x);
+		if (err)
+			goto error;
+	}
+
 	x->km.state = XFRM_STATE_VALID;
 
 error:
 	return err;
 }
 
+EXPORT_SYMBOL(__xfrm_init_state);
+
+int xfrm_init_state(struct xfrm_state *x)
+{
+	return __xfrm_init_state(x, true);
+}
+
 EXPORT_SYMBOL(xfrm_init_state);
 
 int __net_init xfrm_state_init(struct net *net)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 706385ae3e4b..fc152d28753c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -511,7 +511,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	err = xfrm_init_state(x);
+	err = __xfrm_init_state(x, false);
 	if (err)
 		goto error;
 
-- 
cgit v1.2.3


From 8aa525a9340da4227797a06221ca08399006635f Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Mon, 21 Mar 2011 18:10:25 -0700
Subject: l2tp: fix possible oops on l2tp_eth module unload

A struct used in the l2tp_eth driver for registering network namespace
ops was incorrectly marked as __net_initdata, leading to oops when
module unloaded.

BUG: unable to handle kernel paging request at ffffffffa00ec098
IP: [<ffffffff8123dbd8>] ops_exit_list+0x7/0x4b
PGD 142d067 PUD 1431063 PMD 195da8067 PTE 0
Oops: 0000 [#1] SMP
last sysfs file: /sys/module/l2tp_eth/refcnt
Call Trace:
 [<ffffffff8123dc94>] ? unregister_pernet_operations+0x32/0x93
 [<ffffffff8123dd20>] ? unregister_pernet_device+0x2b/0x38
 [<ffffffff81068b6e>] ? sys_delete_module+0x1b8/0x222
 [<ffffffff810c7300>] ? do_munmap+0x254/0x318
 [<ffffffff812c64e5>] ? page_fault+0x25/0x30
 [<ffffffff812c6952>] ? system_call_fastpath+0x16/0x1b

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8d9ce0accc98..a8193f52c13c 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net)
 	return 0;
 }
 
-static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
+static struct pernet_operations l2tp_eth_net_ops = {
 	.init = l2tp_eth_init_net,
 	.id   = &l2tp_eth_net_id,
 	.size = sizeof(struct l2tp_eth_net),
-- 
cgit v1.2.3


From 674f2115995b7b588cbf3540c9f9b2448a8c7ea8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Mar 2011 18:16:39 -0700
Subject: ipx: fix ipx_release()

Commit b0d0d915d1d1a0 (remove the BKL) added a regression, because
sock_put() can free memory while we are going to use it later.

Fix is to delay sock_put() _after_ release_sock().

Reported-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipx/af_ipx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 2731b51923d1..9680226640ef 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -148,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk)
 	ipx_remove_socket(sk);
 	skb_queue_purge(&sk->sk_receive_queue);
 	sk_refcnt_debug_dec(sk);
-	sock_put(sk);
 }
 
 /*
@@ -1404,6 +1403,7 @@ static int ipx_release(struct socket *sock)
 	sk_refcnt_debug_release(sk);
 	ipx_destroy_socket(sk);
 	release_sock(sk);
+	sock_put(sk);
 out:
 	return 0;
 }
-- 
cgit v1.2.3


From b20e7bbfc7a15a4182730f0936433145992b4b06 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 21 Mar 2011 18:18:00 -0700
Subject: net/appletalk: fix atalk_release use after free

The BKL removal in appletalk introduced a use-after-free problem,
where atalk_destroy_socket frees a sock, but we still release
the socket lock on it.

An easy fix is to take an extra reference on the sock and sock_put
it when returning from atalk_release.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3d4f4b043406..206e771e82d1 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1051,6 +1051,7 @@ static int atalk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
+	sock_hold(sk);
 	lock_sock(sk);
 	if (sk) {
 		sock_orphan(sk);
@@ -1058,6 +1059,8 @@ static int atalk_release(struct socket *sock)
 		atalk_destroy_socket(sk);
 	}
 	release_sock(sk);
+	sock_put(sk);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From ac0a121d7906b049dfee3649f886c969fbb3c1b7 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 21 Mar 2011 18:20:26 -0700
Subject: net: fix incorrect spelling in drop monitor protocol

It was pointed out to me recently that my spelling could be better :)

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 36e603c78ce9..706502ff64aa 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void)
 	struct per_cpu_dm_data *data;
 	int cpu, rc;
 
-	printk(KERN_INFO "Initalizing network drop monitor service\n");
+	printk(KERN_INFO "Initializing network drop monitor service\n");
 
 	if (sizeof(void *) > 8) {
 		printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
-- 
cgit v1.2.3


From 9d2a8fa96a44ba242de3a6f56acaef7a40a97b97 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Mon, 21 Mar 2011 18:23:34 -0700
Subject: net ipv6: Fix duplicate /proc/sys/net/ipv6/neigh directory entries.

When I was fixing issues with unregisgtering tables under /proc/sys/net/ipv6/neigh
by adding a mount point it appears I missed a critical ordering issue, in the
ipv6 initialization.  I had not realized that ipv6_sysctl_register is called
at the very end of the ipv6 initialization and in particular after we call
neigh_sysctl_register from ndisc_init.

"neigh" needs to be initialized in ipv6_static_sysctl_register which is
the first ipv6 table to initialized, and definitely before ndisc_init.
This removes the weirdness of duplicate tables while still providing a
"neigh" mount point which prevents races in sysctl unregistering.

This was initially reported at https://bugzilla.kernel.org/show_bug.cgi?id=31232
Reported-by: sunkan@zappa.cx
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sysctl_net_ipv6.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7cb65ef79f9c..6dcf5e7d661b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,16 @@
 
 static struct ctl_table empty[1];
 
+static ctl_table ipv6_static_skeleton[] = {
+	{
+		.procname	= "neigh",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= empty,
+	},
+	{ }
+};
+
 static ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "route",
@@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "neigh",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= empty,
-	},
 	{ }
 };
 
@@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base;
 
 int ipv6_static_sysctl_register(void)
 {
-	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty);
+	ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton);
 	if (ip6_base == NULL)
 		return -ENOMEM;
 	return 0;
-- 
cgit v1.2.3


From f40f94fc6c3b5a5542d5ed976e9ff69a3b463802 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Mar 2011 10:15:40 +0000
Subject: ipvs: fix a typo in __ip_vs_control_init()

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index b799cea31f95..33733c8872e7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3605,7 +3605,7 @@ int __net_init __ip_vs_control_init(struct net *net)
 
 	/* procfs stats */
 	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (ipvs->tot_stats.cpustats) {
+	if (!ipvs->tot_stats.cpustats) {
 		pr_err("%s(): alloc_percpu.\n", __func__);
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3


From 736561a01f11114146b1b7f82d486fa9c95828ef Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Mon, 21 Mar 2011 15:18:01 +0000
Subject: IPVS: Use global mutex in ip_vs_app.c

As part of the work to make IPVS network namespace aware
__ip_vs_app_mutex was replaced by a per-namespace lock,
ipvs->app_mutex. ipvs->app_key is also supplied for debugging purposes.

Unfortunately this implementation results in ipvs->app_key residing
in non-static storage which at the very least causes a lockdep warning.

This patch takes the rather heavy-handed approach of reinstating
__ip_vs_app_mutex which will cover access to the ipvs->list_head
of all network namespaces.

[   12.610000] IPVS: Creating netns size=2456 id=0
[   12.630000] IPVS: Registered protocols (TCP, UDP, SCTP, AH, ESP)
[   12.640000] BUG: key ffff880003bbf1a0 not in .data!
[   12.640000] ------------[ cut here ]------------
[   12.640000] WARNING: at kernel/lockdep.c:2701 lockdep_init_map+0x37b/0x570()
[   12.640000] Hardware name: Bochs
[   12.640000] Pid: 1, comm: swapper Tainted: G        W 2.6.38-kexec-06330-g69b7efe-dirty #122
[   12.650000] Call Trace:
[   12.650000]  [<ffffffff8102e685>] warn_slowpath_common+0x75/0xb0
[   12.650000]  [<ffffffff8102e6d5>] warn_slowpath_null+0x15/0x20
[   12.650000]  [<ffffffff8105967b>] lockdep_init_map+0x37b/0x570
[   12.650000]  [<ffffffff8105829d>] ? trace_hardirqs_on+0xd/0x10
[   12.650000]  [<ffffffff81055ad8>] debug_mutex_init+0x38/0x50
[   12.650000]  [<ffffffff8104bc4c>] __mutex_init+0x5c/0x70
[   12.650000]  [<ffffffff81685ee7>] __ip_vs_app_init+0x64/0x86
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff811b1c33>] T.620+0x43/0x170
[   12.660000]  [<ffffffff811b1e9a>] ? register_pernet_subsys+0x1a/0x40
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.660000]  [<ffffffff811b1db7>] register_pernet_operations+0x57/0xb0
[   12.660000]  [<ffffffff81685a3b>] ? ip_vs_init+0x0/0xff
[   12.670000]  [<ffffffff811b1ea9>] register_pernet_subsys+0x29/0x40
[   12.670000]  [<ffffffff81685f19>] ip_vs_app_init+0x10/0x12
[   12.670000]  [<ffffffff81685a87>] ip_vs_init+0x4c/0xff
[   12.670000]  [<ffffffff8166562c>] do_one_initcall+0x7a/0x12e
[   12.670000]  [<ffffffff8166583e>] kernel_init+0x13e/0x1c2
[   12.670000]  [<ffffffff8128c134>] kernel_thread_helper+0x4/0x10
[   12.670000]  [<ffffffff8128ad40>] ? restore_args+0x0/0x30
[   12.680000]  [<ffffffff81665700>] ? kernel_init+0x0/0x1c2
[   12.680000]  [<ffffffff8128c130>] ? kernel_thread_helper+0x0/0x1global0

Signed-off-by: Simon Horman <horms@verge.net.au>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_app.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 5c48ffb60c28..2dc6de13ac18 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
+static DEFINE_MUTEX(__ip_vs_app_mutex);
+
 /*
  *	Get an ip_vs_app object
  */
@@ -167,14 +169,13 @@ int
 register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
 		       __u16 port)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	int result;
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	result = ip_vs_app_inc_new(net, app, proto, port);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	return result;
 }
@@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	list_add(&app->a_list, &ipvs->app_list);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	return 0;
 }
@@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_app *inc, *nxt;
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
 		ip_vs_app_inc_release(net, inc);
@@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 
 	list_del(&app->a_list);
 
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
@@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 	struct net *net = seq_file_net(seq);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	mutex_lock(&ipvs->app_mutex);
+	mutex_lock(&__ip_vs_app_mutex);
 
 	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
@@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-	struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
-
-	mutex_unlock(&ipvs->app_mutex);
+	mutex_unlock(&__ip_vs_app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -583,7 +581,6 @@ static int __net_init __ip_vs_app_init(struct net *net)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	INIT_LIST_HEAD(&ipvs->app_list);
-	__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
 	proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
 	return 0;
 }
-- 
cgit v1.2.3


From 27660515a21bf913e3208ded3f27abd0529fae0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Fri, 18 Mar 2011 16:56:34 +0000
Subject: net: implement dev_disable_lro() hw_features compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement compatibility with new hw_features for dev_disable_lro().
This is a transition path - dev_disable_lro() should be later
integrated into netdev_fix_features() after all drivers are converted.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c     | 19 +++++++++++--------
 net/core/ethtool.c |  2 +-
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0b88eba97dab..f453370131a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1353,14 +1353,17 @@ EXPORT_SYMBOL(dev_close);
  */
 void dev_disable_lro(struct net_device *dev)
 {
-	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
-	    dev->ethtool_ops->set_flags) {
-		u32 flags = dev->ethtool_ops->get_flags(dev);
-		if (flags & ETH_FLAG_LRO) {
-			flags &= ~ETH_FLAG_LRO;
-			dev->ethtool_ops->set_flags(dev, flags);
-		}
-	}
+	u32 flags;
+
+	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
+		flags = dev->ethtool_ops->get_flags(dev);
+	else
+		flags = ethtool_op_get_flags(dev);
+
+	if (!(flags & ETH_FLAG_LRO))
+		return;
+
+	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
 	WARN_ON(dev->features & NETIF_F_LRO);
 }
 EXPORT_SYMBOL(dev_disable_lro);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a1086fb0c0c7..24bd57493c0d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -513,7 +513,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
 	}
 }
 
-static int __ethtool_set_flags(struct net_device *dev, u32 data)
+int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
 	u32 changed;
 
-- 
cgit v1.2.3


From 74cb3c108bc0f599a4eb40980db8580cfba725c9 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 Mar 2011 12:13:46 +0000
Subject: ipv4: match prefsrc when deleting routes

fib_table_delete forgets to match the routes by prefsrc.
Callers can specify known IP in fc_prefsrc and we should remove
the exact route. This is needed for cases when same local or
broadcast addresses are used in different subnets and the
routes differ only in prefsrc. All callers that do not provide
fc_prefsrc will ignore the route prefsrc as before and will
delete the first occurence. That is how the ip route del default
magic works.

	Current callers are:

- ip_rt_ioctl where rtentry_to_fib_config provides fc_prefsrc only
when the provided device name matches IP label with colon.

- inet_rtm_delroute where RTA_PREFSRC is optional too

- fib_magic which deals with routes when deleting addresses
and where the fc_prefsrc is always set with the primary IP
for the concerned IFA.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3d28a35c2e1a..ac87a49ad50b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1665,6 +1665,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
 		     fa->fa_scope == cfg->fc_scope) &&
+		    (!cfg->fc_prefsrc ||
+		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
 		     fi->fib_protocol == cfg->fc_protocol) &&
 		    fib_nh_match(cfg, fi) == 0) {
-- 
cgit v1.2.3


From e6abbaa2725a43cf5d26c4c2a5dc6c0f6029ea19 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 Mar 2011 12:13:49 +0000
Subject: ipv4: fix route deletion for IPs on many subnets

Alex Sidorenko reported for problems with local
routes left after IP addresses are deleted. It happens
when same IPs are used in more than one subnet for the
device.

	Fix fib_del_ifaddr to restrict the checks for duplicate
local and broadcast addresses only to the IFAs that use
our primary IFA or another primary IFA with same address.
And we expect the prefsrc to be matched when the routes
are deleted because it is possible they to differ only by
prefsrc. This patch prevents local and broadcast routes
to be leaked until their primary IP is deleted finally
from the box.

	As the secondary address promotion needs to delete
the routes for all secondaries that used the old primary IFA,
add option to ignore these secondaries from the checks and
to assume they are already deleted, so that we can safely
delete the route while these IFAs are still on the device list.

Reported-by: Alex Sidorenko <alexandre.sidorenko@hp.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 101 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 88 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a373a259253c..02c3ba61884a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	}
 }
 
-static void fib_del_ifaddr(struct in_ifaddr *ifa)
+/* Delete primary or secondary address.
+ * Optionally, on secondary address promotion consider the addresses
+ * from subnet iprim as deleted, even if they are in device list.
+ * In this case the secondary ifa can be in device list.
+ */
+void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct net_device *dev = in_dev->dev;
 	struct in_ifaddr *ifa1;
-	struct in_ifaddr *prim = ifa;
+	struct in_ifaddr *prim = ifa, *prim1 = NULL;
 	__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
 	__be32 any = ifa->ifa_address & ifa->ifa_mask;
 #define LOCAL_OK	1
@@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 #define BRD0_OK		4
 #define BRD1_OK		8
 	unsigned ok = 0;
+	int subnet = 0;		/* Primary network */
+	int gone = 1;		/* Address is missing */
+	int same_prefsrc = 0;	/* Another primary with same IP */
 
-	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
-		fib_magic(RTM_DELROUTE,
-			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-			  any, ifa->ifa_prefixlen, prim);
-	else {
+	if (ifa->ifa_flags & IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 		if (prim == NULL) {
 			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
 			return;
 		}
+		if (iprim && iprim != prim) {
+			printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
+			return;
+		}
+	} else if (!ipv4_is_zeronet(any) &&
+		   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
+		fib_magic(RTM_DELROUTE,
+			  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+			  any, ifa->ifa_prefixlen, prim);
+		subnet = 1;
 	}
 
 	/* Deletion is more complicated than add.
@@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 	 */
 
 	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+		if (ifa1 == ifa) {
+			/* promotion, keep the IP */
+			gone = 0;
+			continue;
+		}
+		/* Ignore IFAs from our subnet */
+		if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, iprim))
+			continue;
+
+		/* Ignore ifa1 if it uses different primary IP (prefsrc) */
+		if (ifa1->ifa_flags & IFA_F_SECONDARY) {
+			/* Another address from our subnet? */
+			if (ifa1->ifa_mask == prim->ifa_mask &&
+			    inet_ifa_match(ifa1->ifa_address, prim))
+				prim1 = prim;
+			else {
+				/* We reached the secondaries, so
+				 * same_prefsrc should be determined.
+				 */
+				if (!same_prefsrc)
+					continue;
+				/* Search new prim1 if ifa1 is not
+				 * using the current prim1
+				 */
+				if (!prim1 ||
+				    ifa1->ifa_mask != prim1->ifa_mask ||
+				    !inet_ifa_match(ifa1->ifa_address, prim1))
+					prim1 = inet_ifa_byprefix(in_dev,
+							ifa1->ifa_address,
+							ifa1->ifa_mask);
+				if (!prim1)
+					continue;
+				if (prim1->ifa_local != prim->ifa_local)
+					continue;
+			}
+		} else {
+			if (prim->ifa_local != ifa1->ifa_local)
+				continue;
+			prim1 = ifa1;
+			if (prim != prim1)
+				same_prefsrc = 1;
+		}
 		if (ifa->ifa_local == ifa1->ifa_local)
 			ok |= LOCAL_OK;
 		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 			ok |= BRD1_OK;
 		if (any == ifa1->ifa_broadcast)
 			ok |= BRD0_OK;
+		/* primary has network specific broadcasts */
+		if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
+			__be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
+			__be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
+
+			if (!ipv4_is_zeronet(any1)) {
+				if (ifa->ifa_broadcast == brd1 ||
+				    ifa->ifa_broadcast == any1)
+					ok |= BRD_OK;
+				if (brd == brd1 || brd == any1)
+					ok |= BRD1_OK;
+				if (any == brd1 || any == any1)
+					ok |= BRD0_OK;
+			}
+		}
 	}
 
 	if (!(ok & BRD_OK))
 		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
-	if (!(ok & BRD1_OK))
-		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
-	if (!(ok & BRD0_OK))
-		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+	if (subnet && ifa->ifa_prefixlen < 31) {
+		if (!(ok & BRD1_OK))
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+		if (!(ok & BRD0_OK))
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+	}
 	if (!(ok & LOCAL_OK)) {
 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
 		/* Check, that this local address finally disappeared. */
-		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+		if (gone &&
+		    inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
 			/* And the last, but not the least thing.
 			 * We must flush stray FIB entries.
 			 *
@@ -896,7 +971,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
-		fib_del_ifaddr(ifa);
+		fib_del_ifaddr(ifa, NULL);
 		fib_update_nh_saddrs(dev);
 		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
-- 
cgit v1.2.3


From 2d230e2b2c3111cf4a11619f60dcd158ae84e3ab Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 Mar 2011 12:13:52 +0000
Subject: ipv4: remove the routes on secondary promotion

The secondary address promotion relies on fib_sync_down_addr
to remove all routes created for the secondary addresses when
the old primary address is deleted. It does not happen for cases
when the primary address is also in another subnet. Fix that
by deleting local and broadcast routes for all secondaries while
they are on device list and by faking that all addresses from
this subnet are to be deleted. It relies on fib_del_ifaddr being
able to ignore the IPs from the concerned subnet while checking
for duplication.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6d85800daeb7..2523001f4c9a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -345,6 +345,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		}
 	}
 
+	/* On promotion all secondaries from subnet are changing
+	 * the primary IP, we must remove all their routes silently
+	 * and later to add them back with new prefsrc. Do this
+	 * while all addresses are on the device list.
+	 */
+	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa))
+			fib_del_ifaddr(ifa, ifa1);
+	}
+
 	/* 2. Unlink it */
 
 	*ifap = ifa1->ifa_next;
-- 
cgit v1.2.3


From 04024b937a6e9b7d4320b5853557cea3930d528c Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 Mar 2011 12:13:54 +0000
Subject: ipv4: optimize route adding on secondary promotion

Optimize the calling of fib_add_ifaddr for all
secondary addresses after the promoted one to start from
their place, not from the new place of the promoted
secondary. It will save some CPU cycles because we
are sure the promoted secondary was first for the subnet
and all next secondaries do not change their place.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2523001f4c9a..d5a4553bebc3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -375,6 +375,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
+		struct in_ifaddr *next_sec = promote->ifa_next;
 
 		if (prev_prom) {
 			prev_prom->ifa_next = promote->ifa_next;
@@ -386,7 +387,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
-		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 			if (ifa1->ifa_mask != ifa->ifa_mask ||
 			    !inet_ifa_match(ifa1->ifa_address, ifa))
 					continue;
-- 
cgit v1.2.3


From a40c4f10e3fb96030358e49abd010c1f08446fa3 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Mon, 21 Mar 2011 15:07:16 -0700
Subject: libceph: add lingering request and watch/notify event framework

Lingering requests are requests that are sent to the OSD normally but
tracked also after we get a successful request.  This keeps the OSD
connection open and resends the original request if the object moves to
another OSD.  The OSD can then send notification messages back to us
if another client initiates a notify.

This framework will be used by RBD so that the client gets notification
when a snapshot is created by another node or tool.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/ceph_common.c |   1 +
 net/ceph/osd_client.c  | 385 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 374 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index f3e4a13fea0c..95f96ab94bba 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -62,6 +62,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_OSD_MAP: return "osd_map";
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
+	case CEPH_MSG_WATCH_NOTIFY: return "watch_notify";
 	default: return "unknown";
 	}
 }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b85ed5a5503d..02212ed50852 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -25,6 +25,12 @@ static const struct ceph_connection_operations osd_con_ops;
 
 static void send_queued(struct ceph_osd_client *osdc);
 static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
+static void __register_request(struct ceph_osd_client *osdc,
+			       struct ceph_osd_request *req);
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req);
+static int __send_request(struct ceph_osd_client *osdc,
+			  struct ceph_osd_request *req);
 
 static int op_needs_trail(int op)
 {
@@ -33,6 +39,7 @@ static int op_needs_trail(int op)
 	case CEPH_OSD_OP_SETXATTR:
 	case CEPH_OSD_OP_CMPXATTR:
 	case CEPH_OSD_OP_CALL:
+	case CEPH_OSD_OP_NOTIFY:
 		return 1;
 	default:
 		return 0;
@@ -208,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	init_completion(&req->r_completion);
 	init_completion(&req->r_safe_completion);
 	INIT_LIST_HEAD(&req->r_unsafe_item);
+	INIT_LIST_HEAD(&req->r_linger_item);
+	INIT_LIST_HEAD(&req->r_linger_osd);
 	req->r_flags = flags;
 
 	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -314,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
 		break;
 	case CEPH_OSD_OP_STARTSYNC:
 		break;
+	case CEPH_OSD_OP_NOTIFY:
+		{
+			__le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
+			__le32 timeout = cpu_to_le32(src->watch.timeout);
+
+			BUG_ON(!req->r_trail);
+
+			ceph_pagelist_append(req->r_trail,
+						&prot_ver, sizeof(prot_ver));
+			ceph_pagelist_append(req->r_trail,
+						&timeout, sizeof(timeout));
+		}
+	case CEPH_OSD_OP_NOTIFY_ACK:
+	case CEPH_OSD_OP_WATCH:
+		dst->watch.cookie = cpu_to_le64(src->watch.cookie);
+		dst->watch.ver = cpu_to_le64(src->watch.ver);
+		dst->watch.flag = src->watch.flag;
+		break;
 	default:
 		pr_err("unrecognized osd opcode %d\n", dst->op);
 		WARN_ON(1);
@@ -534,7 +561,7 @@ __lookup_request_ge(struct ceph_osd_client *osdc,
 static void __kick_osd_requests(struct ceph_osd_client *osdc,
 				struct ceph_osd *osd)
 {
-	struct ceph_osd_request *req;
+	struct ceph_osd_request *req, *nreq;
 	int err;
 
 	dout("__kick_osd_requests osd%d\n", osd->o_osd);
@@ -546,7 +573,17 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 		list_move(&req->r_req_lru_item, &osdc->req_unsent);
 		dout("requeued %p tid %llu osd%d\n", req, req->r_tid,
 		     osd->o_osd);
-		req->r_flags |= CEPH_OSD_FLAG_RETRY;
+		if (!req->r_linger)
+			req->r_flags |= CEPH_OSD_FLAG_RETRY;
+	}
+
+	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
+				 r_linger_osd) {
+		__unregister_linger_request(osdc, req);
+		__register_request(osdc, req);
+		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
+		     osd->o_osd);
 	}
 }
 
@@ -590,6 +627,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
 	atomic_set(&osd->o_ref, 1);
 	osd->o_osdc = osdc;
 	INIT_LIST_HEAD(&osd->o_requests);
+	INIT_LIST_HEAD(&osd->o_linger_requests);
 	INIT_LIST_HEAD(&osd->o_osd_lru);
 	osd->o_incarnation = 1;
 
@@ -679,7 +717,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 	int ret = 0;
 
 	dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
-	if (list_empty(&osd->o_requests)) {
+	if (list_empty(&osd->o_requests) &&
+	    list_empty(&osd->o_linger_requests)) {
 		__remove_osd(osdc, osd);
 	} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
 			  &osd->o_con.peer_addr,
@@ -752,10 +791,9 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
  * Register request, assign tid.  If this is the first request, set up
  * the timeout event.
  */
-static void register_request(struct ceph_osd_client *osdc,
-			     struct ceph_osd_request *req)
+static void __register_request(struct ceph_osd_client *osdc,
+			       struct ceph_osd_request *req)
 {
-	mutex_lock(&osdc->request_mutex);
 	req->r_tid = ++osdc->last_tid;
 	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
 	INIT_LIST_HEAD(&req->r_req_lru_item);
@@ -769,6 +807,13 @@ static void register_request(struct ceph_osd_client *osdc,
 		dout(" first request, scheduling timeout\n");
 		__schedule_osd_timeout(osdc);
 	}
+}
+
+static void register_request(struct ceph_osd_client *osdc,
+			     struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	__register_request(osdc, req);
 	mutex_unlock(&osdc->request_mutex);
 }
 
@@ -787,9 +832,14 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
 
 		list_del_init(&req->r_osd_item);
-		if (list_empty(&req->r_osd->o_requests))
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
 			__move_osd_to_lru(osdc, req->r_osd);
-		req->r_osd = NULL;
+		}
+		if (list_empty(&req->r_osd_item) &&
+		    list_empty(&req->r_linger_item))
+			req->r_osd = NULL;
 	}
 
 	ceph_osdc_put_request(req);
@@ -812,6 +862,58 @@ static void __cancel_request(struct ceph_osd_request *req)
 	}
 }
 
+static void __register_linger_request(struct ceph_osd_client *osdc,
+				    struct ceph_osd_request *req)
+{
+	dout("__register_linger_request %p\n", req);
+	list_add_tail(&req->r_linger_item, &osdc->req_linger);
+	list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+}
+
+static void __unregister_linger_request(struct ceph_osd_client *osdc,
+					struct ceph_osd_request *req)
+{
+	dout("__unregister_linger_request %p\n", req);
+	if (req->r_osd) {
+		list_del_init(&req->r_linger_item);
+		list_del_init(&req->r_linger_osd);
+
+		if (list_empty(&req->r_osd->o_requests) &&
+		    list_empty(&req->r_osd->o_linger_requests)) {
+			dout("moving osd to %p lru\n", req->r_osd);
+			__move_osd_to_lru(osdc, req->r_osd);
+		}
+		req->r_osd = NULL;
+	}
+}
+
+void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
+					 struct ceph_osd_request *req)
+{
+	mutex_lock(&osdc->request_mutex);
+	if (req->r_linger) {
+		__unregister_linger_request(osdc, req);
+		ceph_osdc_put_request(req);
+	}
+	mutex_unlock(&osdc->request_mutex);
+}
+EXPORT_SYMBOL(ceph_osdc_unregister_linger_request);
+
+void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
+				  struct ceph_osd_request *req)
+{
+	if (!req->r_linger) {
+		dout("set_request_linger %p\n", req);
+		req->r_linger = 1;
+		/*
+		 * caller is now responsible for calling
+		 * unregister_linger_request
+		 */
+		ceph_osdc_get_request(req);
+	}
+}
+EXPORT_SYMBOL(ceph_osdc_set_request_linger);
+
 /*
  * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
  * (as needed), and set the request r_osd appropriately.  If there is
@@ -958,7 +1060,6 @@ static void handle_timeout(struct work_struct *work)
 		osdc->client->options->osd_keepalive_timeout * HZ;
 	unsigned long last_stamp = 0;
 	struct list_head slow_osds;
-
 	dout("timeout\n");
 	down_read(&osdc->map_sem);
 
@@ -1060,7 +1161,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	    numops * sizeof(struct ceph_osd_op))
 		goto bad;
 	dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
-
 	/* lookup */
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
@@ -1104,6 +1204,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 
 	dout("handle_reply tid %llu flags %d\n", tid, flags);
 
+	if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK))
+		__register_linger_request(osdc, req);
+
 	/* either this is a read, or we got the safe response */
 	if (result < 0 ||
 	    (flags & CEPH_OSD_FLAG_ONDISK) ||
@@ -1124,6 +1227,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	}
 
 done:
+	dout("req=%p req->r_linger=%d\n", req, req->r_linger);
 	ceph_osdc_put_request(req);
 	return;
 
@@ -1159,7 +1263,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
  */
 static void kick_requests(struct ceph_osd_client *osdc)
 {
-	struct ceph_osd_request *req;
+	struct ceph_osd_request *req, *nreq;
 	struct rb_node *p;
 	int needmap = 0;
 	int err;
@@ -1177,8 +1281,30 @@ static void kick_requests(struct ceph_osd_client *osdc)
 		} else if (err > 0) {
 			dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
 			     req->r_osd ? req->r_osd->o_osd : -1);
-			req->r_flags |= CEPH_OSD_FLAG_RETRY;
+			if (!req->r_linger)
+				req->r_flags |= CEPH_OSD_FLAG_RETRY;
+		}
+	}
+
+	list_for_each_entry_safe(req, nreq, &osdc->req_linger,
+				 r_linger_item) {
+		dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
+
+		err = __map_request(osdc, req);
+		if (err == 0)
+			continue;  /* no change and no osd was specified */
+		if (err < 0)
+			continue;  /* hrm! */
+		if (req->r_osd == NULL) {
+			dout("tid %llu maps to no valid osd\n", req->r_tid);
+			needmap++;  /* request a newer map */
+			continue;
 		}
+
+		dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
+		     req->r_osd ? req->r_osd->o_osd : -1);
+		__unregister_linger_request(osdc, req);
+		__register_request(osdc, req);
 	}
 	mutex_unlock(&osdc->request_mutex);
 
@@ -1301,6 +1427,223 @@ bad:
 	return;
 }
 
+/*
+ * watch/notify callback event infrastructure
+ *
+ * These callbacks are used both for watch and notify operations.
+ */
+static void __release_event(struct kref *kref)
+{
+	struct ceph_osd_event *event =
+		container_of(kref, struct ceph_osd_event, kref);
+
+	dout("__release_event %p\n", event);
+	kfree(event);
+}
+
+static void get_event(struct ceph_osd_event *event)
+{
+	kref_get(&event->kref);
+}
+
+void ceph_osdc_put_event(struct ceph_osd_event *event)
+{
+	kref_put(&event->kref, __release_event);
+}
+EXPORT_SYMBOL(ceph_osdc_put_event);
+
+static void __insert_event(struct ceph_osd_client *osdc,
+			     struct ceph_osd_event *new)
+{
+	struct rb_node **p = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct ceph_osd_event *event = NULL;
+
+	while (*p) {
+		parent = *p;
+		event = rb_entry(parent, struct ceph_osd_event, node);
+		if (new->cookie < event->cookie)
+			p = &(*p)->rb_left;
+		else if (new->cookie > event->cookie)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, &osdc->event_tree);
+}
+
+static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc,
+					        u64 cookie)
+{
+	struct rb_node **p = &osdc->event_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct ceph_osd_event *event = NULL;
+
+	while (*p) {
+		parent = *p;
+		event = rb_entry(parent, struct ceph_osd_event, node);
+		if (cookie < event->cookie)
+			p = &(*p)->rb_left;
+		else if (cookie > event->cookie)
+			p = &(*p)->rb_right;
+		else
+			return event;
+	}
+	return NULL;
+}
+
+static void __remove_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	if (!RB_EMPTY_NODE(&event->node)) {
+		dout("__remove_event removed %p\n", event);
+		rb_erase(&event->node, &osdc->event_tree);
+		ceph_osdc_put_event(event);
+	} else {
+		dout("__remove_event didn't remove %p\n", event);
+	}
+}
+
+int ceph_osdc_create_event(struct ceph_osd_client *osdc,
+			   void (*event_cb)(u64, u64, u8, void *),
+			   int one_shot, void *data,
+			   struct ceph_osd_event **pevent)
+{
+	struct ceph_osd_event *event;
+
+	event = kmalloc(sizeof(*event), GFP_NOIO);
+	if (!event)
+		return -ENOMEM;
+
+	dout("create_event %p\n", event);
+	event->cb = event_cb;
+	event->one_shot = one_shot;
+	event->data = data;
+	event->osdc = osdc;
+	INIT_LIST_HEAD(&event->osd_node);
+	kref_init(&event->kref);   /* one ref for us */
+	kref_get(&event->kref);    /* one ref for the caller */
+	init_completion(&event->completion);
+
+	spin_lock(&osdc->event_lock);
+	event->cookie = ++osdc->event_count;
+	__insert_event(osdc, event);
+	spin_unlock(&osdc->event_lock);
+
+	*pevent = event;
+	return 0;
+}
+EXPORT_SYMBOL(ceph_osdc_create_event);
+
+void ceph_osdc_cancel_event(struct ceph_osd_event *event)
+{
+	struct ceph_osd_client *osdc = event->osdc;
+
+	dout("cancel_event %p\n", event);
+	spin_lock(&osdc->event_lock);
+	__remove_event(event);
+	spin_unlock(&osdc->event_lock);
+	ceph_osdc_put_event(event); /* caller's */
+}
+EXPORT_SYMBOL(ceph_osdc_cancel_event);
+
+
+static void do_event_work(struct work_struct *work)
+{
+	struct ceph_osd_event_work *event_work =
+		container_of(work, struct ceph_osd_event_work, work);
+	struct ceph_osd_event *event = event_work->event;
+	u64 ver = event_work->ver;
+	u64 notify_id = event_work->notify_id;
+	u8 opcode = event_work->opcode;
+
+	dout("do_event_work completing %p\n", event);
+	event->cb(ver, notify_id, opcode, event->data);
+	complete(&event->completion);
+	dout("do_event_work completed %p\n", event);
+	ceph_osdc_put_event(event);
+	kfree(event_work);
+}
+
+
+/*
+ * Process osd watch notifications
+ */
+void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+{
+	void *p, *end;
+	u8 proto_ver;
+	u64 cookie, ver, notify_id;
+	u8 opcode;
+	struct ceph_osd_event *event;
+	struct ceph_osd_event_work *event_work;
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	ceph_decode_8_safe(&p, end, proto_ver, bad);
+	ceph_decode_8_safe(&p, end, opcode, bad);
+	ceph_decode_64_safe(&p, end, cookie, bad);
+	ceph_decode_64_safe(&p, end, ver, bad);
+	ceph_decode_64_safe(&p, end, notify_id, bad);
+
+	spin_lock(&osdc->event_lock);
+	event = __find_event(osdc, cookie);
+	if (event) {
+		get_event(event);
+		if (event->one_shot)
+			__remove_event(event);
+	}
+	spin_unlock(&osdc->event_lock);
+	dout("handle_watch_notify cookie %lld ver %lld event %p\n",
+	     cookie, ver, event);
+	if (event) {
+		event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
+		INIT_WORK(&event_work->work, do_event_work);
+		if (!event_work) {
+			dout("ERROR: could not allocate event_work\n");
+			goto done_err;
+		}
+		event_work->event = event;
+		event_work->ver = ver;
+		event_work->notify_id = notify_id;
+		event_work->opcode = opcode;
+		if (!queue_work(osdc->notify_wq, &event_work->work)) {
+			dout("WARNING: failed to queue notify event work\n");
+			goto done_err;
+		}
+	}
+
+	return;
+
+done_err:
+	complete(&event->completion);
+	ceph_osdc_put_event(event);
+	return;
+
+bad:
+	pr_err("osdc handle_watch_notify corrupt msg\n");
+	return;
+}
+
+int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
+{
+	int err;
+
+	dout("wait_event %p\n", event);
+	err = wait_for_completion_interruptible_timeout(&event->completion,
+							timeout * HZ);
+	ceph_osdc_put_event(event);
+	if (err > 0)
+		err = 0;
+	dout("wait_event %p returns %d\n", event, err);
+	return err;
+}
+EXPORT_SYMBOL(ceph_osdc_wait_event);
+
 /*
  * Register request, send initial attempt.
  */
@@ -1430,9 +1773,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	INIT_LIST_HEAD(&osdc->req_lru);
 	INIT_LIST_HEAD(&osdc->req_unsent);
 	INIT_LIST_HEAD(&osdc->req_notarget);
+	INIT_LIST_HEAD(&osdc->req_linger);
 	osdc->num_requests = 0;
 	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+	spin_lock_init(&osdc->event_lock);
+	osdc->event_tree = RB_ROOT;
+	osdc->event_count = 0;
 
 	schedule_delayed_work(&osdc->osds_timeout_work,
 	   round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
@@ -1452,6 +1799,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 				"osd_op_reply");
 	if (err < 0)
 		goto out_msgpool;
+
+	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
+	if (IS_ERR(osdc->notify_wq)) {
+		err = PTR_ERR(osdc->notify_wq);
+		osdc->notify_wq = NULL;
+		goto out_msgpool;
+	}
 	return 0;
 
 out_msgpool:
@@ -1465,6 +1819,8 @@ EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
+	flush_workqueue(osdc->notify_wq);
+	destroy_workqueue(osdc->notify_wq);
 	cancel_delayed_work_sync(&osdc->timeout_work);
 	cancel_delayed_work_sync(&osdc->osds_timeout_work);
 	if (osdc->osdmap) {
@@ -1472,6 +1828,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 		osdc->osdmap = NULL;
 	}
 	remove_old_osds(osdc, 1);
+	WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
 	mempool_destroy(osdc->req_mempool);
 	ceph_msgpool_destroy(&osdc->msgpool_op);
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
@@ -1580,6 +1937,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_OSD_OPREPLY:
 		handle_reply(osdc, msg, con);
 		break;
+	case CEPH_MSG_WATCH_NOTIFY:
+		handle_watch_notify(osdc, msg);
+		break;
 
 	default:
 		pr_err("received unknown message type %d %s\n", type,
@@ -1673,6 +2033,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
 
 	switch (type) {
 	case CEPH_MSG_OSD_MAP:
+	case CEPH_MSG_WATCH_NOTIFY:
 		return ceph_msg_new(type, front, GFP_NOFS);
 	case CEPH_MSG_OSD_OPREPLY:
 		return get_reply(con, hdr, skip);
-- 
cgit v1.2.3


From eeff66ef6e33925f615d49e6c846263e342ab60e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 8 Mar 2011 16:39:47 +0530
Subject: net/9p: Convert the in the 9p rpc call path to GFP_NOFS

Without this we can cause reclaim allocation in writepage.

[ 3433.448430] =================================
[ 3433.449117] [ INFO: inconsistent lock state ]
[ 3433.449117] 2.6.38-rc5+ #84
[ 3433.449117] ---------------------------------
[ 3433.449117] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage.
[ 3433.449117] kswapd0/505 [HC0[0]:SC0[0]:HE1:SE1] takes:
[ 3433.449117]  (iprune_sem){+++++-}, at: [<ffffffff810ebbab>] shrink_icache_memory+0x45/0x2b1
[ 3433.449117] {RECLAIM_FS-ON-W} state was registered at:
[ 3433.449117]   [<ffffffff8107fe5f>] mark_held_locks+0x52/0x70
[ 3433.449117]   [<ffffffff8107ff02>] lockdep_trace_alloc+0x85/0x9f
[ 3433.449117]   [<ffffffff810d353d>] slab_pre_alloc_hook+0x18/0x3c
[ 3433.449117]   [<ffffffff810d3fd5>] kmem_cache_alloc+0x23/0xa2
[ 3433.449117]   [<ffffffff8127be77>] idr_pre_get+0x2d/0x6f
[ 3433.449117]   [<ffffffff815434eb>] p9_idpool_get+0x30/0xae
[ 3433.449117]   [<ffffffff81540123>] p9_client_rpc+0xd7/0x9b0
[ 3433.449117]   [<ffffffff815427b0>] p9_client_clunk+0x88/0xdb
[ 3433.449117]   [<ffffffff811d56e5>] v9fs_evict_inode+0x3c/0x48
[ 3433.449117]   [<ffffffff810eb511>] evict+0x1f/0x87
[ 3433.449117]   [<ffffffff810eb5c0>] dispose_list+0x47/0xe3
[ 3433.449117]   [<ffffffff810eb8da>] evict_inodes+0x138/0x14f
[ 3433.449117]   [<ffffffff810d90e2>] generic_shutdown_super+0x57/0xe8
[ 3433.449117]   [<ffffffff810d91e8>] kill_anon_super+0x11/0x50
[ 3433.449117]   [<ffffffff811d4951>] v9fs_kill_super+0x49/0xab
[ 3433.449117]   [<ffffffff810d926e>] deactivate_locked_super+0x21/0x46
[ 3433.449117]   [<ffffffff810d9e84>] deactivate_super+0x40/0x44
[ 3433.449117]   [<ffffffff810ef848>] mntput_no_expire+0x100/0x109
[ 3433.449117]   [<ffffffff810f0aeb>] sys_umount+0x2f1/0x31c
[ 3433.449117]   [<ffffffff8102c87b>] system_call_fastpath+0x16/0x1b
[ 3433.449117] irq event stamp: 192941
[ 3433.449117] hardirqs last  enabled at (192941): [<ffffffff81568dcf>] _raw_spin_unlock_irq+0x2b/0x30
[ 3433.449117] hardirqs last disabled at (192940): [<ffffffff810b5f97>] shrink_inactive_list+0x290/0x2f5
[ 3433.449117] softirqs last  enabled at (188470): [<ffffffff8105fd65>] __do_softirq+0x133/0x152
[ 3433.449117] softirqs last disabled at (188455): [<ffffffff8102d7cc>] call_softirq+0x1c/0x28
[ 3433.449117]
[ 3433.449117] other info that might help us debug this:
[ 3433.449117] 1 lock held by kswapd0/505:
[ 3433.449117]  #0:  (shrinker_rwsem){++++..}, at: [<ffffffff810b52e2>] shrink_slab+0x38/0x15f
[ 3433.449117]
[ 3433.449117] stack backtrace:
[ 3433.449117] Pid: 505, comm: kswapd0 Not tainted 2.6.38-rc5+ #84
[ 3433.449117] Call Trace:
[ 3433.449117]  [<ffffffff8107fbce>] ? valid_state+0x17e/0x191
[ 3433.449117]  [<ffffffff81036896>] ? save_stack_trace+0x28/0x45
[ 3433.449117]  [<ffffffff81080426>] ? check_usage_forwards+0x0/0x87
[ 3433.449117]  [<ffffffff8107fcf4>] ? mark_lock+0x113/0x22c
[ 3433.449117]  [<ffffffff8108105f>] ? __lock_acquire+0x37a/0xcf7
[ 3433.449117]  [<ffffffff8107fc0e>] ? mark_lock+0x2d/0x22c
[ 3433.449117]  [<ffffffff81081077>] ? __lock_acquire+0x392/0xcf7
[ 3433.449117]  [<ffffffff810b14d2>] ? determine_dirtyable_memory+0x15/0x28
[ 3433.449117]  [<ffffffff81081a33>] ? lock_acquire+0x57/0x6d
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff81567d85>] ? down_read+0x47/0x5c
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff810b5385>] ? shrink_slab+0xdb/0x15f
[ 3433.449117]  [<ffffffff810b69bc>] ? kswapd+0x574/0x96a
[ 3433.449117]  [<ffffffff810b6448>] ? kswapd+0x0/0x96a
[ 3433.449117]  [<ffffffff810714e2>] ? kthread+0x7d/0x85
[ 3433.449117]  [<ffffffff8102d6d4>] ? kernel_thread_helper+0x4/0x10
[ 3433.449117]  [<ffffffff81569200>] ? restore_args+0x0/0x30
[ 3433.449117]  [<ffffffff81071465>] ? kthread+0x0/0x85
[ 3433.449117]  [<ffffffff8102d6d0>] ? kernel_thread_helper+0x0/0x10

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c     | 10 +++++-----
 net/9p/protocol.c   |  6 +++---
 net/9p/trans_fd.c   |  2 +-
 net/9p/trans_rdma.c |  6 +++---
 net/9p/util.c       |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 347ec0cd2718..2ccbf04d37df 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -223,7 +223,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 
 	req = &c->reqs[row][col];
 	if (!req->tc) {
-		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
+		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
 		if (!req->wq) {
 			printk(KERN_ERR "Couldn't grow tag array\n");
 			return ERR_PTR(-ENOMEM);
@@ -233,17 +233,17 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 				P9_TRANS_PREF_PAYLOAD_SEP) {
 			int alloc_msize = min(c->msize, 4096);
 			req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
-					GFP_KERNEL);
+					  GFP_NOFS);
 			req->tc->capacity = alloc_msize;
 			req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
-					GFP_KERNEL);
+					  GFP_NOFS);
 			req->rc->capacity = alloc_msize;
 		} else {
 			req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-					GFP_KERNEL);
+					  GFP_NOFS);
 			req->tc->capacity = c->msize;
 			req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
-					GFP_KERNEL);
+					  GFP_NOFS);
 			req->rc->capacity = c->msize;
 		}
 		if ((!req->tc) || (!req->rc)) {
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 2ce515b859b3..8a4084fa8b5a 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -205,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				if (errcode)
 					break;
 
-				*sptr = kmalloc(len + 1, GFP_KERNEL);
+				*sptr = kmalloc(len + 1, GFP_NOFS);
 				if (*sptr == NULL) {
 					errcode = -EFAULT;
 					break;
@@ -273,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				if (!errcode) {
 					*wnames =
 					    kmalloc(sizeof(char *) * *nwname,
-						    GFP_KERNEL);
+						    GFP_NOFS);
 					if (!*wnames)
 						errcode = -ENOMEM;
 				}
@@ -317,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 					*wqids =
 					    kmalloc(*nwqid *
 						    sizeof(struct p9_qid),
-						    GFP_KERNEL);
+						    GFP_NOFS);
 					if (*wqids == NULL)
 						errcode = -ENOMEM;
 				}
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a30471e51740..aa5672b15eae 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -350,7 +350,7 @@ static void p9_read_work(struct work_struct *work)
 
 		if (m->req->rc == NULL) {
 			m->req->rc = kmalloc(sizeof(struct p9_fcall) +
-						m->client->msize, GFP_KERNEL);
+						m->client->msize, GFP_NOFS);
 			if (!m->req->rc) {
 				m->req = NULL;
 				err = -ENOMEM;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 29a54ccd213d..150e0c4bbf40 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -424,7 +424,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	struct p9_rdma_context *rpl_context = NULL;
 
 	/* Allocate an fcall for the reply */
-	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
+	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
 	if (!rpl_context) {
 		err = -ENOMEM;
 		goto err_close;
@@ -437,7 +437,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 */
 	if (!req->rc) {
 		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
-								GFP_KERNEL);
+				  GFP_NOFS);
 		if (req->rc) {
 			req->rc->sdata = (char *) req->rc +
 						sizeof(struct p9_fcall);
@@ -468,7 +468,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	req->rc = NULL;
 
 	/* Post the request */
-	c = kmalloc(sizeof *c, GFP_KERNEL);
+	c = kmalloc(sizeof *c, GFP_NOFS);
 	if (!c) {
 		err = -ENOMEM;
 		goto err_free1;
diff --git a/net/9p/util.c b/net/9p/util.c
index e048701a72d2..b84619b5ba22 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p)
 	unsigned long flags;
 
 retry:
-	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+	if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
 		return 0;
 
 	spin_lock_irqsave(&p->lock, flags);
-- 
cgit v1.2.3


From 472e7f9f8b547605ee9670ac803e971c2e3eeac0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 8 Mar 2011 16:39:47 +0530
Subject: net/9p: Fix compile warning

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_common.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index d62b9aa58df8..9172ab78fcb0 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -41,9 +41,9 @@ EXPORT_SYMBOL(p9_release_req_pages);
 int
 p9_nr_pages(struct p9_req_t *req)
 {
-	int start_page, end_page;
-	start_page =  (unsigned long long)req->tc->pubuf >> PAGE_SHIFT;
-	end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size +
+	unsigned long start_page, end_page;
+	start_page =  (unsigned long)req->tc->pubuf >> PAGE_SHIFT;
+	end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size +
 			PAGE_SIZE - 1) >> PAGE_SHIFT;
 	return end_page - start_page;
 }
@@ -69,8 +69,8 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
 
 	if (*pdata_off)
-		first_page_bytes = min((PAGE_SIZE - *pdata_off),
-				req->tc->pbuf_size);
+		first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
+				       req->tc->pbuf_size);
 
 	rpinfo = req->tc->private;
 	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
-- 
cgit v1.2.3


From 53bda3e5b4e91763224ecb7d05dab94d281fd41d Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Tue, 8 Mar 2011 15:34:20 -0800
Subject: [net/9p] unconditional wake_up to proc waiting for space on VirtIO
 ring

Process may wait to get space on VirtIO ring to send a transaction to
VirtFS server. Current code just does a conditional wake_up() which
means only one process will be woken up even if multiple processes
are waiting.

This fix makes the wake_up unconditional. Hence we won't have any
processes waiting for-ever.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 9b550ed9c711..961e025957ae 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -146,11 +146,10 @@ static void req_done(struct virtqueue *vq)
 		rc = virtqueue_get_buf(chan->vq, &len);
 
 		if (rc != NULL) {
-			if (!chan->ring_bufs_avail) {
-				chan->ring_bufs_avail = 1;
-				wake_up(chan->vc_wq);
-			}
+			chan->ring_bufs_avail = 1;
 			spin_unlock_irqrestore(&chan->lock, flags);
+			/* Wakeup if anyone waiting for VirtIO ring space. */
+			wake_up(chan->vc_wq);
 			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
 					rc->tag);
-- 
cgit v1.2.3


From a01a984035ea799b14aa5e874dcaeb122f09c4b4 Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Mon, 14 Mar 2011 14:12:49 -0700
Subject: [net/9p] Set the condition just before waking up.

Given that the sprious wake-ups are common, we need to move the
condition setting right next to the wake_up().  After setting the condition
to req->status = REQ_STATUS_RCVD, sprious wakeups may cause the
virtqueue back on the free list for someone else to use.
This may result in kernel panic while relasing the pinned pages
in p9_release_req_pages().

Also rearranged the while loop in req_done() for better redability.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 961e025957ae..cb98af9367ab 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -141,33 +141,33 @@ static void req_done(struct virtqueue *vq)
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	do {
+	while (1) {
 		spin_lock_irqsave(&chan->lock, flags);
 		rc = virtqueue_get_buf(chan->vq, &len);
 
-		if (rc != NULL) {
-			chan->ring_bufs_avail = 1;
-			spin_unlock_irqrestore(&chan->lock, flags);
-			/* Wakeup if anyone waiting for VirtIO ring space. */
-			wake_up(chan->vc_wq);
-			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
-			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n",
-					rc->tag);
-			req = p9_tag_lookup(chan->client, rc->tag);
-			req->status = REQ_STATUS_RCVD;
-			if (req->tc->private) {
-				struct trans_rpage_info *rp = req->tc->private;
-				/*Release pages */
-				p9_release_req_pages(rp);
-				if (rp->rp_alloc)
-					kfree(rp);
-				req->tc->private = NULL;
-			}
-			p9_client_cb(chan->client, req);
-		} else {
+		if (rc == NULL) {
 			spin_unlock_irqrestore(&chan->lock, flags);
+			break;
 		}
-	} while (rc != NULL);
+
+		chan->ring_bufs_avail = 1;
+		spin_unlock_irqrestore(&chan->lock, flags);
+		/* Wakeup if anyone waiting for VirtIO ring space. */
+		wake_up(chan->vc_wq);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
+		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
+		req = p9_tag_lookup(chan->client, rc->tag);
+		if (req->tc->private) {
+			struct trans_rpage_info *rp = req->tc->private;
+			/*Release pages */
+			p9_release_req_pages(rp);
+			if (rp->rp_alloc)
+				kfree(rp);
+			req->tc->private = NULL;
+		}
+		req->status = REQ_STATUS_RCVD;
+		p9_client_cb(chan->client, req);
+	}
 }
 
 /**
-- 
cgit v1.2.3


From 316ad5501c2098cb2a2a25ed77a0421f1671411c Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Mon, 14 Mar 2011 14:22:41 -0700
Subject: [net/9p] Don't re-pin pages on retrying virtqueue_add_buf().

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cb98af9367ab..c6e1ae2fb926 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -262,7 +262,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-req_retry:
 	req->status = REQ_STATUS_SENT;
 
 	if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
@@ -295,6 +294,7 @@ req_retry:
 		}
 	}
 
+req_retry_pinned:
 	spin_lock_irqsave(&chan->lock, flags);
 
 	/* Handle out VirtIO ring buffers */
@@ -355,7 +355,7 @@ req_retry:
 				return err;
 
 			P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
-			goto req_retry;
+			goto req_retry_pinned;
 		} else {
 			spin_unlock_irqrestore(&chan->lock, flags);
 			P9_DPRINTK(P9_DEBUG_TRANS,
-- 
cgit v1.2.3


From 68da9ba4eeadae86ad42e52b80822fbd56971267 Mon Sep 17 00:00:00 2001
From: "Venkateswararao Jujjuri (JV)" <jvrao@linux.vnet.ibm.com>
Date: Fri, 18 Mar 2011 15:49:48 -0700
Subject: [net/9p]: Introduce basic flow-control for VirtIO transport.

Recent zerocopy work in the 9P VirtIO transport maps and pins
user buffers into kernel memory for the server to work on them.
Since the user process can initiate this kind of pinning with a simple
read/write call, thousands of IO threads initiated by the user process can
hog the system resources and could result into denial of service.

This patch introduces flow control to avoid that extreme scenario.

The ceiling limit to avoid denial of service attacks is set to relatively
high (nr_free_pagecache_pages()/4) so that it won't interfere with
regular usage, but can step in extreme cases to limit the total system
hang. Since we don't have a global structure to accommodate this variable,
I choose the virtio_chan as the home for this.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Reviewed-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/trans_virtio.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c6e1ae2fb926..e8f046b07182 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -43,6 +43,7 @@
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
 #include <linux/scatterlist.h>
+#include <linux/swap.h>
 #include <linux/virtio.h>
 #include <linux/virtio_9p.h>
 #include "trans_common.h"
@@ -51,6 +52,8 @@
 
 /* a single mutex to manage channel initialization and attachment */
 static DEFINE_MUTEX(virtio_9p_lock);
+static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
+static atomic_t vp_pinned = ATOMIC_INIT(0);
 
 /**
  * struct virtio_chan - per-instance transport information
@@ -78,7 +81,10 @@ struct virtio_chan {
 	struct virtqueue *vq;
 	int ring_bufs_avail;
 	wait_queue_head_t *vc_wq;
-
+	/* This is global limit. Since we don't have a global structure,
+	 * will be placing it in each channel.
+	 */
+	int p9_max_pages;
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
 
@@ -159,8 +165,11 @@ static void req_done(struct virtqueue *vq)
 		req = p9_tag_lookup(chan->client, rc->tag);
 		if (req->tc->private) {
 			struct trans_rpage_info *rp = req->tc->private;
+			int p = rp->rp_nr_pages;
 			/*Release pages */
 			p9_release_req_pages(rp);
+			atomic_sub(p, &vp_pinned);
+			wake_up(&vp_wq);
 			if (rp->rp_alloc)
 				kfree(rp);
 			req->tc->private = NULL;
@@ -269,6 +278,14 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 		int rpinfo_size = sizeof(struct trans_rpage_info) +
 			sizeof(struct page *) * nr_pages;
 
+		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+			err = wait_event_interruptible(vp_wq,
+				atomic_read(&vp_pinned) < chan->p9_max_pages);
+			if (err  == -ERESTARTSYS)
+				return err;
+			P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
+		}
+
 		if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
 			/* We can use sdata */
 			req->tc->private = req->tc->sdata + req->tc->size;
@@ -291,6 +308,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 			if (rpinfo->rp_alloc)
 				kfree(rpinfo);
 			return err;
+		} else {
+			atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
 		}
 	}
 
@@ -452,6 +471,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
 	}
 	init_waitqueue_head(chan->vc_wq);
 	chan->ring_bufs_avail = 1;
+	/* Ceiling limit to avoid denial of service attacks */
+	chan->p9_max_pages = nr_free_buffer_pages()/4;
 
 	mutex_lock(&virtio_9p_lock);
 	list_add_tail(&chan->chan_list, &virtio_chan_list);
-- 
cgit v1.2.3


From 246408dcd5dfeef2df437ccb0ef4d6ee87805f58 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Mar 2011 18:40:10 -0400
Subject: SUNRPC: Never reuse the socket port after an xs_close()

If we call xs_close(), we're in one of two situations:
 - Autoclose, which means we don't expect to resend a request
 - bind+connect failed, which probably means the port is in use

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 net/sunrpc/xprtsock.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index be96d429b475..1e336a06d3e6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	if (sk == NULL)
 		return;
 
+	transport->srcport = 0;
+
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;
-- 
cgit v1.2.3


From 94dcf29a11b3d20a28790598d701f98484a969da Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Mar 2011 16:30:45 -0700
Subject: kthread: use kthread_create_on_node()

ksoftirqd, kworker, migration, and pktgend kthreads can be created with
kthread_create_on_node(), to get proper NUMA affinities for their stack and
task_struct.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: David Howells <dhowells@redhat.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/core/pktgen.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0c55eaa70e39..aeeece72b72f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3761,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu)
 	list_add_tail(&t->th_list, &pktgen_threads);
 	init_completion(&t->start_done);
 
-	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
+	p = kthread_create_on_node(pktgen_thread_worker,
+				   t,
+				   cpu_to_node(cpu),
+				   "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
 		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);
-- 
cgit v1.2.3


From 9c7a4f9ce651383c73dfdff3d7e21d5f9572c4ec Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 22 Mar 2011 19:17:36 -0700
Subject: ipv6: ip6_route_output does not modify sk parameter, so make it const

This avoids explicit cast to avoid 'discards qualifiers'
compiler warning in a netfilter patch that i've been working on.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6814c8722fa7..843406f14d7b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -854,7 +854,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 				    struct flowi6 *fl6)
 {
 	int flags = 0;
-- 
cgit v1.2.3


From a7bff75b087e7a355838a32efe61707cfa73c194 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Tue, 22 Mar 2011 11:40:32 +0000
Subject: bridge: Fix possibly wrong MLD queries' ethernet source address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ipv6_dev_get_saddr() is currently called with an uninitialized
destination address. Although in tests it usually seemed to nevertheless
always fetch the right source address, there seems to be a possible race
condition.

Therefore this commit changes this, first setting the destination
address and only after that fetching the source address.

Reported-by: Jan Beulich <JBeulich@novell.com>
Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 030a002ff8ee..f61eb2eff3fd 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->payload_len = htons(8 + sizeof(*mldq));
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
+	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
 	ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
 			   &ip6h->saddr);
-	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
-- 
cgit v1.2.3


From 67d4120a1793138bc9f4a6eb61d0fc5298ed97e0 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 14 Mar 2011 10:57:03 +0000
Subject: tcp: avoid cwnd moderation in undo

In the current undo logic, cwnd is moderated after it was restored
to the value prior entering fast-recovery. It was moderated first
in tcp_try_undo_recovery then again in tcp_complete_cwr.

Since the undo indicates recovery was false, these moderations
are not necessary. If the undo is triggered when most of the
outstanding data have been acknowledged, the (restored) cwnd is
falsely pulled down to a small value.

This patch removes these cwnd moderations if cwnd is undone
  a) during fast-recovery
	b) by receiving DSACKs past fast-recovery

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da782e7ab16d..e72af7e31d1c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const int undo)
+static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
 		else
 			tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
 
-		if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+		if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
 			tp->snd_ssthresh = tp->prior_ssthresh;
 			TCP_ECN_withdraw_cwr(tp);
 		}
 	} else {
 		tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
 	}
-	tcp_moderate_cwnd(tp);
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
 static inline void tcp_complete_cwr(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
+	/* Do not moderate cwnd if it's already undone in cwr or recovery */
+	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-- 
cgit v1.2.3


From f6152737a95bd6c22f0c664b20831aefd48085a8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 22 Mar 2011 19:37:11 -0700
Subject: tcp: Make undo_ssthresh arg to tcp_undo_cwr() a bool.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e72af7e31d1c..bef9f04c22ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 #define DBGUNDO(x...) do { } while (0)
 #endif
 
-static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh)
+static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2698,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
 		 * or our original transmission succeeded.
 		 */
 		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			mib_idx = LINUX_MIB_TCPLOSSUNDO;
 		else
@@ -2725,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
 
 	if (tp->undo_marker && !tp->undo_retrans) {
 		DBGUNDO(sk, "D-SACK");
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		tp->undo_marker = 0;
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
 	}
@@ -2778,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
 		DBGUNDO(sk, "Hoe");
-		tcp_undo_cwr(sk, 0);
+		tcp_undo_cwr(sk, false);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
 
 		/* So... Do not make Hoe's retransmit yet.
@@ -2807,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
 
 		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
 		inet_csk(sk)->icsk_retransmits = 0;
 		tp->undo_marker = 0;
@@ -3496,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 	if (flag & FLAG_ECE)
 		tcp_ratehalving_spur_to_response(sk);
 	else
-		tcp_undo_cwr(sk, 1);
+		tcp_undo_cwr(sk, true);
 }
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
-- 
cgit v1.2.3


From 406b6f974dae76a5b795d5c251d11c979a4e509b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 22 Mar 2011 21:56:23 -0700
Subject: ipv4: Fallback to FIB local table in __ip_dev_find().

In commit 9435eb1cf0b76b323019cebf8d16762a50a12a19
("ipv4: Implement __ip_dev_find using new interface address hash.")
we reimplemented __ip_dev_find() so that it doesn't have to
do a full FIB table lookup.

Instead, it consults a hash table of addresses configured to
interfaces.

This works identically to the old code in all except one case,
and that is for loopback subnets.

The old code would match the loopback device for any IP address
that falls within a subnet configured to the loopback device.

Handle this corner case by doing the FIB lookup.

We could implement this via inet_addr_onlink() but:

1) Someone could configure many addresses to loopback and
   inet_addr_onlink() is a simple list traversal.

2) We know the old code works.

Reported-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d5a4553bebc3..5345b0bee6df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,6 +64,8 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 
+#include "fib_lookup.h"
+
 static struct ipv4_devconf ipv4_devconf = {
 	.data = {
 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 			break;
 		}
 	}
+	if (!result) {
+		struct flowi4 fl4 = { .daddr = addr };
+		struct fib_result res = { 0 };
+		struct fib_table *local;
+
+		/* Fallback to FIB local table so that communication
+		 * over loopback subnets work.
+		 */
+		local = fib_get_table(net, RT_TABLE_LOCAL);
+		if (local &&
+		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
+		    res.type == RTN_LOCAL)
+			result = FIB_RES_DEV(res);
+	}
 	if (result && devref)
 		dev_hold(result);
 	rcu_read_unlock();
-- 
cgit v1.2.3


From eb49a97363f020c1d7eef8bcd93865726b1fa11d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 23 Mar 2011 12:18:15 -0700
Subject: ipv4: fix ip_rt_update_pmtu()

commit 2c8cec5c10bc (Cache learned PMTU information in inetpeer) added
an extra inet_putpeer() call in ip_rt_update_pmtu().

This results in various problems, since we can free one inetpeer, while
it is still in use.

Ref: http://www.spinics.net/lists/netdev/msg159121.html

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 870b5182ddd8..34921b0d3f8e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 			rt->rt_peer_genid = rt_peer_genid();
 		}
 		check_peer_pmtu(dst, peer);
-
-		inet_putpeer(peer);
 	}
 }
 
-- 
cgit v1.2.3


From 12ce22423abacca70bf1dfbcb8543b3e2b74aad4 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Mar 2011 16:41:45 -0700
Subject: rds: stop including asm-generic/bitops/le.h directly

asm-generic/bitops/le.h is only intended to be included directly from
asm-generic/bitops/ext2-non-atomic.h or asm-generic/bitops/minix-le.h
which implements generic ext2 or minix bit operations.

This stops including asm-generic/bitops/le.h directly and use ext2
non-atomic bit operations instead.

It seems odd to use ext2_*_bit() on rds, but it will replaced with
__{set,clear,test}_bit_le() after introducing little endian bit operations
for all architectures.  This indirect step is necessary to maintain
bisectability for some architectures which have their own little-endian
bit operations.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Andy Grover <andy.grover@oracle.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rds/cong.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rds/cong.c b/net/rds/cong.c
index 75ea686f27d5..8cc322bfd092 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -33,8 +33,7 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
-
-#include <asm-generic/bitops/le.h>
+#include <linux/bitops.h>
 
 #include "rds.h"
 
@@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	generic___set_le_bit(off, (void *)map->m_page_addrs[i]);
+	ext2_set_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	generic___clear_le_bit(off, (void *)map->m_page_addrs[i]);
+	ext2_clear_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	return generic_test_le_bit(off, (void *)map->m_page_addrs[i]);
+	return ext2_test_bit(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_add_socket(struct rds_sock *rs)
-- 
cgit v1.2.3


From e1dc1c81b9d1c823f2a529b9b9cf8bf5dacbce6a Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 23 Mar 2011 16:42:05 -0700
Subject: rds: use little-endian bitops

As a preparation for removing ext2 non-atomic bit operations from
asm/bitops.h.  This converts ext2 non-atomic bit operations to
little-endian bit operations.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Andy Grover <andy.grover@oracle.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/rds/cong.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8cc322bfd092..6daaa49d133f 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -284,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	ext2_set_bit(off, (void *)map->m_page_addrs[i]);
+	__set_bit_le(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
@@ -298,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	ext2_clear_bit(off, (void *)map->m_page_addrs[i]);
+	__clear_bit_le(off, (void *)map->m_page_addrs[i]);
 }
 
 static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
@@ -309,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
 	i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
 	off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
 
-	return ext2_test_bit(off, (void *)map->m_page_addrs[i]);
+	return test_bit_le(off, (void *)map->m_page_addrs[i]);
 }
 
 void rds_cong_add_socket(struct rds_sock *rs)
-- 
cgit v1.2.3


From 7ebb931598cd95cccea10d4bc4c0123a464ea565 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 24 Mar 2011 17:12:30 +0000
Subject: NFS: use secinfo when crossing mountpoints

A submount may use different security than the parent
mount does.  We should figure out what sec flavor the
submount uses at mount time.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/gss_mech_switch.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 8b4061049d76..6c844b01a1d1 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
 
+struct gss_api_mech *
+gss_mech_get_by_OID(struct xdr_netobj *obj)
+{
+	struct gss_api_mech	*pos, *gm = NULL;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		if (obj->len == pos->gm_oid.len) {
+			if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
+				if (try_module_get(pos->gm_owner))
+					gm = pos;
+				break;
+			}
+		}
+	}
+	spin_unlock(&registered_mechs_lock);
+	return gm;
+
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
+
 static inline int
 mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
 {
-- 
cgit v1.2.3


From 8f70e95f9f4159184f557a1db60c909d7c1bd2e3 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Thu, 24 Mar 2011 17:12:31 +0000
Subject: NFS: Determine initial mount security

When sec=<something> is not presented as a mount option,
we should attempt to determine what security flavor the
server is using.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/gss_mech_switch.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 6c844b01a1d1..e3c36a274412 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -215,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
 
 EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
 
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+{
+	struct gss_api_mech *pos = NULL;
+	int i = 0;
+
+	spin_lock(&registered_mechs_lock);
+	list_for_each_entry(pos, &registered_mechs, gm_list) {
+		array_ptr[i] = pos->gm_pfs->pseudoflavor;
+		i++;
+	}
+	spin_unlock(&registered_mechs_lock);
+	return i;
+}
+
+EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
+
 u32
 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
 {
-- 
cgit v1.2.3


From fcd13f42c9d6ab7b1024b9b7125a2e8db3cc00b2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Mar 2011 07:01:24 +0000
Subject: ipv4: fix fib metrics

Alessandro Suardi reported that we could not change route metrics :

ip ro change default .... advmss 1400

This regression came with commit 9c150e82ac50 (Allocate fib metrics
dynamically). fib_metrics is no longer an array, but a pointer to an
array.

Reported-by: Alessandro Suardi <alessandro.suardi@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Alessandro Suardi <alessandro.suardi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 622ac4c95026..75b9fb5d5d37 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -251,7 +251,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
 		    nfi->fib_priority == fi->fib_priority &&
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
-			   sizeof(fi->fib_metrics)) == 0 &&
+			   sizeof(u32) * RTAX_MAX) == 0 &&
 		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 			return fi;
-- 
cgit v1.2.3


From 52cb1c0d208565d8f06b743cdc6596d744f92e3b Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 9 Mar 2011 14:44:05 +0530
Subject: Bluetooth: Increment unacked_frames count only the first transmit

This patch lets 'l2cap_pinfo.unacked_frames' be incremented only
the first time a frame is transmitted.

Previously it was being incremented for retransmitted packets
too resulting the value to cross the transmit window size.

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index c9f9cecca527..ca27f3a41536 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1116,7 +1116,9 @@ int l2cap_ertm_send(struct sock *sk)
 		bt_cb(skb)->tx_seq = pi->next_tx_seq;
 		pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
 
-		pi->unacked_frames++;
+		if (bt_cb(skb)->retries == 1)
+			pi->unacked_frames++;
+
 		pi->frames_sent++;
 
 		if (skb_queue_is_last(TX_QUEUE(sk), skb))
-- 
cgit v1.2.3


From f630cf0d5434e3923e1b8226ffa2753ead6b0ce5 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 16 Mar 2011 15:36:29 -0300
Subject: Bluetooth: Fix HCI_RESET command synchronization

We can't send new commands before a cmd_complete for the HCI_RESET command
shows up.

Reported-by: Mikko Vinni <mmvinni@yahoo.com>
Reported-by: Justin P. Mattock <justinmattock@gmail.com>
Reported-by: Ed Tomlinson <edt@aei.ca>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Tested-by: Justin P. Mattock <justinmattock@gmail.com>
Tested-by: Mikko Vinni <mmvinni@yahoo.com>
Tested-by: Ed Tomlinson <edt@aei.ca>
---
 net/bluetooth/hci_core.c  | 6 +++++-
 net/bluetooth/hci_event.c | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b372fb8bcdcf..92b48e257b89 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -186,6 +186,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
 	BT_DBG("%s %ld", hdev->name, opt);
 
 	/* Reset device */
+	set_bit(HCI_RESET, &hdev->flags);
 	hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
 }
 
@@ -213,8 +214,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	/* Mandatory initialization */
 
 	/* Reset */
-	if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks))
+	if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) {
+			set_bit(HCI_RESET, &hdev->flags);
 			hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
+	}
 
 	/* Read Local Supported Features */
 	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
@@ -1074,6 +1077,7 @@ static void hci_cmd_timer(unsigned long arg)
 
 	BT_ERR("%s command tx timeout", hdev->name);
 	atomic_set(&hdev->cmd_cnt, 1);
+	clear_bit(HCI_RESET, &hdev->flags);
 	tasklet_schedule(&hdev->cmd_task);
 }
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 3fbfa50c2bff..cebe7588469f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -183,6 +183,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
+	clear_bit(HCI_RESET, &hdev->flags);
+
 	hci_req_complete(hdev, HCI_OP_RESET, status);
 }
 
@@ -1847,7 +1849,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (ev->opcode != HCI_OP_NOP)
 		del_timer(&hdev->cmd_timer);
 
-	if (ev->ncmd) {
+	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
 			tasklet_schedule(&hdev->cmd_task);
-- 
cgit v1.2.3


From 6994ca5e8ade57d18b7d1e05aad040c441a2ad37 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 16 Mar 2011 14:29:34 +0200
Subject: Bluetooth: Fix missing hci_dev_lock_bh in user_confirm_reply

The code was correctly calling _unlock at the end of the function but
there was no actual _lock call anywhere.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/mgmt.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 0054c74e27b7..4476d8e3c0f2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1230,6 +1230,8 @@ static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data,
 	if (!hdev)
 		return cmd_status(sk, index, mgmt_op, ENODEV);
 
+	hci_dev_lock_bh(hdev);
+
 	if (!test_bit(HCI_UP, &hdev->flags)) {
 		err = cmd_status(sk, index, mgmt_op, ENETDOWN);
 		goto failed;
-- 
cgit v1.2.3


From a0cc9a1b5712ea52aaa4e7abfa0ec2dbe0d820ff Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Thu, 24 Mar 2011 17:16:08 +0200
Subject: Bluetooth: delete hanging L2CAP channel

Sometimes L2CAP connection remains hanging. Make sure that
L2CAP channel is deleted.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap_sock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index fc85e7ae33c7..f77308e63e58 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -923,8 +923,9 @@ void __l2cap_sock_close(struct sock *sk, int reason)
 			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
-		} else
-			l2cap_chan_del(sk, reason);
+		}
+
+		l2cap_chan_del(sk, reason);
 		break;
 
 	case BT_CONNECT:
-- 
cgit v1.2.3


From b77dcf8460ae57d4eb9fd3633eb4f97b8fb20716 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 24 Mar 2011 20:16:42 +0100
Subject: Bluetooth: Fix warning with hci_cmd_timer

After we made debugobjects working again, we got the following:

WARNING: at lib/debugobjects.c:262 debug_print_object+0x8e/0xb0()
Hardware name: System Product Name
ODEBUG: free active (active state 0) object type: timer_list hint: hci_cmd_timer+0x0/0x60
Pid: 2125, comm: dmsetup Tainted: G        W   2.6.38-06707-gc62b389 #110375
Call Trace:
 [<ffffffff8104700a>] warn_slowpath_common+0x7a/0xb0
 [<ffffffff810470b6>] warn_slowpath_fmt+0x46/0x50
 [<ffffffff812d3a5e>] debug_print_object+0x8e/0xb0
 [<ffffffff81bd8810>] ? hci_cmd_timer+0x0/0x60
 [<ffffffff812d4685>] debug_check_no_obj_freed+0x125/0x230
 [<ffffffff810f1063>] ? check_object+0xb3/0x2b0
 [<ffffffff810f3630>] kfree+0x150/0x190
 [<ffffffff81be4d06>] ? bt_host_release+0x16/0x20
 [<ffffffff81be4d06>] bt_host_release+0x16/0x20
 [<ffffffff813a1907>] device_release+0x27/0xa0
 [<ffffffff812c519c>] kobject_release+0x4c/0xa0
 [<ffffffff812c5150>] ? kobject_release+0x0/0xa0
 [<ffffffff812c61f6>] kref_put+0x36/0x70
 [<ffffffff812c4d37>] kobject_put+0x27/0x60
 [<ffffffff813a21f7>] put_device+0x17/0x20
 [<ffffffff81bda4f9>] hci_free_dev+0x29/0x30
 [<ffffffff81928be6>] vhci_release+0x36/0x70
 [<ffffffff810fb366>] fput+0xd6/0x1f0
 [<ffffffff810f8fe6>] filp_close+0x66/0x90
 [<ffffffff810f90a9>] sys_close+0x99/0xf0
 [<ffffffff81d4c96b>] system_call_fastpath+0x16/0x1b

That timer was introduced with commit 6bd32326cda(Bluetooth: Use
proper timer for hci command timout)

Timer seems to be running when the thing is closed. Removing the timer
unconditionally fixes the problem. And yes, it needs to be fixed
before the HCI_UP check.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 92b48e257b89..2216620ff296 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -587,6 +587,9 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_req_cancel(hdev, ENODEV);
 	hci_req_lock(hdev);
 
+	/* Stop timer, it might be running */
+	del_timer_sync(&hdev->cmd_timer);
+
 	if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
 		hci_req_unlock(hdev);
 		return 0;
@@ -626,7 +629,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	/* Drop last sent command */
 	if (hdev->sent_cmd) {
-		del_timer_sync(&hdev->cmd_timer);
 		kfree_skb(hdev->sent_cmd);
 		hdev->sent_cmd = NULL;
 	}
-- 
cgit v1.2.3


From 436c3b66ec9824a633724ae42de1c416af4f2063 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 24 Mar 2011 17:42:21 -0700
Subject: ipv4: Invalidate nexthop cache nh_saddr more correctly.

Any operation that:

1) Brings up an interface
2) Adds an IP address to an interface
3) Deletes an IP address from an interface

can potentially invalidate the nh_saddr value, requiring
it to be recomputed.

Perform the recomputation lazily using a generation ID.

Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c  | 11 +++++++----
 net/ipv4/fib_semantics.c | 32 +++++++++++---------------------
 net/ipv4/route.c         |  6 ++++--
 3 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 02c3ba61884a..f116ce8f1b46 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		if (res.type != RTN_LOCAL || !accept_local)
 			goto e_inval;
 	}
-	*spec_dst = FIB_RES_PREFSRC(res);
+	*spec_dst = FIB_RES_PREFSRC(net, res);
 	fib_combine_itag(itag, &res);
 	dev_match = false;
 
@@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	ret = 0;
 	if (fib_lookup(net, &fl4, &res) == 0) {
 		if (res.type == RTN_UNICAST) {
-			*spec_dst = FIB_RES_PREFSRC(res);
+			*spec_dst = FIB_RES_PREFSRC(net, res);
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 		}
 	}
@@ -960,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 	struct net_device *dev = ifa->ifa_dev->dev;
+	struct net *net = dev_net(dev);
 
 	switch (event) {
 	case NETDEV_UP:
@@ -967,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
-		fib_update_nh_saddrs(dev);
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa, NULL);
-		fib_update_nh_saddrs(dev);
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		if (ifa->ifa_dev->ifa_list == NULL) {
 			/* Last address was deleted from this interface.
 			 * Disable IP.
@@ -990,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 {
 	struct net_device *dev = ptr;
 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
 		fib_disable_ip(dev, 2, -1);
@@ -1007,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
 #endif
+		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(dev_net(dev), -1);
 		break;
 	case NETDEV_DOWN:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 75b9fb5d5d37..2d4bebca671a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -695,6 +695,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 	fib_info_hash_free(old_laddrhash, bytes);
 }
 
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+{
+	nh->nh_saddr = inet_select_addr(nh->nh_dev,
+					nh->nh_gw,
+					nh->nh_cfg_scope);
+	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+
+	return nh->nh_saddr;
+}
+
 struct fib_info *fib_create_info(struct fib_config *cfg)
 {
 	int err;
@@ -855,9 +865,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 
 	change_nexthops(fi) {
 		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
-		nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
-							nexthop_nh->nh_gw,
-							nexthop_nh->nh_cfg_scope);
+		fib_info_update_nh_saddr(net, nexthop_nh);
 	} endfor_nexthops(fi)
 
 link_it:
@@ -1128,24 +1136,6 @@ out:
 	return;
 }
 
-void fib_update_nh_saddrs(struct net_device *dev)
-{
-	struct hlist_head *head;
-	struct hlist_node *node;
-	struct fib_nh *nh;
-	unsigned int hash;
-
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
-	hlist_for_each_entry(nh, node, head, nh_hash) {
-		if (nh->nh_dev != dev)
-			continue;
-		nh->nh_saddr = inet_select_addr(nh->nh_dev,
-						nh->nh_gw,
-						nh->nh_cfg_scope);
-	}
-}
-
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 /*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 34921b0d3f8e..4b0c81180804 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1718,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 		rcu_read_lock();
 		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
-			src = FIB_RES_PREFSRC(res);
+			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
 		else
 			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
@@ -2615,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net,
 		fib_select_default(&res);
 
 	if (!fl4.saddr)
-		fl4.saddr = FIB_RES_PREFSRC(res);
+		fl4.saddr = FIB_RES_PREFSRC(net, res);
 
 	dev_out = FIB_RES_DEV(res);
 	fl4.flowi4_oif = dev_out->ifindex;
@@ -3219,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net)
 {
 	get_random_bytes(&net->ipv4.rt_genid,
 			 sizeof(net->ipv4.rt_genid));
+	get_random_bytes(&net->ipv4.dev_addr_genid,
+			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
 }
 
-- 
cgit v1.2.3


From 37e826c513883099c298317bad1b3b677b2905fb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 24 Mar 2011 18:06:47 -0700
Subject: ipv4: Fix nexthop caching wrt. scoping.

Move the scope value out of the fib alias entries and into fib_info,
so that we always use the correct scope when recomputing the nexthop
cached source address.

Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_lookup.h    |  3 +--
 net/ipv4/fib_semantics.c | 15 ++++++++-------
 net/ipv4/fib_trie.c      | 12 ++++--------
 3 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 4ec323875a02..af0f14aba169 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -10,7 +10,6 @@ struct fib_alias {
 	struct fib_info		*fa_info;
 	u8			fa_tos;
 	u8			fa_type;
-	u8			fa_scope;
 	u8			fa_state;
 	struct rcu_head		rcu;
 };
@@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *);
 extern struct fib_info *fib_create_info(struct fib_config *cfg);
 extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-			 u32 tb_id, u8 type, u8 scope, __be32 dst,
+			 u32 tb_id, u8 type, __be32 dst,
 			 int dst_len, u8 tos, struct fib_info *fi,
 			 unsigned int);
 extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2d4bebca671a..641a5a2a9f9c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 	unsigned int mask = (fib_info_hash_size - 1);
 	unsigned int val = fi->fib_nhs;
 
-	val ^= fi->fib_protocol;
+	val ^= (fi->fib_protocol << 8) | fi->fib_scope;
 	val ^= (__force u32)fi->fib_prefsrc;
 	val ^= fi->fib_priority;
 	for_nexthops(fi) {
@@ -248,6 +248,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 		if (fi->fib_nhs != nfi->fib_nhs)
 			continue;
 		if (nfi->fib_protocol == fi->fib_protocol &&
+		    nfi->fib_scope == fi->fib_scope &&
 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
 		    nfi->fib_priority == fi->fib_priority &&
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
@@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 		goto errout;
 
 	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
-			    fa->fa_type, fa->fa_scope, key, dst_len,
+			    fa->fa_type, key, dst_len,
 			    fa->fa_tos, fa->fa_info, nlm_flags);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -699,7 +700,7 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
 {
 	nh->nh_saddr = inet_select_addr(nh->nh_dev,
 					nh->nh_gw,
-					nh->nh_cfg_scope);
+					nh->nh_parent->fib_scope);
 	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
 
 	return nh->nh_saddr;
@@ -763,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 
 	fi->fib_net = hold_net(net);
 	fi->fib_protocol = cfg->fc_protocol;
+	fi->fib_scope = cfg->fc_scope;
 	fi->fib_flags = cfg->fc_flags;
 	fi->fib_priority = cfg->fc_priority;
 	fi->fib_prefsrc = cfg->fc_prefsrc;
@@ -864,7 +866,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	}
 
 	change_nexthops(fi) {
-		nexthop_nh->nh_cfg_scope = cfg->fc_scope;
 		fib_info_update_nh_saddr(net, nexthop_nh);
 	} endfor_nexthops(fi)
 
@@ -914,7 +915,7 @@ failure:
 }
 
 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
-		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
 {
 	struct nlmsghdr *nlh;
@@ -936,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 	rtm->rtm_type = type;
 	rtm->rtm_flags = fi->fib_flags;
-	rtm->rtm_scope = scope;
+	rtm->rtm_scope = fi->fib_scope;
 	rtm->rtm_protocol = fi->fib_protocol;
 
 	if (rtm->rtm_dst_len)
@@ -1092,7 +1093,7 @@ void fib_select_default(struct fib_result *res)
 	list_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
-		if (fa->fa_scope != res->scope ||
+		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ac87a49ad50b..90a3ff605591 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			if (fa->fa_info->fib_priority != fi->fib_priority)
 				break;
 			if (fa->fa_type == cfg->fc_type &&
-			    fa->fa_scope == cfg->fc_scope &&
 			    fa->fa_info == fi) {
 				fa_match = fa;
 				break;
@@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 			new_fa->fa_tos = fa->fa_tos;
 			new_fa->fa_info = fi;
 			new_fa->fa_type = cfg->fc_type;
-			new_fa->fa_scope = cfg->fc_scope;
 			state = fa->fa_state;
 			new_fa->fa_state = state & ~FA_S_ACCESSED;
 
@@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	new_fa->fa_info = fi;
 	new_fa->fa_tos = tos;
 	new_fa->fa_type = cfg->fc_type;
-	new_fa->fa_scope = cfg->fc_scope;
 	new_fa->fa_state = 0;
 	/*
 	 * Insert new entry to the list.
@@ -1362,7 +1359,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 
 			if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
 				continue;
-			if (fa->fa_scope < flp->flowi4_scope)
+			if (fa->fa_info->fib_scope < flp->flowi4_scope)
 				continue;
 			fib_alias_accessed(fa);
 			err = fib_props[fa->fa_type].error;
@@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 				res->prefixlen = plen;
 				res->nh_sel = nhsel;
 				res->type = fa->fa_type;
-				res->scope = fa->fa_scope;
+				res->scope = fa->fa_info->fib_scope;
 				res->fi = fi;
 				res->table = tb;
 				res->fa_head = &li->falh;
@@ -1664,7 +1661,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 
 		if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
 		    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
-		     fa->fa_scope == cfg->fc_scope) &&
+		     fa->fa_info->fib_scope == cfg->fc_scope) &&
 		    (!cfg->fc_prefsrc ||
 		     fi->fib_prefsrc == cfg->fc_prefsrc) &&
 		    (!cfg->fc_protocol ||
@@ -1863,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 				  RTM_NEWROUTE,
 				  tb->tb_id,
 				  fa->fa_type,
-				  fa->fa_scope,
 				  xkey,
 				  plen,
 				  fa->fa_tos,
@@ -2384,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 				seq_indent(seq, iter->depth+1);
 				seq_printf(seq, "  /%d %s %s", li->plen,
 					   rtn_scope(buf1, sizeof(buf1),
-						     fa->fa_scope),
+						     fa->fa_info->fib_scope),
 					   rtn_type(buf2, sizeof(buf2),
 						    fa->fa_type));
 				if (fa->fa_tos)
-- 
cgit v1.2.3


From ef550f6f4f6c9345a27ec85d98f4f7de1adce79c Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 25 Mar 2011 13:27:48 -0700
Subject: ceph: flush msgr_wq during mds_client shutdown

The release method for mds connections uses a backpointer to the
mds_client, so we need to flush the workqueue of any pending work (and
ceph_connection references) prior to freeing the mds_client.  This fixes
an oops easily triggered under UML by

 while true ; do mount ... ; umount ... ; done

Also fix an outdated comment: the flush in ceph_destroy_client only flushes
OSD connections out.  This bug is basically an artifact of the ceph ->
ceph+libceph conversion.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/ceph_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 95f96ab94bba..9bbb356b12e7 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -394,8 +394,8 @@ void ceph_destroy_client(struct ceph_client *client)
 	ceph_osdc_stop(&client->osdc);
 
 	/*
-	 * make sure mds and osd connections close out before destroying
-	 * the auth module, which is needed to free those connections'
+	 * make sure osd connections close out before destroying the
+	 * auth module, which is needed to free those connections'
 	 * ceph_authorizers.
 	 */
 	ceph_msgr_flush();
-- 
cgit v1.2.3


From 1fbc78439291627642517f15b9b91f3125588143 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 25 Mar 2011 20:33:23 -0700
Subject: ipv4: do not ignore route errors

	The "ipv4: Inline fib_semantic_match into check_leaf"
change forgets to return the route errors. check_leaf should
return the same results as fib_table_lookup.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 90a3ff605591..b92c86f6e9b3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1365,9 +1365,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 			err = fib_props[fa->fa_type].error;
 			if (err) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
-				t->stats.semantic_match_miss++;
+				t->stats.semantic_match_passed++;
 #endif
-				return 1;
+				return err;
 			}
 			if (fi->fib_flags & RTNH_F_DEAD)
 				continue;
-- 
cgit v1.2.3


From 6b0ae4097c1ebad98c7b33f83b4ca7e93890ed12 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <mk@lab.zgora.pl>
Date: Sat, 26 Mar 2011 19:29:34 +0100
Subject: ceph: fix possible NULL pointer dereference

This patch fixes 'event_work' dereference before it is checked for NULL.

Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 02212ed50852..b6776cb627cc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1602,11 +1602,11 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 	     cookie, ver, event);
 	if (event) {
 		event_work = kmalloc(sizeof(*event_work), GFP_NOIO);
-		INIT_WORK(&event_work->work, do_event_work);
 		if (!event_work) {
 			dout("ERROR: could not allocate event_work\n");
 			goto done_err;
 		}
+		INIT_WORK(&event_work->work, do_event_work);
 		event_work->event = event;
 		event_work->ver = ver;
 		event_work->notify_id = notify_id;
-- 
cgit v1.2.3


From a271c5a0dea418931b6a903ef85adc30ad4c54be Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Sun, 27 Mar 2011 17:48:57 +0200
Subject: NFS: Ensure that rpc_release_resources_task() can be called twice.

BUG: atomic_dec_and_test(): -1: atomic counter underflow at:
Pid: 2827, comm: mount.nfs Not tainted 2.6.38 #1
Call Trace:
 [<ffffffffa02223a0>] ? put_rpccred+0x44/0x14e [sunrpc]
 [<ffffffffa021bbe9>] ? rpc_ping+0x4e/0x58 [sunrpc]
 [<ffffffffa021c4a5>] ? rpc_create+0x481/0x4fc [sunrpc]
 [<ffffffffa022298a>] ? rpcauth_lookup_credcache+0xab/0x22d [sunrpc]
 [<ffffffffa028be8c>] ? nfs_create_rpc_client+0xa6/0xeb [nfs]
 [<ffffffffa028c660>] ? nfs4_set_client+0xc2/0x1f9 [nfs]
 [<ffffffffa028cd3c>] ? nfs4_create_server+0xf2/0x2a6 [nfs]
 [<ffffffffa0295d07>] ? nfs4_remote_mount+0x4e/0x14a [nfs]
 [<ffffffff810dd570>] ? vfs_kern_mount+0x6e/0x133
 [<ffffffffa029605a>] ? nfs_do_root_mount+0x76/0x95 [nfs]
 [<ffffffffa029643d>] ? nfs4_try_mount+0x56/0xaf [nfs]
 [<ffffffffa0297434>] ? nfs_get_sb+0x435/0x73c [nfs]
 [<ffffffff810dd59b>] ? vfs_kern_mount+0x99/0x133
 [<ffffffff810dd693>] ? do_kern_mount+0x48/0xd8
 [<ffffffff810f5b75>] ? do_mount+0x6da/0x741
 [<ffffffff810f5c5f>] ? sys_mount+0x83/0xc0
 [<ffffffff8100293b>] ? system_call_fastpath+0x16/0x1b

Well, so, I think this is real bug of nfs codes somewhere. With some
review, the code

rpc_call_sync()
    rpc_run_task
        rpc_execute()
            __rpc_execute()
                rpc_release_task()
                    rpc_release_resources_task()
                        put_rpccred()                <= release cred
    rpc_put_task
        rpc_do_put_task()
            rpc_release_resources_task()
                put_rpccred()                        <= release cred again

seems to be release cred unintendedly.

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ffb687671da0..6b43ee7221d5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -860,8 +860,10 @@ static void rpc_release_resources_task(struct rpc_task *task)
 {
 	if (task->tk_rqstp)
 		xprt_release(task);
-	if (task->tk_msg.rpc_cred)
+	if (task->tk_msg.rpc_cred) {
 		put_rpccred(task->tk_msg.rpc_cred);
+		task->tk_msg.rpc_cred = NULL;
+	}
 	rpc_task_release_client(task);
 }
 
-- 
cgit v1.2.3


From 3bc07321ccc236f693ce1b6a8786f0a2e38bb87e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 15 Mar 2011 21:08:28 +0000
Subject: xfrm: Force a dst refcount before entering the xfrm type handlers

Crypto requests might return asynchronous. In this case we leave
the rcu protected region, so force a refcount on the skb's
destination entry before we enter the xfrm type input/output
handlers.

This fixes a crash when a route is deleted whilst sending IPsec
data that is transformed by an asynchronous algorithm.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_input.c  | 2 ++
 net/xfrm/xfrm_output.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 872065ca7f8c..341cd1189f8a 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -190,6 +190,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 		XFRM_SKB_CB(skb)->seq.input.low = seq;
 		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
 
+		skb_dst_force(skb);
+
 		nexthdr = x->type->input(x, skb);
 
 		if (nexthdr == -EINPROGRESS)
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 1aba03f449cc..8f3f0eedc5a4 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 
 		spin_unlock_bh(&x->lock);
 
+		skb_dst_force(skb);
+
 		err = x->type->output(x, skb);
 		if (err == -EINPROGRESS)
 			goto out_exit;
-- 
cgit v1.2.3


From e433430a0ca9cc1b851a83ac3b305e955b64880a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 15 Mar 2011 21:09:32 +0000
Subject: dst: Clone child entry in skb_dst_pop

We clone the child entry in skb_dst_pop before we call
skb_dst_drop(). Otherwise we might kill the child right
before we return it to the caller.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8f3f0eedc5a4..47bacd8c0250 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -96,7 +96,7 @@ resume:
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		skb_dst_set(skb, dst_clone(dst));
+		skb_dst_set(skb, dst);
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
-- 
cgit v1.2.3


From d50e7e3604778bfc2dc40f440e0742dbae399d54 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Sat, 19 Mar 2011 20:14:30 +0000
Subject: irda: prevent heap corruption on invalid nickname

Invalid nicknames containing only spaces will result in an underflow in
a memcpy size calculation, subsequently destroying the heap and
panicking.

v2 also catches the case where the provided nickname is longer than the
buffer size, which can result in controllable heap corruption.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/irnet_ppp.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 7c567b8aa89a..2bb2beb6a373 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket *	ap,
 	      while(isspace(start[length - 1]))
 		length--;
 
+	      DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5,
+		     -EINVAL, CTRL_ERROR, "Invalid nickname.\n");
+
 	      /* Copy the name for later reuse */
 	      memcpy(ap->rname, start + 5, length - 5);
 	      ap->rname[length - 5] = '\0';
-- 
cgit v1.2.3


From d370af0ef7951188daeb15bae75db7ba57c67846 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Sun, 20 Mar 2011 15:32:06 +0000
Subject: irda: validate peer name and attribute lengths

Length fields provided by a peer for names and attributes may be longer
than the destination array sizes.  Validate lengths to prevent stack
buffer overflows.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/iriap.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 5b743bdd89ba..36477538cea8 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
 	n = 1;
 
 	name_len = fp[n++];
+
+	IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;);
+
 	memcpy(name, fp+n, name_len); n+=name_len;
 	name[name_len] = '\0';
 
 	attr_len = fp[n++];
+
+	IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;);
+
 	memcpy(attr, fp+n, attr_len); n+=attr_len;
 	attr[attr_len] = '\0';
 
-- 
cgit v1.2.3


From be20250c13f88375345ad99950190685eda51eb8 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Sat, 19 Mar 2011 20:43:43 +0000
Subject: ROSE: prevent heap corruption with bad facilities

When parsing the FAC_NATIONAL_DIGIS facilities field, it's possible for
a remote host to provide more digipeaters than expected, resulting in
heap corruption.  Check against ROSE_MAX_DIGIS to prevent overflows, and
abort facilities parsing on failure.

Additionally, when parsing the FAC_CCITT_DEST_NSAP and
FAC_CCITT_SRC_NSAP facilities fields, a remote host can provide a length
of less than 10, resulting in an underflow in a memcpy size, causing a
kernel panic due to massive heap corruption.  A length of greater than
20 results in a stack overflow of the callsign array.  Abort facilities
parsing on these invalid length values.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/rose_subr.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 1734abba26a2..174d51c9ce37 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -290,10 +290,15 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
 				facilities->source_ndigis = 0;
 				facilities->dest_ndigis   = 0;
 				for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) {
-					if (pt[6] & AX25_HBIT)
+					if (pt[6] & AX25_HBIT) {
+						if (facilities->dest_ndigis >= ROSE_MAX_DIGIS)
+							return -1;
 						memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN);
-					else
+					} else {
+						if (facilities->source_ndigis >= ROSE_MAX_DIGIS)
+							return -1;
 						memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN);
+					}
 				}
 			}
 			p   += l + 2;
@@ -333,6 +338,11 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
 
 		case 0xC0:
 			l = p[1];
+
+			/* Prevent overflows*/
+			if (l < 10 || l > 20)
+				return -1;
+
 			if (*p == FAC_CCITT_DEST_NSAP) {
 				memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN);
 				memcpy(callsign, p + 12,   l - 10);
@@ -373,12 +383,16 @@ int rose_parse_facilities(unsigned char *p,
 			switch (*p) {
 			case FAC_NATIONAL:		/* National */
 				len = rose_parse_national(p + 1, facilities, facilities_len - 1);
+				if (len < 0)
+					return 0;
 				facilities_len -= len + 1;
 				p += len + 1;
 				break;
 
 			case FAC_CCITT:		/* CCITT */
 				len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
+				if (len < 0)
+					return 0;
 				facilities_len -= len + 1;
 				p += len + 1;
 				break;
-- 
cgit v1.2.3


From e0bccd315db0c2f919e7fcf9cb60db21d9986f52 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 20 Mar 2011 06:48:05 +0000
Subject: rose: Add length checks to CALL_REQUEST parsing

Define some constant offsets for CALL_REQUEST based on the description
at <http://www.techfest.com/networking/wan/x25plp.htm> and the
definition of ROSE as using 10-digit (5-byte) addresses.  Use them
consistently.  Validate all implicit and explicit facilities lengths.
Validate the address length byte rather than either trusting or
assuming its value.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/af_rose.c       |  8 ++---
 net/rose/rose_loopback.c | 13 ++++++-
 net/rose/rose_route.c    | 20 +++++++----
 net/rose/rose_subr.c     | 91 ++++++++++++++++++++++++++++++------------------
 4 files changed, 86 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 5ee0c62046a0..a80aef6e3d1f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -978,7 +978,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
 	struct sock *make;
 	struct rose_sock *make_rose;
 	struct rose_facilities_struct facilities;
-	int n, len;
+	int n;
 
 	skb->sk = NULL;		/* Initially we don't know who it's for */
 
@@ -987,9 +987,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
 	 */
 	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
 
-	len  = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
-	len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
-	if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+	if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+				   skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+				   &facilities)) {
 		rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
 		return 0;
 	}
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index ae4a9d99aec7..344456206b70 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param)
 	unsigned int lci_i, lci_o;
 
 	while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+		if (skb->len < ROSE_MIN_LEN) {
+			kfree_skb(skb);
+			continue;
+		}
 		lci_i     = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
 		frametype = skb->data[2];
-		dest      = (rose_address *)(skb->data + 4);
+		if (frametype == ROSE_CALL_REQUEST &&
+		    (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+		     skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+		     ROSE_CALL_REQ_ADDR_LEN_VAL)) {
+			kfree_skb(skb);
+			continue;
+		}
+		dest      = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
 		lci_o     = ROSE_DEFAULT_MAXVC + 1 - lci_i;
 
 		skb_reset_transport_header(skb);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 88a77e90e7e8..08dcd2f29cdc 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -861,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 	unsigned int lci, new_lci;
 	unsigned char cause, diagnostic;
 	struct net_device *dev;
-	int len, res = 0;
+	int res = 0;
 	char buf[11];
 
 #if 0
@@ -869,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 		return res;
 #endif
 
+	if (skb->len < ROSE_MIN_LEN)
+		return res;
 	frametype = skb->data[2];
 	lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
-	src_addr  = (rose_address *)(skb->data + 9);
-	dest_addr = (rose_address *)(skb->data + 4);
+	if (frametype == ROSE_CALL_REQUEST &&
+	    (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+	     skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+	     ROSE_CALL_REQ_ADDR_LEN_VAL))
+		return res;
+	src_addr  = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
+	dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
 
 	spin_lock_bh(&rose_neigh_list_lock);
 	spin_lock_bh(&rose_route_list_lock);
@@ -1010,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 		goto out;
 	}
 
-	len  = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
-	len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
-
 	memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
 
-	if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+	if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+				   skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+				   &facilities)) {
 		rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
 		goto out;
 	}
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 174d51c9ce37..f6c71caa94b9 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype)
 		*dptr++ = ROSE_GFI | lci1;
 		*dptr++ = lci2;
 		*dptr++ = frametype;
-		*dptr++ = 0xAA;
+		*dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
 		memcpy(dptr, &rose->dest_addr,  ROSE_ADDR_LEN);
 		dptr   += ROSE_ADDR_LEN;
 		memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
@@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
 	do {
 		switch (*p & 0xC0) {
 		case 0x00:
+			if (len < 2)
+				return -1;
 			p   += 2;
 			n   += 2;
 			len -= 2;
 			break;
 
 		case 0x40:
+			if (len < 3)
+				return -1;
 			if (*p == FAC_NATIONAL_RAND)
 				facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
 			p   += 3;
@@ -260,32 +264,48 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
 			break;
 
 		case 0x80:
+			if (len < 4)
+				return -1;
 			p   += 4;
 			n   += 4;
 			len -= 4;
 			break;
 
 		case 0xC0:
+			if (len < 2)
+				return -1;
 			l = p[1];
+			if (len < 2 + l)
+				return -1;
 			if (*p == FAC_NATIONAL_DEST_DIGI) {
 				if (!fac_national_digis_received) {
+					if (l < AX25_ADDR_LEN)
+						return -1;
 					memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
 					facilities->source_ndigis = 1;
 				}
 			}
 			else if (*p == FAC_NATIONAL_SRC_DIGI) {
 				if (!fac_national_digis_received) {
+					if (l < AX25_ADDR_LEN)
+						return -1;
 					memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
 					facilities->dest_ndigis = 1;
 				}
 			}
 			else if (*p == FAC_NATIONAL_FAIL_CALL) {
+				if (l < AX25_ADDR_LEN)
+					return -1;
 				memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
 			}
 			else if (*p == FAC_NATIONAL_FAIL_ADD) {
+				if (l < 1 + ROSE_ADDR_LEN)
+					return -1;
 				memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
 			}
 			else if (*p == FAC_NATIONAL_DIGIS) {
+				if (l % AX25_ADDR_LEN)
+					return -1;
 				fac_national_digis_received = 1;
 				facilities->source_ndigis = 0;
 				facilities->dest_ndigis   = 0;
@@ -319,24 +339,32 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
 	do {
 		switch (*p & 0xC0) {
 		case 0x00:
+			if (len < 2)
+				return -1;
 			p   += 2;
 			n   += 2;
 			len -= 2;
 			break;
 
 		case 0x40:
+			if (len < 3)
+				return -1;
 			p   += 3;
 			n   += 3;
 			len -= 3;
 			break;
 
 		case 0x80:
+			if (len < 4)
+				return -1;
 			p   += 4;
 			n   += 4;
 			len -= 4;
 			break;
 
 		case 0xC0:
+			if (len < 2)
+				return -1;
 			l = p[1];
 
 			/* Prevent overflows*/
@@ -365,49 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
 	return n;
 }
 
-int rose_parse_facilities(unsigned char *p,
+int rose_parse_facilities(unsigned char *p, unsigned packet_len,
 	struct rose_facilities_struct *facilities)
 {
 	int facilities_len, len;
 
 	facilities_len = *p++;
 
-	if (facilities_len == 0)
+	if (facilities_len == 0 || (unsigned)facilities_len > packet_len)
 		return 0;
 
-	while (facilities_len > 0) {
-		if (*p == 0x00) {
-			facilities_len--;
-			p++;
-
-			switch (*p) {
-			case FAC_NATIONAL:		/* National */
-				len = rose_parse_national(p + 1, facilities, facilities_len - 1);
-				if (len < 0)
-					return 0;
-				facilities_len -= len + 1;
-				p += len + 1;
-				break;
-
-			case FAC_CCITT:		/* CCITT */
-				len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
-				if (len < 0)
-					return 0;
-				facilities_len -= len + 1;
-				p += len + 1;
-				break;
-
-			default:
-				printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
-				facilities_len--;
-				p++;
-				break;
-			}
-		} else
-			break;	/* Error in facilities format */
+	while (facilities_len >= 3 && *p == 0x00) {
+		facilities_len--;
+		p++;
+
+		switch (*p) {
+		case FAC_NATIONAL:		/* National */
+			len = rose_parse_national(p + 1, facilities, facilities_len - 1);
+			break;
+
+		case FAC_CCITT:		/* CCITT */
+			len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
+			break;
+
+		default:
+			printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
+			len = 1;
+			break;
+		}
+
+		if (len < 0)
+			return 0;
+		if (WARN_ON(len >= facilities_len))
+			return 0;
+		facilities_len -= len + 1;
+		p += len + 1;
 	}
 
-	return 1;
+	return facilities_len == 0;
 }
 
 static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
-- 
cgit v1.2.3


From 3b261ade4224852ed841ecfd13876db812846e96 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 22 Mar 2011 01:59:47 +0000
Subject: net: remove useless comments in net/core/dev.c

The code itself can explain what it is doing, no need these comments.

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 54 ------------------------------------------------------
 1 file changed, 54 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index f453370131a0..9b23dcefae6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1140,9 +1140,6 @@ static int __dev_open(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	/*
-	 *	Is it even present?
-	 */
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
@@ -1151,9 +1148,6 @@ static int __dev_open(struct net_device *dev)
 	if (ret)
 		return ret;
 
-	/*
-	 *	Call device private open method
-	 */
 	set_bit(__LINK_STATE_START, &dev->state);
 
 	if (ops->ndo_validate_addr)
@@ -1162,31 +1156,12 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
-	/*
-	 *	If it went open OK then:
-	 */
-
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
-		/*
-		 *	Set the flags.
-		 */
 		dev->flags |= IFF_UP;
-
-		/*
-		 *	Enable NET_DMA
-		 */
 		net_dmaengine_get();
-
-		/*
-		 *	Initialize multicasting status
-		 */
 		dev_set_rx_mode(dev);
-
-		/*
-		 *	Wakeup transmit queue engine
-		 */
 		dev_activate(dev);
 	}
 
@@ -1209,22 +1184,13 @@ int dev_open(struct net_device *dev)
 {
 	int ret;
 
-	/*
-	 *	Is it already up?
-	 */
 	if (dev->flags & IFF_UP)
 		return 0;
 
-	/*
-	 *	Open device
-	 */
 	ret = __dev_open(dev);
 	if (ret < 0)
 		return ret;
 
-	/*
-	 *	... and announce new interface.
-	 */
 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 	call_netdevice_notifiers(NETDEV_UP, dev);
 
@@ -1240,10 +1206,6 @@ static int __dev_close_many(struct list_head *head)
 	might_sleep();
 
 	list_for_each_entry(dev, head, unreg_list) {
-		/*
-		 *	Tell people we are going down, so that they can
-		 *	prepare to death, when device is still operating.
-		 */
 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
 		clear_bit(__LINK_STATE_START, &dev->state);
@@ -1272,15 +1234,7 @@ static int __dev_close_many(struct list_head *head)
 		if (ops->ndo_stop)
 			ops->ndo_stop(dev);
 
-		/*
-		 *	Device is now down.
-		 */
-
 		dev->flags &= ~IFF_UP;
-
-		/*
-		 *	Shutdown NET_DMA
-		 */
 		net_dmaengine_put();
 	}
 
@@ -1309,9 +1263,6 @@ static int dev_close_many(struct list_head *head)
 
 	__dev_close_many(head);
 
-	/*
-	 * Tell people we are down
-	 */
 	list_for_each_entry(dev, head, unreg_list) {
 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
 		call_netdevice_notifiers(NETDEV_DOWN, dev);
@@ -1371,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro);
 
 static int dev_boot_phase = 1;
 
-/*
- *	Device change register/unregister. These are not inline or static
- *	as we export them to the world.
- */
-
 /**
  *	register_netdevice_notifier - register a network notifier block
  *	@nb: notifier
-- 
cgit v1.2.3


From 53914b67993c724cec585863755c9ebc8446e83b Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 22 Mar 2011 08:27:25 +0000
Subject: can: make struct proto const

can_ioctl is the only reason for struct proto to be non-const.
script/check-patch.pl suggests struct proto be const.

Setting the reference to the common can_ioctl() in all CAN protocols directly
removes the need to make the struct proto writable in af_can.c

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/af_can.c | 9 +++------
 net/can/bcm.c    | 4 ++--
 net/can/raw.c    | 4 ++--
 3 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 702be5a2c956..733d66f1b05a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -95,7 +95,7 @@ struct s_pstats   can_pstats;      /* receive list statistics */
  * af_can socket functions
  */
 
-static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
 
@@ -108,6 +108,7 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		return -ENOIOCTLCMD;
 	}
 }
+EXPORT_SYMBOL(can_ioctl);
 
 static void can_sock_destruct(struct sock *sk)
 {
@@ -698,13 +699,9 @@ int can_proto_register(struct can_proto *cp)
 		printk(KERN_ERR "can: protocol %d already registered\n",
 		       proto);
 		err = -EBUSY;
-	} else {
+	} else
 		proto_tab[proto] = cp;
 
-		/* use generic ioctl function if not defined by module */
-		if (!cp->ops->ioctl)
-			cp->ops->ioctl = can_ioctl;
-	}
 	spin_unlock(&proto_tab_lock);
 
 	if (err < 0)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 092dc88a7c64..871a0ad51025 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1569,7 +1569,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
 	return size;
 }
 
-static struct proto_ops bcm_ops __read_mostly = {
+static const struct proto_ops bcm_ops = {
 	.family        = PF_CAN,
 	.release       = bcm_release,
 	.bind          = sock_no_bind,
@@ -1578,7 +1578,7 @@ static struct proto_ops bcm_ops __read_mostly = {
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
 	.poll          = datagram_poll,
-	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
+	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.setsockopt    = sock_no_setsockopt,
diff --git a/net/can/raw.c b/net/can/raw.c
index 883e9d74fddf..649acfa7c70a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -742,7 +742,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
 	return size;
 }
 
-static struct proto_ops raw_ops __read_mostly = {
+static const struct proto_ops raw_ops = {
 	.family        = PF_CAN,
 	.release       = raw_release,
 	.bind          = raw_bind,
@@ -751,7 +751,7 @@ static struct proto_ops raw_ops __read_mostly = {
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
 	.poll          = datagram_poll,
-	.ioctl         = NULL,		/* use can_ioctl() from af_can.c */
+	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
 	.setsockopt    = raw_setsockopt,
-- 
cgit v1.2.3


From 8628bd8af7c4c14f40f5183f80f5744c4e682439 Mon Sep 17 00:00:00 2001
From: Jan Luebbe <jluebbe@debian.org>
Date: Thu, 24 Mar 2011 07:44:22 +0000
Subject: ipv4: Fix IP timestamp option (IPOPT_TS_PRESPEC) handling in
 ip_options_echo()

The current handling of echoed IP timestamp options with prespecified
addresses is rather broken since the 2.2.x kernels. As far as i understand
it, it should behave like when originating packets.

Currently it will only timestamp the next free slot if:
 - there is space for *two* timestamps
 - some random data from the echoed packet taken as an IP is *not* a local IP

This first is caused by an off-by-one error. 'soffset' points to the next
free slot and so we only need to have 'soffset + 7 <= optlen'.

The second bug is using sptr as the start of the option, when it really is
set to 'skb_network_header(skb)'. I just use dptr instead which points to
the timestamp option.

Finally it would only timestamp for non-local IPs, which we shouldn't do.
So instead we exclude all unicast destinations, similar to what we do in
ip_options_compile().

Signed-off-by: Jan Luebbe <jluebbe@debian.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_options.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1906fa35860c..28a736f3442f 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 				} else {
 					dopt->ts_needtime = 0;
 
-					if (soffset + 8 <= optlen) {
+					if (soffset + 7 <= optlen) {
 						__be32 addr;
 
-						memcpy(&addr, sptr+soffset-1, 4);
-						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) {
+						memcpy(&addr, dptr+soffset-1, 4);
+						if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
 							dopt->ts_needtime = 1;
 							soffset += 8;
 						}
-- 
cgit v1.2.3


From edf947f10074fea27fdb1730524dca59355a1c40 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 24 Mar 2011 13:24:01 +0000
Subject: bridge: notify applications if address of bridge device changes

The mac address of the bridge device may be changed when a new interface
is added to the bridge. If this happens, then the bridge needs to call
the network notifiers to tickle any other systems that care. Since bridge
can be a module, this also means exporting the notifier function.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c      | 6 +++++-
 net/bridge/br_private.h | 2 +-
 net/bridge/br_stp_if.c  | 9 ++++++---
 net/core/dev.c          | 1 +
 4 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index dce8f0009a12..718b60366dfe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 {
 	struct net_bridge_port *p;
 	int err = 0;
+	bool changed_addr;
 
 	/* Don't allow bridging non-ethernet like devices */
 	if ((dev->flags & IFF_LOOPBACK) ||
@@ -446,7 +447,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	list_add_rcu(&p->list, &br->port_list);
 
 	spin_lock_bh(&br->lock);
-	br_stp_recalculate_bridge_id(br);
+	changed_addr = br_stp_recalculate_bridge_id(br);
 	br_features_recompute(br);
 
 	if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
@@ -456,6 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	br_ifinfo_notify(RTM_NEWLINK, p);
 
+	if (changed_addr)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+
 	dev_set_mtu(br->dev, br_min_mtu(br));
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 19e2f46ed086..387013d33745 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -497,7 +497,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br);
 extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
 extern void br_stp_enable_port(struct net_bridge_port *p);
 extern void br_stp_disable_port(struct net_bridge_port *p);
-extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
+extern bool br_stp_recalculate_bridge_id(struct net_bridge *br);
 extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a);
 extern void br_stp_set_bridge_priority(struct net_bridge *br,
 				       u16 newprio);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 79372d4a4055..5593f5aec942 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -204,7 +204,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
 
 /* called under bridge lock */
-void br_stp_recalculate_bridge_id(struct net_bridge *br)
+bool br_stp_recalculate_bridge_id(struct net_bridge *br)
 {
 	const unsigned char *br_mac_zero =
 			(const unsigned char *)br_mac_zero_aligned;
@@ -222,8 +222,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
 
 	}
 
-	if (compare_ether_addr(br->bridge_id.addr, addr))
-		br_stp_change_bridge_id(br, addr);
+	if (compare_ether_addr(br->bridge_id.addr, addr) == 0)
+		return false;	/* no change */
+
+	br_stp_change_bridge_id(br, addr);
+	return true;
 }
 
 /* called under bridge lock */
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b23dcefae6c..563ddc28139d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1423,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 	ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
+EXPORT_SYMBOL(call_netdevice_notifiers);
 
 /* When > 0 there are consumers of rx skb time stamps */
 static atomic_t netstamp_needed = ATOMIC_INIT(0);
-- 
cgit v1.2.3


From 3e49e6d520401e1d25ec8d366520aad2c01adc1c Mon Sep 17 00:00:00 2001
From: Cesar Eduardo Barros <cesarb@cesarb.net>
Date: Sat, 26 Mar 2011 05:10:30 +0000
Subject: net: use CHECKSUM_NONE instead of magic number

Two places in the kernel were doing skb->ip_summed = 0.

Change both to skb->ip_summed = CHECKSUM_NONE, which is more readable.

Signed-off-by: Cesar Eduardo Barros <cesarb@cesarb.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7ff0343e05c7..29e48593bf22 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -663,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb)
 	skb_pull(skb, (u8 *)encap - skb->data);
 	skb_reset_network_header(skb);
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->ip_summed = 0;
+	skb->ip_summed = CHECKSUM_NONE;
 	skb->pkt_type = PACKET_HOST;
 
 	skb_tunnel_rx(skb, reg_dev);
-- 
cgit v1.2.3


From 673e63c688f43104c73aad8ea4237f7ad41fa14d Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 22 Mar 2011 23:54:49 +0000
Subject: net: fix ethtool->set_flags not intended -EINVAL return value

After commit d5dbda23804156ae6f35025ade5307a49d1db6d7 "ethtool: Add
support for vlan accleration.", drivers that have NETIF_F_HW_VLAN_TX,
and/or NETIF_F_HW_VLAN_RX feature, but do not allow enable/disable vlan
acceleration via ethtool set_flags, always return -EINVAL from that
function. Fix by returning -EINVAL only if requested features do not
match current settings and can not be changed by driver.

Change any driver that define ethtool->set_flags to use
ethtool_invalid_flags() to avoid similar problems in the future
(also on drivers that do not have the problem).

Tested with modified (to reproduce this bug) myri10ge driver.

Cc: stable@kernel.org # 2.6.37+
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 24bd57493c0d..74ead9eca126 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -141,9 +141,24 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_flags);
 
+/* Check if device can enable (or disable) particular feature coded in "data"
+ * argument. Flags "supported" describe features that can be toggled by device.
+ * If feature can not be toggled, it state (enabled or disabled) must match
+ * hardcoded device features state, otherwise flags are marked as invalid.
+ */
+bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
+{
+	u32 features = dev->features & flags_dup_features;
+	/* "data" can contain only flags_dup_features bits,
+	 * see __ethtool_set_flags */
+
+	return (features & ~supported) != (data & ~supported);
+}
+EXPORT_SYMBOL(ethtool_invalid_flags);
+
 int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 {
-	if (data & ~supported)
+	if (ethtool_invalid_flags(dev, data, supported))
 		return -EINVAL;
 
 	dev->features = ((dev->features & ~flags_dup_features) |
-- 
cgit v1.2.3


From 4dc217df68a17a57f8464c74c1b4785e40bddf77 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Fri, 25 Mar 2011 15:30:38 +0100
Subject: mac80211: fix a crash in minstrel_ht in HT mode with no supported MCS
 rates

When a client connects in HT mode but does not provide any valid MCS
rates, the function that finds the next sample rate gets stuck in an
infinite loop.
Fix this by falling back to legacy rates if no usable MCS rates are found.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 8212a8bebf06..dbdebeda097f 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -659,18 +659,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 	struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
 	struct ieee80211_local *local = hw_to_local(mp->hw);
 	u16 sta_cap = sta->ht_cap.cap;
+	int n_supported = 0;
 	int ack_dur;
 	int stbc;
 	int i;
 
 	/* fall back to the old minstrel for legacy stations */
-	if (!sta->ht_cap.ht_supported) {
-		msp->is_ht = false;
-		memset(&msp->legacy, 0, sizeof(msp->legacy));
-		msp->legacy.r = msp->ratelist;
-		msp->legacy.sample_table = msp->sample_table;
-		return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
-	}
+	if (!sta->ht_cap.ht_supported)
+		goto use_legacy;
 
 	BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) !=
 		MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS);
@@ -725,7 +721,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 
 		mi->groups[i].supported =
 			mcs->rx_mask[minstrel_mcs_groups[i].streams - 1];
+
+		if (mi->groups[i].supported)
+			n_supported++;
 	}
+
+	if (!n_supported)
+		goto use_legacy;
+
+	return;
+
+use_legacy:
+	msp->is_ht = false;
+	memset(&msp->legacy, 0, sizeof(msp->legacy));
+	msp->legacy.r = msp->ratelist;
+	msp->legacy.sample_table = msp->sample_table;
+	return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
 }
 
 static void
-- 
cgit v1.2.3


From 1f951a7f8ba05192291f781ef99a892697e47d62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20=C5=A0tetiar?= <ynezz@true.cz>
Date: Sun, 27 Mar 2011 13:31:26 +0200
Subject: mac80211: fix NULL pointer dereference in ieee80211_key_alloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ieee80211_key struct can be kfree()d several times in the function, for
example if some of the key setup functions fails beforehand, but there's no
check if the struct is still valid before we call memcpy() and INIT_LIST_HEAD()
on it.  In some cases (like it was in my case), if there's missing aes-generic
module it could lead to the following kernel OOPS:

	Unable to handle kernel NULL pointer dereference at virtual address 0000018c
	....
	PC is at memcpy+0x80/0x29c
	...
	Backtrace:
	[<bf11c5e4>] (ieee80211_key_alloc+0x0/0x234 [mac80211]) from [<bf1148b4>] (ieee80211_add_key+0x70/0x12c [mac80211])
	[<bf114844>] (ieee80211_add_key+0x0/0x12c [mac80211]) from [<bf070cc0>] (__cfg80211_set_encryption+0x2a8/0x464 [cfg80211])

Signed-off-by: Petr Štetiar <ynezz@true.cz>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8c02469b7176..09cf1f28c12b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -342,7 +342,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		if (IS_ERR(key->u.ccmp.tfm)) {
 			err = PTR_ERR(key->u.ccmp.tfm);
 			kfree(key);
-			key = ERR_PTR(err);
+			return ERR_PTR(err);
 		}
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
@@ -360,7 +360,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 		if (IS_ERR(key->u.aes_cmac.tfm)) {
 			err = PTR_ERR(key->u.aes_cmac.tfm);
 			kfree(key);
-			key = ERR_PTR(err);
+			return ERR_PTR(err);
 		}
 		break;
 	}
-- 
cgit v1.2.3


From 67aa030c0dff6095128bcb4e8043b48360f32331 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <mk@lab.zgora.pl>
Date: Sat, 26 Mar 2011 18:58:51 +0100
Subject: mac80211: fix possible NULL pointer dereference

This patch moves 'key' dereference after BUG_ON(!key) so that when key is NULL
we will see proper trace instead of oops.

Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 09cf1f28c12b..af3c56482c80 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -400,11 +400,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
 {
 	struct ieee80211_key *old_key;
 	int idx, ret;
-	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
+	bool pairwise;
 
 	BUG_ON(!sdata);
 	BUG_ON(!key);
 
+	pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
 	idx = key->conf.keyidx;
 	key->local = sdata->local;
 	key->sdata = sdata;
-- 
cgit v1.2.3


From bef9bacc4ec7ea6a02876164cd6ccaa4759edce4 Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <mk@lab.zgora.pl>
Date: Sat, 26 Mar 2011 19:26:55 +0100
Subject: cfg80211:: fix possible NULL pointer dereference

In cfg80211_inform_bss_frame() wiphy is first dereferenced on privsz
initialisation and then it is checked for NULL. This patch fixes that.

Signed-off-by: Mariusz Kozlowski <mk@lab.zgora.pl>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/scan.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ea427f418f64..300c11d99997 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -585,16 +585,23 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 	struct cfg80211_internal_bss *res;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
-	size_t privsz = wiphy->bss_priv_size;
+	size_t privsz;
+
+	if (WARN_ON(!mgmt))
+		return NULL;
+
+	if (WARN_ON(!wiphy))
+		return NULL;
 
 	if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
 	            (signal < 0 || signal > 100)))
 		return NULL;
 
-	if (WARN_ON(!mgmt || !wiphy ||
-		    len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
+	if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
 		return NULL;
 
+	privsz = wiphy->bss_priv_size;
+
 	res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
 	if (!res)
 		return NULL;
-- 
cgit v1.2.3


From 2b78ac9bfc7483ba4bda9ad3d10dd4afcf88337c Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Mon, 28 Mar 2011 14:32:32 +0300
Subject: cfg80211: fix BSS double-unlinking (continued)

This patch adds to the fix "fix BSS double-unlinking"
(commit 3207390a8b58bfc1335750f91cf6783c48ca19ca) by Johannes Berg.

It turns out, that the double-unlinking scenario can also occur if expired
BSS elements are removed whilst an interface is performing association.

To work around that, replace list_del with list_del_init also in the
"cfg80211_bss_expire" function, so that the check for whether the BSS still is
in the list works correctly in cfg80211_unlink_bss.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/scan.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 300c11d99997..fbf6f33ae4d0 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -123,6 +123,15 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev,
 	}
 }
 
+/* must hold dev->bss_lock! */
+static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev,
+				  struct cfg80211_internal_bss *bss)
+{
+	list_del_init(&bss->list);
+	rb_erase(&bss->rbn, &dev->bss_tree);
+	kref_put(&bss->ref, bss_release);
+}
+
 /* must hold dev->bss_lock! */
 void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
 {
@@ -134,9 +143,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
 			continue;
 		if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
 			continue;
-		list_del(&bss->list);
-		rb_erase(&bss->rbn, &dev->bss_tree);
-		kref_put(&bss->ref, bss_release);
+		__cfg80211_unlink_bss(dev, bss);
 		expired = true;
 	}
 
@@ -669,11 +676,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 
 	spin_lock_bh(&dev->bss_lock);
 	if (!list_empty(&bss->list)) {
-		list_del_init(&bss->list);
+		__cfg80211_unlink_bss(dev, bss);
 		dev->bss_generation++;
-		rb_erase(&bss->rbn, &dev->bss_tree);
-
-		kref_put(&bss->ref, bss_release);
 	}
 	spin_unlock_bh(&dev->bss_lock);
 }
-- 
cgit v1.2.3


From 499fe9a419d43410be576bcc825658997b6ce822 Mon Sep 17 00:00:00 2001
From: Daniel Halperin <dhalperi@cs.washington.edu>
Date: Thu, 24 Mar 2011 16:01:48 -0700
Subject: mac80211: fix aggregation frame release during timeout

Suppose the aggregation reorder buffer looks like this:

x-T-R1-y-R2,

where x and y are frames that have not been received, T is a received
frame that has timed out, and R1,R2 are received frames that have not
yet timed out. The proper behavior in this scenario is to move the
window past x (skipping it), release T and R1, and leave the window at y
until y is received or R2 times out.

As written, this code will instead leave the window at R1, because it
has not yet timed out. Fix this by exiting the reorder loop only when
the frame that has not timed out AND there are skipped frames earlier in
the current valid window.

Signed-off-by: Daniel Halperin <dhalperi@cs.washington.edu>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5c1930ba8ebe..aa5cc37b4921 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -612,7 +612,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 				skipped++;
 				continue;
 			}
-			if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
+			if (skipped &&
+			    !time_after(jiffies, tid_agg_rx->reorder_time[j] +
 					HT_RX_REORDER_BUF_TIMEOUT))
 				goto set_release_timer;
 
-- 
cgit v1.2.3


From 4910ac6c526d2868adcb5893e0c428473de862b5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 28 Mar 2011 16:51:15 -0700
Subject: ipv4: Don't ip_rt_put() an error pointer in RAW sockets.

Reported-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e837ffd3edc3..2d3c72e5bbbf 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
 		if (IS_ERR(rt)) {
 			err = PTR_ERR(rt);
+			rt = NULL;
 			goto done;
 		}
 	}
-- 
cgit v1.2.3


From 36ae0148dbb6b9e15d8f067bb7523fd2b765a6af Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 28 Mar 2011 19:45:52 +0000
Subject: xfrm: Move the test on replay window size into the replay check
 functions

As it is, the replay check is just performed if the replay window of the
legacy implementation is nonzero. So we move the test on a nonzero replay
window inside the replay check functions to be sure we are testing for the
right implementation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_replay.c | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 341cd1189f8a..a026b0ef2443 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -173,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop_unlock;
 		}
 
-		if (x->props.replay_window && x->repl->check(x, skb, seq)) {
+		if (x->repl->check(x, skb, seq)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
 			goto drop_unlock;
 		}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 2f5be5b15740..f218385950ca 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -118,6 +118,9 @@ static int xfrm_replay_check(struct xfrm_state *x,
 	u32 diff;
 	u32 seq = ntohl(net_seq);
 
+	if (!x->props.replay_window)
+		return 0;
+
 	if (unlikely(seq == 0))
 		goto err;
 
@@ -193,9 +196,14 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x,
 {
 	unsigned int bitnr, nr;
 	struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+	u32 pos;
 	u32 seq = ntohl(net_seq);
 	u32 diff =  replay_esn->seq - seq;
-	u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
+	if (!replay_esn->replay_window)
+		return 0;
+
+	pos = (replay_esn->seq - 1) % replay_esn->replay_window;
 
 	if (unlikely(seq == 0))
 		goto err;
@@ -373,12 +381,17 @@ static int xfrm_replay_check_esn(struct xfrm_state *x,
 	unsigned int bitnr, nr;
 	u32 diff;
 	struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+	u32 pos;
 	u32 seq = ntohl(net_seq);
-	u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window;
 	u32 wsize = replay_esn->replay_window;
 	u32 top = replay_esn->seq;
 	u32 bottom = top - wsize + 1;
 
+	if (!wsize)
+		return 0;
+
+	pos = (replay_esn->seq - 1) % replay_esn->replay_window;
+
 	if (unlikely(seq == 0 && replay_esn->seq_hi == 0 &&
 		     (replay_esn->seq < replay_esn->replay_window - 1)))
 		goto err;
-- 
cgit v1.2.3


From af2f464e326ebad57284cfdecb03f1606e89bbc7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 28 Mar 2011 19:46:39 +0000
Subject: xfrm: Assign esn pointers when cloning a state

When we clone a xfrm state we have to assign the replay_esn
and the preplay_esn pointers to the state if we use the
new replay detection method. To this end, we add a
xfrm_replay_clone() function that allocates memory for
the replay detection and takes over the necessary values
from the original state.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f83a3d1da81b..dd78536d40de 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1181,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 			goto error;
 	}
 
+	if (orig->replay_esn) {
+		err = xfrm_replay_clone(x, orig);
+		if (err)
+			goto error;
+	}
+
 	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
 
 	err = xfrm_init_state(x);
-- 
cgit v1.2.3


From e2b19125e94124daaeda1ddcf9b85b04575ad86f Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 28 Mar 2011 19:47:30 +0000
Subject: xfrm: Check for esn buffer len in xfrm_new_ae

In xfrm_new_ae() we may overwrite the allocated esn replay state
buffer with a wrong size. So check that the new size matches the
original allocated size and return an error if this is not the case.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fc152d28753c..ccc4c0c8ef00 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -360,6 +360,23 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props,
 	return 0;
 }
 
+static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn,
+					 struct nlattr *rp)
+{
+	struct xfrm_replay_state_esn *up;
+
+	if (!replay_esn || !rp)
+		return 0;
+
+	up = nla_data(rp);
+
+	if (xfrm_replay_state_esn_len(replay_esn) !=
+			xfrm_replay_state_esn_len(up))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn,
 				       struct xfrm_replay_state_esn **preplay_esn,
 				       struct nlattr *rta)
@@ -1766,6 +1783,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (x->km.state != XFRM_STATE_VALID)
 		goto out;
 
+	err = xfrm_replay_verify_len(x->replay_esn, rp);
+	if (err)
+		goto out;
+
 	spin_lock_bh(&x->lock);
 	xfrm_update_ae_params(x, attrs);
 	spin_unlock_bh(&x->lock);
-- 
cgit v1.2.3


From 02aadf72fe2c83f145e3437734e66be53abae481 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 28 Mar 2011 19:48:09 +0000
Subject: xfrm: Restrict extended sequence numbers to esp

The IPsec extended sequence numbers are fully implemented just for
esp. So restrict the usage to esp until other protocols have
support too.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ccc4c0c8ef00..3d15d3e1b2c4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -127,6 +127,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 	if (!rt)
 		return 0;
 
+	if (p->id.proto != IPPROTO_ESP)
+		return -EINVAL;
+
 	if (p->replay_window != 0)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 234af26ff123dfb2aa48772124721b1354c8e0a5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 29 Mar 2011 06:25:59 +0300
Subject: ceph: unlock on error in ceph_osdc_start_request()

There was a missing unlock on the error path if __map_request() failed.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b6776cb627cc..03740e8fc9d1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1672,7 +1672,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 	if (req->r_sent == 0) {
 		rc = __map_request(osdc, req);
 		if (rc < 0)
-			return rc;
+			goto out_unlock;
 		if (req->r_osd == NULL) {
 			dout("send_request %p no up osds in pg\n", req);
 			ceph_monc_request_next_osdmap(&osdc->client->monc);
@@ -1689,6 +1689,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 			}
 		}
 	}
+
+out_unlock:
 	mutex_unlock(&osdc->request_mutex);
 	up_read(&osdc->map_sem);
 	return rc;
-- 
cgit v1.2.3


From fbdb9190482fd83a3eb20cdeb0da454759f479d7 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 29 Mar 2011 12:11:06 -0700
Subject: libceph: fix null dereference when unregistering linger requests

We should only clear r_osd if we are neither registered as a linger or a
regular request.  We may unregister as a linger while still registered as
a regular request (e.g., in reset_osd).  Incorrectly clearing r_osd there
leads to a null pointer dereference in __send_request.

Also simplify the parallel check in __unregister_request() where we just
removed r_osd_item and know it's empty.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 03740e8fc9d1..3b91d651fe08 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -837,8 +837,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 			dout("moving osd to %p lru\n", req->r_osd);
 			__move_osd_to_lru(osdc, req->r_osd);
 		}
-		if (list_empty(&req->r_osd_item) &&
-		    list_empty(&req->r_linger_item))
+		if (list_empty(&req->r_linger_item))
 			req->r_osd = NULL;
 	}
 
@@ -883,7 +882,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
 			dout("moving osd to %p lru\n", req->r_osd);
 			__move_osd_to_lru(osdc, req->r_osd);
 		}
-		req->r_osd = NULL;
+		if (list_empty(&req->r_osd_item))
+			req->r_osd = NULL;
 	}
 }
 
-- 
cgit v1.2.3


From 8323c3aa74cd92465350294567142d12ffdcc963 Mon Sep 17 00:00:00 2001
From: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Date: Fri, 25 Mar 2011 16:32:57 -0700
Subject: ceph: Move secret key parsing earlier.

This makes the base64 logic be contained in mount option parsing,
and prepares us for replacing the homebew key management with the
kernel key retention service.

Signed-off-by: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/auth.c        |  8 ++++----
 net/ceph/auth_x.c      |  8 +++++---
 net/ceph/ceph_common.c | 43 ++++++++++++++++++++++++++++++++++++-------
 net/ceph/crypto.c      | 11 +++++++++++
 net/ceph/crypto.h      |  2 ++
 net/ceph/mon_client.c  |  2 +-
 6 files changed, 59 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 549c1f43e1d5..b4bf4ac090f1 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -35,12 +35,12 @@ static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
 /*
  * setup, teardown.
  */
-struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret)
+struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
 {
 	struct ceph_auth_client *ac;
 	int ret;
 
-	dout("auth_init name '%s' secret '%s'\n", name, secret);
+	dout("auth_init name '%s'\n", name);
 
 	ret = -ENOMEM;
 	ac = kzalloc(sizeof(*ac), GFP_NOFS);
@@ -52,8 +52,8 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret)
 		ac->name = name;
 	else
 		ac->name = CEPH_AUTH_NAME_DEFAULT;
-	dout("auth_init name %s secret %s\n", ac->name, secret);
-	ac->secret = secret;
+	dout("auth_init name %s\n", ac->name);
+	ac->key = key;
 	return ac;
 
 out:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 7fd5dfcf6e18..1587dc6010c6 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -662,14 +662,16 @@ int ceph_x_init(struct ceph_auth_client *ac)
 		goto out;
 
 	ret = -EINVAL;
-	if (!ac->secret) {
+	if (!ac->key) {
 		pr_err("no secret set (for auth_x protocol)\n");
 		goto out_nomem;
 	}
 
-	ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
-	if (ret)
+	ret = ceph_crypto_key_clone(&xi->secret, ac->key);
+	if (ret < 0) {
+		pr_err("cannot clone key: %d\n", ret);
 		goto out_nomem;
+	}
 
 	xi->starting = true;
 	xi->ticket_handlers = RB_ROOT;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 9bbb356b12e7..02e084f29d24 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -20,6 +20,7 @@
 #include <linux/ceph/decode.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/auth.h>
+#include "crypto.h"
 
 
@@ -117,9 +118,29 @@ int ceph_compare_options(struct ceph_options *new_opt,
 	if (ret)
 		return ret;
 
-	ret = strcmp_null(opt1->secret, opt2->secret);
-	if (ret)
-		return ret;
+	if (opt1->key && !opt2->key)
+		return -1;
+	if (!opt1->key && opt2->key)
+		return 1;
+	if (opt1->key && opt2->key) {
+		if (opt1->key->type != opt2->key->type)
+			return -1;
+		if (opt1->key->created.tv_sec != opt2->key->created.tv_sec)
+			return -1;
+		if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec)
+			return -1;
+		if (opt1->key->len != opt2->key->len)
+			return -1;
+		if (opt1->key->key && !opt2->key->key)
+			return -1;
+		if (!opt1->key->key && opt2->key->key)
+			return 1;
+		if (opt1->key->key && opt2->key->key) {
+			ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len);
+			if (ret)
+				return ret;
+		}
+	}
 
 	/* any matching mon ip implies a match */
 	for (i = 0; i < opt1->num_mon; i++) {
@@ -203,7 +224,10 @@ void ceph_destroy_options(struct ceph_options *opt)
 {
 	dout("destroy_options %p\n", opt);
 	kfree(opt->name);
-	kfree(opt->secret);
+	if (opt->key) {
+		ceph_crypto_key_destroy(opt->key);
+		kfree(opt->key);
+	}
 	kfree(opt);
 }
 EXPORT_SYMBOL(ceph_destroy_options);
@@ -295,9 +319,14 @@ int ceph_parse_options(struct ceph_options **popt, char *options,
 					      GFP_KERNEL);
 			break;
 		case Opt_secret:
-			opt->secret = kstrndup(argstr[0].from,
-						argstr[0].to-argstr[0].from,
-						GFP_KERNEL);
+		        opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+			if (!opt->key) {
+				err = -ENOMEM;
+				goto out;
+			}
+			err = ceph_crypto_key_unarmor(opt->key, argstr[0].from);
+			if (err < 0)
+				goto out;
 			break;
 
 			/* misc */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 7b505b0c983f..75f0893fa11f 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -9,6 +9,17 @@
 #include <linux/ceph/decode.h>
 #include "crypto.h"
 
+int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+			  const struct ceph_crypto_key *src)
+{
+	memcpy(dst, src, sizeof(struct ceph_crypto_key));
+	dst->key = kmalloc(src->len, GFP_NOFS);
+	if (!dst->key)
+		return -ENOMEM;
+	memcpy(dst->key, src->key, src->len);
+	return 0;
+}
+
 int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
 {
 	if (*p + sizeof(u16) + sizeof(key->created) +
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index f9eccace592b..6cf6edc91ec4 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -19,6 +19,8 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
 	kfree(key->key);
 }
 
+extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
+				 const struct ceph_crypto_key *src);
 extern int ceph_crypto_key_encode(struct ceph_crypto_key *key,
 				  void **p, void *end);
 extern int ceph_crypto_key_decode(struct ceph_crypto_key *key,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 8a079399174a..cbe31fa45508 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -759,7 +759,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 
 	/* authentication */
 	monc->auth = ceph_auth_init(cl->options->name,
-				    cl->options->secret);
+				    cl->options->key);
 	if (IS_ERR(monc->auth))
 		return PTR_ERR(monc->auth);
 	monc->auth->want_keys =
-- 
cgit v1.2.3


From e2c3d29b4295c3eec18294bc34f0c99a7b9ae413 Mon Sep 17 00:00:00 2001
From: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Date: Fri, 25 Mar 2011 16:40:48 -0700
Subject: libceph: Get secret from the kernel keys api when mounting with
 key=NAME.

Signed-off-by: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/Kconfig       |  1 +
 net/ceph/ceph_common.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'net')

diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index ad424049b0cf..be683f2d401f 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -4,6 +4,7 @@ config CEPH_LIB
 	select LIBCRC32C
 	select CRYPTO_AES
 	select CRYPTO
+	select KEYS
 	default n
 	help
 	  Choose Y or M here to include cephlib, which provides the
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 02e084f29d24..c92bc8d50597 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -5,6 +5,8 @@
 #include <linux/fs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
+#include <linux/key.h>
+#include <keys/user-type.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/parser.h>
@@ -197,6 +199,7 @@ enum {
 	Opt_fsid,
 	Opt_name,
 	Opt_secret,
+	Opt_key,
 	Opt_ip,
 	Opt_last_string,
 	/* string args above */
@@ -213,6 +216,7 @@ static match_table_t opt_tokens = {
 	{Opt_fsid, "fsid=%s"},
 	{Opt_name, "name=%s"},
 	{Opt_secret, "secret=%s"},
+	{Opt_key, "key=%s"},
 	{Opt_ip, "ip=%s"},
 	/* string args above */
 	{Opt_noshare, "noshare"},
@@ -232,6 +236,50 @@ void ceph_destroy_options(struct ceph_options *opt)
 }
 EXPORT_SYMBOL(ceph_destroy_options);
 
+/* get secret from key store */
+static int get_secret(struct ceph_crypto_key *dst, const char *name) {
+	struct key *ukey;
+	int key_err;
+	int err = 0;
+	struct user_key_payload *payload;
+	void *p;
+
+	ukey = request_key(&key_type_user, name, NULL);
+	if (!ukey || IS_ERR(ukey)) {
+		/* request_key errors don't map nicely to mount(2)
+		   errors; don't even try, but still printk */
+		key_err = PTR_ERR(ukey);
+		switch (key_err) {
+		case -ENOKEY:
+			pr_warning("ceph: Mount failed due to key not found: %s\n", name);
+			break;
+		case -EKEYEXPIRED:
+			pr_warning("ceph: Mount failed due to expired key: %s\n", name);
+			break;
+		case -EKEYREVOKED:
+			pr_warning("ceph: Mount failed due to revoked key: %s\n", name);
+			break;
+		default:
+			pr_warning("ceph: Mount failed due to unknown key error"
+			       " %d: %s\n", key_err, name);
+		}
+		err = -EPERM;
+		goto out;
+	}
+
+	payload = ukey->payload.data;
+	p = payload->data;
+	err = ceph_crypto_key_decode(dst, &p, p + payload->datalen);
+	if (err)
+		goto out_key;
+	/* pass through, err is 0 */
+
+out_key:
+	key_put(ukey);
+out:
+	return err;
+}
+
 int ceph_parse_options(struct ceph_options **popt, char *options,
 		       const char *dev_name, const char *dev_name_end,
 		       int (*parse_extra_token)(char *c, void *private),
@@ -328,6 +376,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options,
 			if (err < 0)
 				goto out;
 			break;
+		case Opt_key:
+		        opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL);
+			if (!opt->key) {
+				err = -ENOMEM;
+				goto out;
+			}
+			err = get_secret(opt->key, argstr[0].from);
+			if (err < 0)
+				goto out;
+			break;
 
 			/* misc */
 		case Opt_osdtimeout:
-- 
cgit v1.2.3


From 4b2a58abd1e17c0ee53c8dded879e015917cca67 Mon Sep 17 00:00:00 2001
From: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Date: Mon, 28 Mar 2011 14:59:38 -0700
Subject: libceph: Create a new key type "ceph".

This allows us to use existence of the key type as a feature test,
from userspace.

Signed-off-by: Tommi Virtanen <tommi.virtanen@dreamhost.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/ceph_common.c | 21 ++++++++++-------
 net/ceph/crypto.c      | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ceph/crypto.h      |  2 ++
 3 files changed, 77 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index c92bc8d50597..132963abc266 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -6,7 +6,7 @@
 #include <linux/inet.h>
 #include <linux/in6.h>
 #include <linux/key.h>
-#include <keys/user-type.h>
+#include <keys/ceph-type.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/parser.h>
@@ -241,10 +241,9 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
 	struct key *ukey;
 	int key_err;
 	int err = 0;
-	struct user_key_payload *payload;
-	void *p;
+	struct ceph_crypto_key *ckey;
 
-	ukey = request_key(&key_type_user, name, NULL);
+	ukey = request_key(&key_type_ceph, name, NULL);
 	if (!ukey || IS_ERR(ukey)) {
 		/* request_key errors don't map nicely to mount(2)
 		   errors; don't even try, but still printk */
@@ -267,9 +266,8 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) {
 		goto out;
 	}
 
-	payload = ukey->payload.data;
-	p = payload->data;
-	err = ceph_crypto_key_decode(dst, &p, p + payload->datalen);
+	ckey = ukey->payload.data;
+	err = ceph_crypto_key_clone(dst, ckey);
 	if (err)
 		goto out_key;
 	/* pass through, err is 0 */
@@ -583,10 +581,14 @@ static int __init init_ceph_lib(void)
 	if (ret < 0)
 		goto out;
 
-	ret = ceph_msgr_init();
+	ret = ceph_crypto_init();
 	if (ret < 0)
 		goto out_debugfs;
 
+	ret = ceph_msgr_init();
+	if (ret < 0)
+		goto out_crypto;
+
 	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
 		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
 		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
@@ -594,6 +596,8 @@ static int __init init_ceph_lib(void)
 
 	return 0;
 
+out_crypto:
+	ceph_crypto_shutdown();
 out_debugfs:
 	ceph_debugfs_cleanup();
 out:
@@ -604,6 +608,7 @@ static void __exit exit_ceph_lib(void)
 {
 	dout("exit_ceph_lib\n");
 	ceph_msgr_exit();
+	ceph_crypto_shutdown();
 	ceph_debugfs_cleanup();
 }
 
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 75f0893fa11f..5a8009c9e0cd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -5,7 +5,9 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <crypto/hash.h>
+#include <linux/key-type.h>
 
+#include <keys/ceph-type.h>
 #include <linux/ceph/decode.h>
 #include "crypto.h"
 
@@ -421,3 +423,63 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
 		return -EINVAL;
 	}
 }
+
+int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
+{
+	struct ceph_crypto_key *ckey;
+	int ret;
+	void *p;
+
+	ret = -EINVAL;
+	if (datalen <= 0 || datalen > 32767 || !data)
+		goto err;
+
+	ret = key_payload_reserve(key, datalen);
+	if (ret < 0)
+		goto err;
+
+	ret = -ENOMEM;
+	ckey = kmalloc(sizeof(*ckey), GFP_KERNEL);
+	if (!ckey)
+		goto err;
+
+	/* TODO ceph_crypto_key_decode should really take const input */
+	p = (void*)data;
+	ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen);
+	if (ret < 0)
+		goto err_ckey;
+
+	key->payload.data = ckey;
+	return 0;
+
+err_ckey:
+	kfree(ckey);
+err:
+	return ret;
+}
+
+int ceph_key_match(const struct key *key, const void *description)
+{
+	return strcmp(key->description, description) == 0;
+}
+
+void ceph_key_destroy(struct key *key) {
+	struct ceph_crypto_key *ckey = key->payload.data;
+
+	ceph_crypto_key_destroy(ckey);
+}
+
+struct key_type key_type_ceph = {
+	.name		= "ceph",
+	.instantiate	= ceph_key_instantiate,
+	.match		= ceph_key_match,
+	.destroy	= ceph_key_destroy,
+};
+
+int ceph_crypto_init(void) {
+	return register_key_type(&key_type_ceph);
+}
+
+void ceph_crypto_shutdown(void) {
+	unregister_key_type(&key_type_ceph);
+}
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 6cf6edc91ec4..1919d1550d75 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -42,6 +42,8 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret,
 			 void *dst, size_t *dst_len,
 			 const void *src1, size_t src1_len,
 			 const void *src2, size_t src2_len);
+extern int ceph_crypto_init(void);
+extern void ceph_crypto_shutdown(void);
 
 /* armor.c */
 extern int ceph_armor(char *dst, const char *src, const char *end);
-- 
cgit v1.2.3


From 1459a3cc51d90d78027c7b5c1790e5d22751c8eb Mon Sep 17 00:00:00 2001
From: Balaji G <balajig81@gmail.com>
Date: Tue, 29 Mar 2011 06:20:04 +0000
Subject: bridge: Fix compilation warning in function
 br_stp_recalculate_bridge_id()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/bridge/br_stp_if.c: In function ‘br_stp_recalculate_bridge_id’:
net/bridge/br_stp_if.c:216:3: warning: ‘return’ with no value, in function returning non-void

Signed-off-by: G.Balaji <balajig81@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 5593f5aec942..9b61d09de9b9 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -213,7 +213,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
 
 	/* user has chosen a value so keep it */
 	if (br->flags & BR_SET_MAC_ADDR)
-		return;
+		return false;
 
 	list_for_each_entry(p, &br->port_list, list) {
 		if (addr == br_mac_zero ||
-- 
cgit v1.2.3


From 93ca3bb5df9bc8b2c60485e1cc6507c3d7c8e1fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
Date: Mon, 28 Mar 2011 22:40:53 +0000
Subject: net: gre: provide multicast mappings for ipv4 and ipv6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

My commit 6d55cb91a0020ac0 (gre: fix hard header destination
address checking) broke multicast.

The reason is that ip_gre used to get ipgre_header() calls with
zero destination if we have NOARP or multicast destination. Instead
the actual target was decided at ipgre_tunnel_xmit() time based on
per-protocol dissection.

Instead of allowing the "abuse" of ->header() calls with invalid
destination, this creates multicast mappings for ip_gre. This also
fixes "ip neigh show nud noarp" to display the proper multicast
mappings used by the gre device.

Reported-by: Doug Kehn <rdkehn@yahoo.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Acked-by: Doug Kehn <rdkehn@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c   | 3 +++
 net/ipv6/ndisc.c | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090d273d7865..1b74d3b64371 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 	case ARPHRD_INFINIBAND:
 		ip_ib_mc_map(addr, dev->broadcast, haddr);
 		return 0;
+	case ARPHRD_IPGRE:
+		ip_ipgre_mc_map(addr, dev->broadcast, haddr);
+		return 0;
 	default:
 		if (dir) {
 			memcpy(haddr, dev->broadcast, dev->addr_len);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0e49c9db3c98..92f952d093db 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -341,6 +341,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
 	case ARPHRD_INFINIBAND:
 		ipv6_ib_mc_map(addr, dev->broadcast, buf);
 		return 0;
+	case ARPHRD_IPGRE:
+		return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
 	default:
 		if (dir) {
 			memcpy(buf, dev->broadcast, dev->addr_len);
-- 
cgit v1.2.3


From ff9a57a62afbbe2d0f3a09af321f1fd7645f38a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@web.de>
Date: Sat, 26 Mar 2011 20:27:24 +0000
Subject: bridge: mcast snooping, fix length check of snooped MLDv1/2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"len = ntohs(ip6h->payload_len)" does not include the length of the ipv6
header itself, which the rest of this function assumes, though.

This leads to a length check less restrictive as it should be in the
following line for one thing. For another, it very likely leads to an
integer underrun when substracting the offset and therefore to a very
high new value of 'len' due to its unsignedness. This will ultimately
lead to the pskb_trim_rcsum() practically never being called, even in
the cases where it should.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f61eb2eff3fd..59660c909a7c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1475,7 +1475,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	    ip6h->payload_len == 0)
 		return 0;
 
-	len = ntohs(ip6h->payload_len);
+	len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
 	if (skb->len < len)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 79b569f0ec53a14c4d71e79d93a8676d9a0fda6d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@free.fr>
Date: Wed, 30 Mar 2011 02:42:17 -0700
Subject: netdev: fix mtu check when TSO is enabled

In case the device where is coming from the packet has TSO enabled,
we should not check the mtu size value as this one could be bigger
than the expected value.

This is the case for the macvlan driver when the lower device has
TSO enabled. The macvlan inherit this feature and forward the packets
without fragmenting them. Then the packets go through dev_forward_skb
and are dropped. This patch fix this by checking TSO is not enabled
when we want to check the mtu size.

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 563ddc28139d..3da9fb06d47a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1454,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb)
 		__net_timestamp(skb);
 }
 
+static inline bool is_skb_forwardable(struct net_device *dev,
+				      struct sk_buff *skb)
+{
+	unsigned int len;
+
+	if (!(dev->flags & IFF_UP))
+		return false;
+
+	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
+	if (skb->len <= len)
+		return true;
+
+	/* if TSO is enabled, we don't care about the length as the packet
+	 * could be forwarded without being segmented before
+	 */
+	if (skb_is_gso(skb))
+		return true;
+
+	return false;
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1477,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 	skb_orphan(skb);
 	nf_reset(skb);
 
-	if (unlikely(!(dev->flags & IFF_UP) ||
-		     (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;
-- 
cgit v1.2.3


From c031235b395433350f25943b7580a5e343c7b7b2 Mon Sep 17 00:00:00 2001
From: "Philip A. Prindeville" <philipp@redfish-solutions.com>
Date: Wed, 30 Mar 2011 13:17:04 +0000
Subject: atm/solos-pci: Don't flap VCs when carrier state changes

Don't flap VCs when carrier state changes; higher-level protocols
can detect loss of connectivity and act accordingly. This is more
consistent with how other network interfaces work.

We no longer use release_vccs() so we can delete it.

release_vccs() was duplicated from net/atm/common.c; make the
corresponding function exported, since other code duplicates it
and could leverage it if it were public.

Signed-off-by: Philip A. Prindeville <philipp@redfish-solutions.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/common.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/common.c b/net/atm/common.c
index 1b9c52a02cd3..22b963d06a10 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -252,6 +252,7 @@ void atm_dev_release_vccs(struct atm_dev *dev)
 	}
 	write_unlock_irq(&vcc_sklist_lock);
 }
+EXPORT_SYMBOL(atm_dev_release_vccs);
 
 static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
 {
-- 
cgit v1.2.3


From e2666f84958adb3a034b98e99699b55705117e01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Mar 2011 16:57:46 -0700
Subject: fib: add rtnl locking in ip_fib_net_exit

Daniel J Blueman reported a lockdep splat in trie_firstleaf(), caused by
RTNL being not locked before a call to fib_table_flush()

Reported-by: Daniel J Blueman <daniel.blueman@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f116ce8f1b46..451088330bbb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1068,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net)
 	fib4_rules_exit(net);
 #endif
 
+	rtnl_lock();
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
 		struct fib_table *tb;
 		struct hlist_head *head;
@@ -1080,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net)
 			fib_free_table(tb);
 		}
 	}
+	rtnl_unlock();
 	kfree(net->ipv4.fib_table_hash);
 }
 
-- 
cgit v1.2.3


From a84b50ceb7d640437d0dc28a2bef0d0de054de89 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 30 Mar 2011 17:51:36 -0700
Subject: sctp: Pass __GFP_NOWARN to hash table allocation attempts.

Like DCCP and other similar pieces of code, there are mechanisms
here to try allocating smaller hash tables if the allocation
fails.  So pass in __GFP_NOWARN like the others do instead of
emitting a scary message.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 152976ec0b74..d5bf91d04f63 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1205,7 +1205,7 @@ SCTP_STATIC __init int sctp_init(void)
 		if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0)
 			continue;
 		sctp_assoc_hashtable = (struct sctp_hashbucket *)
-					__get_free_pages(GFP_ATOMIC, order);
+			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
 	} while (!sctp_assoc_hashtable && --order > 0);
 	if (!sctp_assoc_hashtable) {
 		pr_err("Failed association hash alloc\n");
@@ -1238,7 +1238,7 @@ SCTP_STATIC __init int sctp_init(void)
 		if ((sctp_port_hashsize > (64 * 1024)) && order > 0)
 			continue;
 		sctp_port_hashtable = (struct sctp_bind_hashbucket *)
-					__get_free_pages(GFP_ATOMIC, order);
+			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order);
 	} while (!sctp_port_hashtable && --order > 0);
 	if (!sctp_port_hashtable) {
 		pr_err("Failed bind hash alloc\n");
-- 
cgit v1.2.3


From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Wed, 30 Mar 2011 22:57:33 -0300
Subject: Fix common misspellings

Fixes generated by 'codespell' and manually reviewed.

Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
---
 net/8021q/vlanproc.c                      |  2 +-
 net/9p/client.c                           |  2 +-
 net/9p/trans_common.c                     |  4 ++--
 net/9p/util.c                             |  2 +-
 net/atm/br2684.c                          |  2 +-
 net/atm/lec.h                             |  2 +-
 net/batman-adv/soft-interface.c           |  2 +-
 net/bluetooth/hci_core.c                  |  2 +-
 net/bluetooth/l2cap_sock.c                |  2 +-
 net/bridge/br_fdb.c                       |  2 +-
 net/bridge/br_ioctl.c                     |  2 +-
 net/caif/caif_socket.c                    |  2 +-
 net/can/bcm.c                             |  2 +-
 net/ceph/osd_client.c                     |  2 +-
 net/core/dev.c                            | 12 ++++++------
 net/core/filter.c                         |  2 +-
 net/core/link_watch.c                     |  2 +-
 net/core/rtnetlink.c                      |  4 ++--
 net/core/skbuff.c                         |  2 +-
 net/core/sock.c                           | 10 +++++-----
 net/dccp/output.c                         |  2 +-
 net/dsa/mv88e6131.c                       |  2 +-
 net/ipv4/cipso_ipv4.c                     |  8 ++++----
 net/ipv4/fib_trie.c                       |  2 +-
 net/ipv4/icmp.c                           |  2 +-
 net/ipv4/ip_output.c                      |  2 +-
 net/ipv4/ipconfig.c                       |  2 +-
 net/ipv4/netfilter/arp_tables.c           |  4 ++--
 net/ipv4/netfilter/ip_tables.c            |  2 +-
 net/ipv4/netfilter/nf_nat_core.c          |  4 ++--
 net/ipv4/raw.c                            |  2 +-
 net/ipv4/route.c                          |  4 ++--
 net/ipv4/tcp_lp.c                         |  2 +-
 net/ipv4/tcp_output.c                     |  2 +-
 net/ipv4/tcp_yeah.c                       |  2 +-
 net/ipv4/udp.c                            |  2 +-
 net/ipv6/addrconf.c                       |  4 ++--
 net/ipv6/af_inet6.c                       |  2 +-
 net/ipv6/ip6_output.c                     |  2 +-
 net/ipv6/netfilter/ip6_tables.c           |  2 +-
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c |  2 +-
 net/irda/irlap.c                          |  2 +-
 net/irda/irlap_event.c                    |  8 ++++----
 net/irda/irlap_frame.c                    |  2 +-
 net/irda/irlmp_event.c                    |  2 +-
 net/irda/irnet/irnet.h                    |  2 +-
 net/irda/irqueue.c                        |  2 +-
 net/irda/irttp.c                          |  2 +-
 net/irda/qos.c                            |  8 ++++----
 net/irda/timer.c                          |  2 +-
 net/iucv/af_iucv.c                        |  2 +-
 net/iucv/iucv.c                           |  4 ++--
 net/mac80211/ieee80211_i.h                |  2 +-
 net/mac80211/mesh_pathtbl.c               |  2 +-
 net/mac80211/rc80211_minstrel_ht.c        |  2 +-
 net/mac80211/rc80211_pid.h                |  2 +-
 net/mac80211/rx.c                         |  2 +-
 net/mac80211/sta_info.c                   |  4 ++--
 net/mac80211/sta_info.h                   |  2 +-
 net/netfilter/ipset/ip_set_core.c         |  2 +-
 net/netfilter/ipvs/ip_vs_conn.c           |  4 ++--
 net/netfilter/ipvs/ip_vs_lblc.c           |  2 +-
 net/netfilter/ipvs/ip_vs_lblcr.c          |  2 +-
 net/netfilter/ipvs/ip_vs_proto_sctp.c     |  8 ++++----
 net/netfilter/nf_conntrack_core.c         |  4 ++--
 net/netfilter/nf_conntrack_proto_dccp.c   |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c   |  6 +++---
 net/netfilter/nf_conntrack_sip.c          |  2 +-
 net/netfilter/nf_queue.c                  |  2 +-
 net/netlabel/netlabel_domainhash.c        | 10 +++++-----
 net/netlabel/netlabel_mgmt.c              |  2 +-
 net/rds/ib_send.c                         |  2 +-
 net/rds/iw_cm.c                           |  2 +-
 net/rds/iw_rdma.c                         |  2 +-
 net/rds/iw_send.c                         |  2 +-
 net/rds/send.c                            |  2 +-
 net/rose/rose_route.c                     |  2 +-
 net/sched/act_api.c                       |  2 +-
 net/sched/act_pedit.c                     |  2 +-
 net/sched/em_meta.c                       |  2 +-
 net/sched/sch_htb.c                       |  2 +-
 net/sched/sch_netem.c                     |  6 +++---
 net/sctp/associola.c                      |  2 +-
 net/sctp/auth.c                           |  6 +++---
 net/sctp/input.c                          |  2 +-
 net/sctp/output.c                         |  2 +-
 net/sctp/outqueue.c                       |  6 +++---
 net/sctp/sm_sideeffect.c                  |  2 +-
 net/sctp/sm_statefuns.c                   | 20 ++++++++++----------
 net/sctp/socket.c                         |  2 +-
 net/sctp/ulpevent.c                       |  2 +-
 net/sctp/ulpqueue.c                       |  2 +-
 net/socket.c                              |  2 +-
 net/sunrpc/auth_gss/svcauth_gss.c         |  2 +-
 net/sunrpc/xprtsock.c                     |  4 ++--
 net/tipc/link.c                           |  2 +-
 net/tipc/name_distr.c                     |  2 +-
 net/unix/af_unix.c                        |  2 +-
 net/wanrouter/wanproc.c                   |  2 +-
 net/wireless/reg.c                        |  4 ++--
 net/x25/x25_facilities.c                  |  2 +-
 net/x25/x25_forward.c                     |  4 ++--
 net/xfrm/xfrm_user.c                      |  6 +++---
 103 files changed, 160 insertions(+), 160 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d1314cf18adf..d940c49d168a 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -54,7 +54,7 @@ static const char name_conf[]	 = "config";
 
 /*
  *	Structures for interfacing with the /proc filesystem.
- *	VLAN creates its own directory /proc/net/vlan with the folowing
+ *	VLAN creates its own directory /proc/net/vlan with the following
  *	entries:
  *	config		device status/configuration
  *	<device>	entry for each  device
diff --git a/net/9p/client.c b/net/9p/client.c
index 2ccbf04d37df..48b8e084e710 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -178,7 +178,7 @@ free_and_return:
  * @tag: numeric id for transaction
  *
  * this is a simple array lookup, but will grow the
- * request_slots as necessary to accomodate transaction
+ * request_slots as necessary to accommodate transaction
  * ids which did not previously have a slot.
  *
  * this code relies on the client spinlock to manage locks, its
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 9172ab78fcb0..d47880e971dd 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -36,7 +36,7 @@ p9_release_req_pages(struct trans_rpage_info *rpinfo)
 EXPORT_SYMBOL(p9_release_req_pages);
 
 /**
- * p9_nr_pages - Return number of pages needed to accomodate the payload.
+ * p9_nr_pages - Return number of pages needed to accommodate the payload.
  */
 int
 p9_nr_pages(struct p9_req_t *req)
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(p9_nr_pages);
  * @req: Request to be sent to server.
  * @pdata_off: data offset into the first page after translation (gup).
  * @pdata_len: Total length of the IO. gup may not return requested # of pages.
- * @nr_pages: number of pages to accomodate the payload
+ * @nr_pages: number of pages to accommodate the payload
  * @rw: Indicates if the pages are for read or write.
  */
 int
diff --git a/net/9p/util.c b/net/9p/util.c
index b84619b5ba22..da6af81e59d9 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create);
 
 /**
  * p9_idpool_destroy - create a new per-connection id pool
- * @p: idpool to destory
+ * @p: idpool to destroy
  */
 
 void p9_idpool_destroy(struct p9_idpool *p)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fce2eae8d476..2252c2085dac 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -509,7 +509,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	write_lock_irq(&devs_lock);
 	net_dev = br2684_find_dev(&be.ifspec);
 	if (net_dev == NULL) {
-		pr_err("tried to attach to non-existant device\n");
+		pr_err("tried to attach to non-existent device\n");
 		err = -ENXIO;
 		goto error;
 	}
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 9d14d196cc1d..dfc071966463 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -35,7 +35,7 @@ struct lecdatahdr_8025 {
  * Operations that LANE2 capable device can do. Two first functions
  * are used to make the device do things. See spec 3.1.3 and 3.1.4.
  *
- * The third function is intented for the MPOA component sitting on
+ * The third function is intended for the MPOA component sitting on
  * top of the LANE device. The MPOA component assigns it's own function
  * to (*associate_indicator)() and the LANE device will use that
  * function to tell about TLVs it sees floating through.
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 9ed26140a269..824e1f6e50f2 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -474,7 +474,7 @@ void interface_rx(struct net_device *soft_iface,
 		goto dropped;
 	skb->protocol = eth_type_trans(skb, soft_iface);
 
-	/* should not be neccesary anymore as we use skb_pull_rcsum()
+	/* should not be necessary anymore as we use skb_pull_rcsum()
 	 * TODO: please verify this and remove this TODO
 	 * -- Dec 21st 2009, Simon Wunderlich */
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b372fb8bcdcf..42d5ff02cb59 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1877,7 +1877,7 @@ static void hci_tx_task(unsigned long arg)
 	read_unlock(&hci_task_lock);
 }
 
-/* ----- HCI RX task (incoming data proccessing) ----- */
+/* ----- HCI RX task (incoming data processing) ----- */
 
 /* ACL data packet */
 static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index fc85e7ae33c7..36b9c5d0ebe3 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -679,7 +679,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
 
 		if (opt == BT_FLUSHABLE_OFF) {
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-			/* proceed futher only when we have l2cap_conn and
+			/* proceed further only when we have l2cap_conn and
 			   No Flush support in the LM */
 			if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) {
 				err = -EINVAL;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 88485cc74dc3..cc4d3c5ab1c6 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -169,7 +169,7 @@ void br_fdb_flush(struct net_bridge *br)
 	spin_unlock_bh(&br->hash_lock);
 }
 
-/* Flush all entries refering to a specific port.
+/* Flush all entries referring to a specific port.
  * if do_all is set also flush static entries
  */
 void br_fdb_delete_by_port(struct net_bridge *br,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index cb43312b846e..3d9fca0e3370 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -106,7 +106,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
 /*
  * Legacy ioctl's through SIOCDEVPRIVATE
  * This interface is deprecated because it was too difficult to
- * to do the translation for 32/64bit ioctl compatability.
+ * to do the translation for 32/64bit ioctl compatibility.
  */
 static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 8184c031d028..37a4034dfc29 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -852,7 +852,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	sock->state = SS_CONNECTING;
 	sk->sk_state = CAIF_CONNECTING;
 
-	/* Check priority value comming from socket */
+	/* Check priority value coming from socket */
 	/* if priority value is out of range it will be ajusted */
 	if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX)
 		cf_sk->conn_req.priority = CAIF_PRIO_MAX;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 871a0ad51025..57b1aed79014 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -387,7 +387,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
 }
 
 /*
- * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions
+ * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
  */
 static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
 {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 02212ed50852..8d4ee7e01793 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -917,7 +917,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
 /*
  * Pick an osd (the first 'up' osd in the pg), allocate the osd struct
  * (as needed), and set the request r_osd appropriately.  If there is
- * no up osd, set r_osd to NULL.  Move the request to the appropiate list
+ * no up osd, set r_osd to NULL.  Move the request to the appropriate list
  * (unsent, homeless) or leave on in-flight lru.
  *
  * Return 0 if unchanged, 1 if changed, or negative on error.
diff --git a/net/core/dev.c b/net/core/dev.c
index 563ddc28139d..56c3e00098c0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2071,7 +2071,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		u32 features;
 
 		/*
-		 * If device doesnt need skb->dst, release it right now while
+		 * If device doesn't need skb->dst, release it right now while
 		 * its hot in this cpu cache
 		 */
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2131,7 +2131,7 @@ gso:
 		nskb->next = NULL;
 
 		/*
-		 * If device doesnt need nskb->dst, release it right now while
+		 * If device doesn't need nskb->dst, release it right now while
 		 * its hot in this cpu cache
 		 */
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
@@ -2950,8 +2950,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
  * a compare and 2 stores extra right now if we dont have it on
  * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesnt stop any functionality; if you dont have
- * the ingress scheduler, you just cant add policies on ingress.
+ * NOTE: This doesn't stop any functionality; if you dont have
+ * the ingress scheduler, you just can't add policies on ingress.
  *
  */
 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
@@ -3780,7 +3780,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * with netpoll's poll_napi().  Only the entity which
 		 * obtains the lock and sees NAPI_STATE_SCHED set will
 		 * actually make the ->poll() call.  Therefore we avoid
-		 * accidently calling ->poll() when NAPI is not scheduled.
+		 * accidentally calling ->poll() when NAPI is not scheduled.
 		 */
 		work = 0;
 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
@@ -6316,7 +6316,7 @@ static void __net_exit default_device_exit(struct net *net)
 		if (dev->rtnl_link_ops)
 			continue;
 
-		/* Push remaing network devices to init_net */
+		/* Push remaining network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
 		err = dev_change_net_namespace(dev, &init_net, fb_name);
 		if (err) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 232b1873bb28..afb8afb066bb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -425,7 +425,7 @@ EXPORT_SYMBOL(sk_run_filter);
  * As we dont want to clear mem[] array for each packet going through
  * sk_run_filter(), we check that filter loaded by user never try to read
  * a cell if not previously written, and we check all branches to be sure
- * a malicious user doesnt try to abuse us.
+ * a malicious user doesn't try to abuse us.
  */
 static int check_load_and_stores(struct sock_filter *filter, int flen)
 {
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 01a1101b5936..a7b342131869 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent)
 	if (!cancel_delayed_work(&linkwatch_work))
 		return;
 
-	/* Otherwise we reschedule it again for immediate exection. */
+	/* Otherwise we reschedule it again for immediate execution. */
 	schedule_delayed_work(&linkwatch_work, 0);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 49f7ea5b4c75..d7c4bb4b1820 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
  * as failure of this function is very unlikely, it can only happen due
  * to lack of memory when allocating the chain to store all message
  * handlers for a protocol. Meant for use in init functions where lack
- * of memory implies no sense in continueing.
+ * of memory implies no sense in continuing.
  */
 void rtnl_register(int protocol, int msgtype,
 		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
@@ -1440,7 +1440,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "
-		       "some changes comitted already. Interface %s may "
+		       "some changes committed already. Interface %s may "
 		       "have been left with an inconsistent configuration, "
 		       "please check.\n", dev->name);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 801dd08908f9..7ebeed0a877c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2267,7 +2267,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
- * Note 1: The size of each block of data returned can be arbitary,
+ * Note 1: The size of each block of data returned can be arbitrary,
  *       this limitation is the cost for zerocopy seqeuental
  *       reads of potentially non linear data.
  *
diff --git a/net/core/sock.c b/net/core/sock.c
index 7dfed792434d..6e819780c232 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 
-/* Maximal space eaten by iovec or ancilliary data plus some space */
+/* Maximal space eaten by iovec or ancillary data plus some space */
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
@@ -1175,7 +1175,7 @@ static void __sk_free(struct sock *sk)
 void sk_free(struct sock *sk)
 {
 	/*
-	 * We substract one from sk_wmem_alloc and can know if
+	 * We subtract one from sk_wmem_alloc and can know if
 	 * some packets are still in some tx queue.
 	 * If not null, sock_wfree() will call __sk_free(sk) later
 	 */
@@ -1185,10 +1185,10 @@ void sk_free(struct sock *sk)
 EXPORT_SYMBOL(sk_free);
 
 /*
- * Last sock_put should drop referrence to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking referrence to stopping namespace
+ * Last sock_put should drop reference to sk->sk_net. It has already
+ * been dropped in sk_change_net. Taking reference to stopping namespace
  * is not an option.
- * Take referrence to a socket to remove it from hash _alive_ and after that
+ * Take reference to a socket to remove it from hash _alive_ and after that
  * destroy it in the context of init_net.
  */
 void sk_release_kernel(struct sock *sk)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 784d30210543..136d41cbcd02 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -143,7 +143,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 }
 
 /**
- * dccp_determine_ccmps  -  Find out about CCID-specfic packet-size limits
+ * dccp_determine_ccmps  -  Find out about CCID-specific packet-size limits
  * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
  * since the RX CCID is restricted to feedback packets (Acks), which are small
  * in comparison with the data traffic. A value of 0 means "no current CCMPS".
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index bb2b41bc854e..d951f93644bf 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -124,7 +124,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
 	 * Ignore removed tag data on doubly tagged packets, disable
 	 * flow control messages, force flow control priority to the
 	 * highest, and send all special multicast frames to the CPU
-	 * port at the higest priority.
+	 * port at the highest priority.
 	 */
 	REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
 
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 094e150c6260..a0af7ea87870 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -112,7 +112,7 @@ int cipso_v4_rbm_strictvalid = 1;
 /* The maximum number of category ranges permitted in the ranged category tag
  * (tag #5).  You may note that the IETF draft states that the maximum number
  * of category ranges is 7, but if the low end of the last category range is
- * zero then it is possibile to fit 8 category ranges because the zero should
+ * zero then it is possible to fit 8 category ranges because the zero should
  * be omitted. */
 #define CIPSO_V4_TAG_RNG_CAT_MAX      8
 
@@ -438,7 +438,7 @@ cache_add_failure:
  *
  * Description:
  * Search the DOI definition list for a DOI definition with a DOI value that
- * matches @doi.  The caller is responsibile for calling rcu_read_[un]lock().
+ * matches @doi.  The caller is responsible for calling rcu_read_[un]lock().
  * Returns a pointer to the DOI definition on success and NULL on failure.
  */
 static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
@@ -1293,7 +1293,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
 			return ret_val;
 
 		/* This will send packets using the "optimized" format when
-		 * possibile as specified in  section 3.4.2.6 of the
+		 * possible as specified in  section 3.4.2.6 of the
 		 * CIPSO draft. */
 		if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
 			tag_len = 14;
@@ -1752,7 +1752,7 @@ validate_return:
 }
 
 /**
- * cipso_v4_error - Send the correct reponse for a bad packet
+ * cipso_v4_error - Send the correct response for a bad packet
  * @skb: the packet
  * @error: the error code
  * @gateway: CIPSO gateway flag
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b92c86f6e9b3..e9013d6c1f51 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -12,7 +12,7 @@
  *
  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
  *
- * This work is based on the LPC-trie which is originally descibed in:
+ * This work is based on the LPC-trie which is originally described in:
  *
  * An experimental study of compression methods for dynamic tries
  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a91dc1611081..e5f8a71d3a2a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -704,7 +704,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	 */
 
 	/*
-	 *	Check the other end isnt violating RFC 1122. Some routers send
+	 *	Check the other end isn't violating RFC 1122. Some routers send
 	 *	bogus responses to broadcast frames. If you see this message
 	 *	first check your netmask matches at both ends, if it does then
 	 *	get the other vendor to fix their kit.
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 67f241b97649..459c011b1d4a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -603,7 +603,7 @@ slow_path:
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
 			len = mtu;
-		/* IF: we are not sending upto and including the packet end
+		/* IF: we are not sending up to and including the packet end
 		   then align the next start on an eight byte boundary */
 		if (len < left)	{
 			len &= ~7;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2b097752426b..cbff2ecccf3d 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1444,7 +1444,7 @@ static int __init ip_auto_config(void)
 		root_server_addr = addr;
 
 	/*
-	 * Use defaults whereever applicable.
+	 * Use defaults wherever applicable.
 	 */
 	if (ic_defaults() < 0)
 		return -1;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 4b5d457c2d76..89bc7e66d598 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -76,7 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 }
 
 /*
- * Unfortunatly, _b and _mask are not aligned to an int (or long int)
+ * Unfortunately, _b and _mask are not aligned to an int (or long int)
  * Some arches dont care, unrolling the loop is a win on them.
  * For other arches, we only have a 16bit alignement.
  */
@@ -1874,7 +1874,7 @@ static int __init arp_tables_init(void)
 	if (ret < 0)
 		goto err1;
 
-	/* Noone else will be downing sem now, so we won't sleep */
+	/* No one else will be downing sem now, so we won't sleep */
 	ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
 	if (ret < 0)
 		goto err2;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ffcea0d1678e..704915028009 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2233,7 +2233,7 @@ static int __init ip_tables_init(void)
 	if (ret < 0)
 		goto err1;
 
-	/* Noone else will be downing sem now, so we won't sleep */
+	/* No one else will be downing sem now, so we won't sleep */
 	ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
 	if (ret < 0)
 		goto err2;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 21bcf471b25a..9c71b2755ce3 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -521,7 +521,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
 }
 EXPORT_SYMBOL(nf_nat_protocol_register);
 
-/* Noone stores the protocol anywhere; simply delete it. */
+/* No one stores the protocol anywhere; simply delete it. */
 void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
 {
 	spin_lock_bh(&nf_nat_lock);
@@ -532,7 +532,7 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
 }
 EXPORT_SYMBOL(nf_nat_protocol_unregister);
 
-/* Noone using conntrack by the time this called. */
+/* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
 	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2d3c72e5bbbf..bceaec42c37d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -622,7 +622,7 @@ do_confirm:
 static void raw_close(struct sock *sk, long timeout)
 {
 	/*
-	 * Raw sockets may have direct kernel refereneces. Kill them.
+	 * Raw sockets may have direct kernel references. Kill them.
 	 */
 	ip_ra_control(sk, 0, NULL);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4b0c81180804..ea107515c53e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -821,7 +821,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 }
 
 /*
- * Pertubation of rt_genid by a small quantity [1..256]
+ * Perturbation of rt_genid by a small quantity [1..256]
  * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
  * many times (2^24) without giving recent rt_genid.
  * Jenkins hash is strong enough that litle changes of rt_genid are OK.
@@ -1191,7 +1191,7 @@ restart:
 #endif
 	/*
 	 * Since lookup is lockfree, we must make sure
-	 * previous writes to rt are comitted to memory
+	 * previous writes to rt are committed to memory
 	 * before making rt visible to other CPUS.
 	 */
 	rcu_assign_pointer(rt_hash_table[hash].chain, rt);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 656d431c99ad..72f7218b03f5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -12,7 +12,7 @@
  *     within cong_avoid.
  *   o Error correcting in remote HZ, therefore remote HZ will be keeped
  *     on checking and updating.
- *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne
+ *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
  *     OWD have a similar meaning as RTT. Also correct the buggy formular.
  *   o Handle reaction for Early Congestion Indication (ECI) within
  *     pkts_acked, as mentioned within pseudo code.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dfa5beb0c1c8..64f30eca1c67 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
-	/* Don't override Nagle indefinately with F-RTO */
+	/* Don't override Nagle indefinitely with F-RTO */
 	if (tp->frto_counter == 2)
 		tp->frto_counter = 3;
 
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index dc7f43179c9a..05c3b6f0e8e1 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -20,7 +20,7 @@
 #define TCP_YEAH_DELTA        3 //log minimum fraction of cwnd to be removed on loss
 #define TCP_YEAH_EPSILON      1 //log maximum fraction to be removed on early decongestion
 #define TCP_YEAH_PHY          8 //lin maximum delta from base
-#define TCP_YEAH_RHO         16 //lin minumum number of consecutive rtt to consider competition on loss
+#define TCP_YEAH_RHO         16 //lin minimum number of consecutive rtt to consider competition on loss
 #define TCP_YEAH_ZETA        50 //lin minimum number of state switchs to reset reno_count
 
 #define TCP_SCALABLE_AI_CNT	 100U
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 588f47af5faf..f87a8eb76f3b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
  *  @sk:          socket struct in question
  *  @snum:        port number to look up
  *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
- *  @hash2_nulladdr: AF-dependant hash value in secondary hash chains,
+ *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
  *                   with NULL address
  */
 int udp_lib_get_port(struct sock *sk, unsigned short snum,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3daaf3c7703c..1493534116df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1084,7 +1084,7 @@ static int ipv6_get_saddr_eval(struct net *net,
 	case IPV6_SADDR_RULE_PRIVACY:
 	    {
 		/* Rule 7: Prefer public address
-		 * Note: prefer temprary address if use_tempaddr >= 2
+		 * Note: prefer temporary address if use_tempaddr >= 2
 		 */
 		int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
 				!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
@@ -1968,7 +1968,7 @@ ok:
 					 *  to the stored lifetime since we'll
 					 *  be updating the timestamp below,
 					 *  else we'll set it back to the
-					 *  minumum.
+					 *  minimum.
 					 */
 					if (prefered_lft != ifp->prefered_lft) {
 						valid_lft = stored_lft;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4b13d5d8890e..afcc7099f96d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1113,7 +1113,7 @@ static int __init inet6_init(void)
 	/*
 	 *	ipngwg API draft makes clear that the correct semantics
 	 *	for TCP and UDP is to consider one TCP and UDP instance
-	 *	in a host availiable by both INET and INET6 APIs and
+	 *	in a host available by both INET and INET6 APIs and
 	 *	able to communicate via both network protocols.
 	 */
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 18208876aa8a..46cf7bea6769 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -779,7 +779,7 @@ slow_path:
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
 			len = mtu;
-		/* IF: we are not sending upto and including the packet end
+		/* IF: we are not sending up to and including the packet end
 		   then align the next start on an eight byte boundary */
 		if (len < left)	{
 			len &= ~7;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0b2af9b85cec..5a1c6f27ffaf 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2248,7 +2248,7 @@ static int __init ip6_tables_init(void)
 	if (ret < 0)
 		goto err1;
 
-	/* Noone else will be downing sem now, so we won't sleep */
+	/* No one else will be downing sem now, so we won't sleep */
 	ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
 	if (ret < 0)
 		goto err2;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 97c5b21b9674..cdd6d045e42e 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -71,7 +71,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	if (reasm == NULL)
 		return NF_STOLEN;
 
-	/* error occured or not fragmented */
+	/* error occurred or not fragmented */
 	if (reasm == skb)
 		return NF_ACCEPT;
 
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 783c5f367d29..005b424494a0 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -165,7 +165,7 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos,
 
 	irlap_apply_default_connection_parameters(self);
 
-	self->N3 = 3; /* # connections attemts to try before giving up */
+	self->N3 = 3; /* # connections attempts to try before giving up */
 
 	self->state = LAP_NDM;
 
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index d434c8880745..bb47021c9a55 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -708,7 +708,7 @@ static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event,
 
 				self->frame_sent = TRUE;
 			}
-			/* Readjust our timer to accomodate devices
+			/* Readjust our timer to accommodate devices
 			 * doing faster or slower discovery than us...
 			 * Jean II */
 			irlap_start_query_timer(self, info->S, info->s);
@@ -931,7 +931,7 @@ static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event,
 		irlap_send_rr_frame(self, CMD_FRAME);
 
 		/* The timer is set to half the normal timer to quickly
-		 * detect a failure to negociate the new connection
+		 * detect a failure to negotiate the new connection
 		 * parameters. IrLAP 6.11.3.2, note 3.
 		 * Note that currently we don't process this failure
 		 * properly, as we should do a quick disconnect.
@@ -1052,7 +1052,7 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
 				return -EPROTO;
 			}
 
-			/* Substract space used by this skb */
+			/* Subtract space used by this skb */
 			self->bytes_left -= skb->len;
 #else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
 			/* Window has been adjusted for the max packet
@@ -1808,7 +1808,7 @@ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
 
 				return -EPROTO; /* Try again later */
 			}
-			/* Substract space used by this skb */
+			/* Subtract space used by this skb */
 			self->bytes_left -= skb->len;
 #else	/* CONFIG_IRDA_DYNAMIC_WINDOW */
 			/* Window has been adjusted for the max packet
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 688222cbf55b..8c004161a843 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -848,7 +848,7 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb)
 	 * though IrLAP is currently sending the *last* frame of the
 	 * tx-window, the driver most likely has only just started
 	 * sending the *first* frame of the same tx-window.
-	 * I.e. we are always at the very begining of or Tx window.
+	 * I.e. we are always at the very beginning of or Tx window.
 	 * Now, we are supposed to set the final timer from the end
 	 * of our tx-window to let the other peer reply. So, we need
 	 * to add extra time to compensate for the fact that we
diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c
index c1fb5db81042..9505a7d06f1a 100644
--- a/net/irda/irlmp_event.c
+++ b/net/irda/irlmp_event.c
@@ -498,7 +498,7 @@ static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event,
 	switch (event) {
 #ifdef CONFIG_IRDA_ULTRA
 	case LM_UDATA_INDICATION:
-		/* This is most bizzare. Those packets are  aka unreliable
+		/* This is most bizarre. Those packets are  aka unreliable
 		 * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA.
 		 * Why do we pass them as Ultra ??? Jean II */
 		irlmp_connless_data_indication(self, skb);
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 0d82ff5aeff1..979ecb2435a7 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -73,7 +73,7 @@
  * Infinite thanks to those brave souls for providing the infrastructure
  * upon which IrNET is built.
  *
- * Thanks to all my collegues in HP for helping me. In particular,
+ * Thanks to all my colleagues in HP for helping me. In particular,
  * thanks to Salil Pradhan and Bill Serra for W2k testing...
  * Thanks to Luiz Magalhaes for irnetd and much testing...
  *
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 849aaf0dabb5..9715e6e5900b 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -40,7 +40,7 @@
  *	o the hash function for ints is pathetic (but could be changed)
  *	o locking is sometime suspicious (especially during enumeration)
  *	o most users have only a few elements (== overhead)
- *	o most users never use seach, so don't benefit from hashing
+ *	o most users never use search, so don't benefit from hashing
  * Problem already fixed :
  *	o not 64 bit compliant (most users do hashv = (int) self)
  *	o hashbin_remove() is broken => use hashbin_remove_this()
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index f6054f9ccbe3..9d9af4606970 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1193,7 +1193,7 @@ EXPORT_SYMBOL(irttp_connect_request);
 /*
  * Function irttp_connect_confirm (handle, qos, skb)
  *
- *    Sevice user confirms TSAP connection with peer.
+ *    Service user confirms TSAP connection with peer.
  *
  */
 static void irttp_connect_confirm(void *instance, void *sap,
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 2b00974e5bae..1b51bcf42394 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -39,16 +39,16 @@
 #include <net/irda/irlap_frame.h>
 
 /*
- * Maximum values of the baud rate we negociate with the other end.
+ * Maximum values of the baud rate we negotiate with the other end.
  * Most often, you don't have to change that, because Linux-IrDA will
  * use the maximum offered by the link layer, which usually works fine.
  * In some very rare cases, you may want to limit it to lower speeds...
  */
 int sysctl_max_baud_rate = 16000000;
 /*
- * Maximum value of the lap disconnect timer we negociate with the other end.
+ * Maximum value of the lap disconnect timer we negotiate with the other end.
  * Most often, the value below represent the best compromise, but some user
- * may want to keep the LAP alive longuer or shorter in case of link failure.
+ * may want to keep the LAP alive longer or shorter in case of link failure.
  * Remember that the threshold time (early warning) is fixed to 3s...
  */
 int sysctl_max_noreply_time = 12;
@@ -411,7 +411,7 @@ static void irlap_adjust_qos_settings(struct qos_info *qos)
 	 * Fix tx data size according to user limits - Jean II
 	 */
 	if (qos->data_size.value > sysctl_max_tx_data_size)
-		/* Allow non discrete adjustement to avoid loosing capacity */
+		/* Allow non discrete adjustement to avoid losing capacity */
 		qos->data_size.value = sysctl_max_tx_data_size;
 	/*
 	 * Override Tx window if user request it. - Jean II
diff --git a/net/irda/timer.c b/net/irda/timer.c
index 0335ba0cc593..f418cb2ad49c 100644
--- a/net/irda/timer.c
+++ b/net/irda/timer.c
@@ -59,7 +59,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s)
 	 * slot time, plus add some extra time to properly receive the last
 	 * discovery packet (which is longer due to extra discovery info),
 	 * to avoid messing with for incomming connections requests and
-	 * to accomodate devices that perform discovery slower than us.
+	 * to accommodate devices that perform discovery slower than us.
 	 * Jean II */
 	timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s)
 		   + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9637e45744fa..986b2a5e8769 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -250,7 +250,7 @@ static struct device *af_iucv_dev;
  *	PRMDATA[0..6]	socket data (max 7 bytes);
  *	PRMDATA[7]	socket data length value (len is 0xff - PRMDATA[7])
  *
- * The socket data length is computed by substracting the socket data length
+ * The socket data length is computed by subtracting the socket data length
  * value from 0xFF.
  * If the socket data len is greater 7, then PRMDATA can be used for special
  * notifications (see iucv_sock_shutdown); and further,
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1ee5dab3cfae..8f156bd86be7 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -735,7 +735,7 @@ static void iucv_cleanup_queue(void)
 	struct iucv_irq_list *p, *n;
 
 	/*
-	 * When a path is severed, the pathid can be reused immediatly
+	 * When a path is severed, the pathid can be reused immediately
 	 * on a iucv connect or a connection pending interrupt. Remove
 	 * all entries from the task queue that refer to a stale pathid
 	 * (iucv_path_table[ix] == NULL). Only then do the iucv connect
@@ -807,7 +807,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
 	spin_lock_bh(&iucv_table_lock);
 	/* Remove handler from the iucv_handler_list. */
 	list_del_init(&handler->list);
-	/* Sever all pathids still refering to the handler. */
+	/* Sever all pathids still referring to the handler. */
 	list_for_each_entry_safe(p, n, &handler->paths, list) {
 		iucv_sever_pathid(p->pathid, NULL);
 		iucv_path_table[p->pathid] = NULL;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a40401701424..c18396c248d7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -97,7 +97,7 @@ struct ieee80211_bss {
 	size_t supp_rates_len;
 
 	/*
-	 * During assocation, we save an ERP value from a probe response so
+	 * During association, we save an ERP value from a probe response so
 	 * that we can feed ERP info to the driver when handling the
 	 * association completes. these fields probably won't be up-to-date
 	 * otherwise, you probably don't want to use them.
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 8d65b47d9837..336ca9d0c5c4 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -628,7 +628,7 @@ void mesh_path_discard_frame(struct sk_buff *skb,
  *
  * @mpath: mesh path whose queue has to be freed
  *
- * Locking: the function must me called withing a rcu_read_lock region
+ * Locking: the function must me called within a rcu_read_lock region
  */
 void mesh_path_flush_pending(struct mesh_path *mpath)
 {
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 8212a8bebf06..78e67d22dc1f 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -259,7 +259,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 		}
 	}
 
-	/* try to sample up to half of the availble rates during each interval */
+	/* try to sample up to half of the available rates during each interval */
 	mi->sample_count *= 4;
 
 	cur_prob = 0;
diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h
index 6510f8ee738e..19111c7bf454 100644
--- a/net/mac80211/rc80211_pid.h
+++ b/net/mac80211/rc80211_pid.h
@@ -77,7 +77,7 @@ union rc_pid_event_data {
 };
 
 struct rc_pid_event {
-	/* The time when the event occured */
+	/* The time when the event occurred */
 	unsigned long timestamp;
 
 	/* Event ID number */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5c1930ba8ebe..c50b68423c7b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -381,7 +381,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
  * specs were sane enough this time around to require padding each A-MSDU
  * subframe to a length that is a multiple of four.
  *
- * Padding like Atheros hardware adds which is inbetween the 802.11 header and
+ * Padding like Atheros hardware adds which is between the 802.11 header and
  * the payload is not supported, the driver is required to move the 802.11
  * header to be directly in front of the payload in that case.
  */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index d0311a322ddd..13e8c30adf01 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -47,9 +47,9 @@
  * Station entries are added by mac80211 when you establish a link with a
  * peer. This means different things for the different type of interfaces
  * we support. For a regular station this mean we add the AP sta when we
- * receive an assocation response from the AP. For IBSS this occurs when
+ * receive an association response from the AP. For IBSS this occurs when
  * get to know about a peer on the same IBSS. For WDS we add the sta for
- * the peer imediately upon device open. When using AP mode we add stations
+ * the peer immediately upon device open. When using AP mode we add stations
  * for each respective station upon request from userspace through nl80211.
  *
  * In order to remove a STA info structure, various sta_info_destroy_*()
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 57681149e37f..b2f95966c7f4 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -173,7 +173,7 @@ struct sta_ampdu_mlme {
 /**
  * enum plink_state - state of a mesh peer link finite state machine
  *
- * @PLINK_LISTEN: initial state, considered the implicit state of non existant
+ * @PLINK_LISTEN: initial state, considered the implicit state of non existent
  * 	mesh peer links
  * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer
  * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d6b48230a540..253326e8d990 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -893,7 +893,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	to = ip_set_list[to_id];
 
 	/* Features must not change.
-	 * Not an artifical restriction anymore, as we must prevent
+	 * Not an artificial restriction anymore, as we must prevent
 	 * possible loops created by swapping in setlist type of sets. */
 	if (!(from->type->features == to->type->features &&
 	      from->type->family == to->type->family))
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index f289306cbf12..c97bd45975be 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -595,7 +595,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 			atomic_inc(&dest->inactconns);
 	} else {
 		/* It is a persistent connection/template, so increase
-		   the peristent connection counter */
+		   the persistent connection counter */
 		atomic_inc(&dest->persistconns);
 	}
 
@@ -657,7 +657,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 		}
 	} else {
 		/* It is a persistent connection/template, so decrease
-		   the peristent connection counter */
+		   the persistent connection counter */
 		atomic_dec(&dest->persistconns);
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index f276df9896b3..87e40ea77a95 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -131,7 +131,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
 	list_del(&en->list);
 	/*
-	 * We don't kfree dest because it is refered either by its service
+	 * We don't kfree dest because it is referred either by its service
 	 * or the trash dest list.
 	 */
 	atomic_dec(&en->dest->refcnt);
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index cb1c9913d38b..90f618ab6dda 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -152,7 +152,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
 	write_lock(&set->lock);
 	list_for_each_entry_safe(e, ep, &set->list, list) {
 		/*
-		 * We don't kfree dest because it is refered either
+		 * We don't kfree dest because it is referred either
 		 * by its service or by the trash dest list.
 		 */
 		atomic_dec(&e->dest->refcnt);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b027ccc49f43..d12ed53ec95f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -566,7 +566,7 @@ static struct ipvs_sctp_nextstate
 	 * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
 	 */
 	/*
-	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * We received the data chuck, keep the state unchanged. I assume
 	 * that still data chuncks  can be received by both the peers in
 	 * SHUDOWN state
 	 */
@@ -633,7 +633,7 @@ static struct ipvs_sctp_nextstate
 	 * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
 	 */
 	/*
-	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * We received the data chuck, keep the state unchanged. I assume
 	 * that still data chuncks  can be received by both the peers in
 	 * SHUDOWN state
 	 */
@@ -701,7 +701,7 @@ static struct ipvs_sctp_nextstate
 	 * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
 	 */
 	/*
-	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * We received the data chuck, keep the state unchanged. I assume
 	 * that still data chuncks  can be received by both the peers in
 	 * SHUDOWN state
 	 */
@@ -771,7 +771,7 @@ static struct ipvs_sctp_nextstate
 	 * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
 	 */
 	/*
-	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * We received the data chuck, keep the state unchanged. I assume
 	 * that still data chuncks  can be received by both the peers in
 	 * SHUDOWN state
 	 */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 941286ca911d..2e1c11f78419 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -453,7 +453,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	   REJECT will give spurious warnings here. */
 	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
 
-	/* No external references means noone else could have
+	/* No external references means no one else could have
 	   confirmed us. */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 	pr_debug("Confirming conntrack %p\n", ct);
@@ -901,7 +901,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
 	if (ret <= 0) {
-		pr_debug("not prepared to track yet or error occured\n");
+		pr_debug("not prepared to track yet or error occurred\n");
 		NF_CT_STAT_INC_ATOMIC(net, error);
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		ret = -ret;
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 9ae57c57c50e..2e664a69d7db 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -98,7 +98,7 @@ static const char * const dccp_state_names[] = {
 #define sIV	CT_DCCP_INVALID
 
 /*
- * DCCP state transistion table
+ * DCCP state transition table
  *
  * The assumption is the same as for TCP tracking:
  *
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6f4ee70f460b..6772b1154654 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -107,9 +107,9 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
 /* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
 /* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
 /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
+/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/
 /* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
+/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
 /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
 	},
 	{
@@ -121,7 +121,7 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
 /* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
 /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
 /* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
+/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
 /* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
 /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
 	}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index bcf47eb518ef..237cc1981b89 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -707,7 +707,7 @@ static const char *ct_sdp_header_search(const char *dptr, const char *limit,
 }
 
 /* Locate a SDP header (optionally a substring within the header value),
- * optionally stopping at the first occurence of the term header, parse
+ * optionally stopping at the first occurrence of the term header, parse
  * it and return the offset and length of the data we're interested in.
  */
 int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5ab22e2bbd7d..5b466cd1272f 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -134,7 +134,7 @@ static int __nf_queue(struct sk_buff *skb,
 	const struct nf_afinfo *afinfo;
 	const struct nf_queue_handler *qh;
 
-	/* QUEUE == DROP if noone is waiting, to be safe. */
+	/* QUEUE == DROP if no one is waiting, to be safe. */
 	rcu_read_lock();
 
 	qh = rcu_dereference(queue_handler[pf]);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index d37b7f80fa37..de0d8e4cbfb6 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -109,7 +109,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
  *
  * Description:
  * This is the hashing function for the domain hash table, it returns the
- * correct bucket number for the domain.  The caller is responsibile for
+ * correct bucket number for the domain.  The caller is responsible for
  * ensuring that the hash table is protected with either a RCU read lock or the
  * hash table lock.
  *
@@ -134,7 +134,7 @@ static u32 netlbl_domhsh_hash(const char *key)
  *
  * Description:
  * Searches the domain hash table and returns a pointer to the hash table
- * entry if found, otherwise NULL is returned.  The caller is responsibile for
+ * entry if found, otherwise NULL is returned.  The caller is responsible for
  * ensuring that the hash table is protected with either a RCU read lock or the
  * hash table lock.
  *
@@ -165,7 +165,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain)
  * Searches the domain hash table and returns a pointer to the hash table
  * entry if an exact match is found, if an exact match is not present in the
  * hash table then the default entry is returned if valid otherwise NULL is
- * returned.  The caller is responsibile ensuring that the hash table is
+ * returned.  The caller is responsible ensuring that the hash table is
  * protected with either a RCU read lock or the hash table lock.
  *
  */
@@ -193,7 +193,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain)
  *
  * Description:
  * Generate an audit record for adding a new NetLabel/LSM mapping entry with
- * the given information.  Caller is responsibile for holding the necessary
+ * the given information.  Caller is responsible for holding the necessary
  * locks.
  *
  */
@@ -605,7 +605,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info)
  *
  * Description:
  * Look through the domain hash table searching for an entry to match @domain,
- * return a pointer to a copy of the entry or NULL.  The caller is responsibile
+ * return a pointer to a copy of the entry or NULL.  The caller is responsible
  * for ensuring that rcu_read_[un]lock() is called.
  *
  */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 998e85e895d0..4f251b19fbcc 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -259,7 +259,7 @@ add_failure:
  *
  * Description:
  * This function is a helper function used by the LISTALL and LISTDEF command
- * handlers.  The caller is responsibile for ensuring that the RCU read lock
+ * handlers.  The caller is responsible for ensuring that the RCU read lock
  * is held.  Returns zero on success, negative values on failure.
  *
  */
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index c47a511f203d..7c4dce8fa5e6 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -355,7 +355,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
  *
  * Conceptually, we have two counters:
  *  -	send credits: this tells us how many WRs we're allowed
- *	to submit without overruning the reciever's queue. For
+ *	to submit without overruning the receiver's queue. For
  *	each SEND WR we post, we decrement this by one.
  *
  *  -	posted credits: this tells us how many WRs we recently
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 712cf2d1f28e..3a60a15d1b4a 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -181,7 +181,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
 	unsigned int send_size, recv_size;
 	int ret;
 
-	/* The offset of 1 is to accomodate the additional ACK WR. */
+	/* The offset of 1 is to accommodate the additional ACK WR. */
 	send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
 	recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
 	rds_iw_ring_resize(send_ring, send_size - 1);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 59509e9a9e72..6deaa77495e3 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -122,7 +122,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
 #else
 			/* FIXME - needs to compare the local and remote
 			 * ipaddr/port tuple, but the ipaddr is the only
-			 * available infomation in the rds_sock (as the rest are
+			 * available information in the rds_sock (as the rest are
 			 * zero'ed.  It doesn't appear to be properly populated
 			 * during connection setup...
 			 */
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 6280ea020d4e..545d8ee3efb1 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -307,7 +307,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
  *
  * Conceptually, we have two counters:
  *  -	send credits: this tells us how many WRs we're allowed
- *	to submit without overruning the reciever's queue. For
+ *	to submit without overruning the receiver's queue. For
  *	each SEND WR we post, we decrement this by one.
  *
  *  -	posted credits: this tells us how many WRs we recently
diff --git a/net/rds/send.c b/net/rds/send.c
index 35b9c2e9caf1..d58ae5f9339e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -116,7 +116,7 @@ static void release_in_xmit(struct rds_connection *conn)
 }
 
 /*
- * We're making the concious trade-off here to only send one message
+ * We're making the conscious trade-off here to only send one message
  * down the connection at a time.
  *   Pro:
  *      - tx queueing is a simple fifo list
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 08dcd2f29cdc..479cae57d187 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -587,7 +587,7 @@ static int rose_clear_routes(void)
 
 /*
  *	Check that the device given is a valid AX.25 interface that is "up".
- * 	called whith RTNL
+ * 	called with RTNL
  */
 static struct net_device *rose_ax25_dev_find(char *devname)
 {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 15873e14cb54..14b42f4ad791 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -999,7 +999,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	switch (n->nlmsg_type) {
 	case RTM_NEWACTION:
 		/* we are going to assume all other flags
-		 * imply create only if it doesnt exist
+		 * imply create only if it doesn't exist
 		 * Note that CREATE | EXCL implies that
 		 * but since we want avoid ambiguity (eg when flags
 		 * is zero) then just set this
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 50c7c06c019d..7affe9a92757 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -161,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 			}
 			if (offset > 0 && offset > skb->len) {
 				pr_info("tc filter pedit"
-					" offset %d cant exceed pkt length %d\n",
+					" offset %d can't exceed pkt length %d\n",
 				       offset, skb->len);
 				goto bad;
 			}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index a4de67eca824..49130e8abff0 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -47,7 +47,7 @@
  * 	on the meta type. Obviously, the length of the data must also
  * 	be provided for non-numeric types.
  *
- * 	Additionaly, type dependant modifiers such as shift operators
+ * 	Additionally, type dependent modifiers such as shift operators
  * 	or mask may be applied to extend the functionaliy. As of now,
  * 	the variable length type supports shifting the byte string to
  * 	the right, eating up any number of octets and thus supporting
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index e1429a85091f..29b942ce9e82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -183,7 +183,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * filters in qdisc and in inner nodes (if higher filter points to the inner
  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
  * internal fifo (direct). These packets then go directly thru. If we still
- * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
+ * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
  * then finish and return direct queue.
  */
 #define HTB_DIRECT ((struct htb_class *)-1L)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index edbbf7ad6623..69c35f6cd13f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -160,7 +160,7 @@ static bool loss_4state(struct netem_sched_data *q)
 	u32 rnd = net_random();
 
 	/*
-	 * Makes a comparision between rnd and the transition
+	 * Makes a comparison between rnd and the transition
 	 * probabilities outgoing from the current state, then decides the
 	 * next state and if the next packet has to be transmitted or lost.
 	 * The four states correspond to:
@@ -212,9 +212,9 @@ static bool loss_4state(struct netem_sched_data *q)
  * Generates losses according to the Gilbert-Elliot loss model or
  * its special cases  (Gilbert or Simple Gilbert)
  *
- * Makes a comparision between random number and the transition
+ * Makes a comparison between random number and the transition
  * probabilities outgoing from the current state, then decides the
- * next state. A second random number is extracted and the comparision
+ * next state. A second random number is extracted and the comparison
  * with the loss probability of the current state decides if the next
  * packet will be transmitted or lost.
  */
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 6b04287913cd..0698cad61763 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1593,7 +1593,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc)
 	struct sctp_chunk *ack;
 	struct sctp_chunk *tmp;
 
-	/* We can remove all the entries from the queue upto
+	/* We can remove all the entries from the queue up to
 	 * the "Peer-Sequence-Number".
 	 */
 	list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list,
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ddbbf7c81fa1..865e68fef21c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -113,7 +113,7 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp)
 	return new;
 }
 
-/* Free the shared key stucture */
+/* Free the shared key structure */
 static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
 {
 	BUG_ON(!list_empty(&sh_key->key_list));
@@ -122,7 +122,7 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key)
 	kfree(sh_key);
 }
 
-/* Destory the entire key list.  This is done during the
+/* Destroy the entire key list.  This is done during the
  * associon and endpoint free process.
  */
 void sctp_auth_destroy_keys(struct list_head *keys)
@@ -324,7 +324,7 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret(
 	if (!peer_key_vector || !local_key_vector)
 		goto out;
 
-	/* Figure out the order in wich the key_vectors will be
+	/* Figure out the order in which the key_vectors will be
 	 * added to the endpoint shared key.
 	 * SCTP-AUTH, Section 6.1:
 	 *   This is performed by selecting the numerically smaller key
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 826661be73e7..5436c6921167 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1034,7 +1034,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 *    association.
 *
 * This means that any chunks that can help us identify the association need
-* to be looked at to find this assocation.
+* to be looked at to find this association.
 */
 static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
 				      const union sctp_addr *laddr,
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 60600d337a3a..b4f3cf06d8da 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -510,7 +510,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 		sh->checksum = sctp_end_cksum(crc32);
 	} else {
 		if (dst->dev->features & NETIF_F_SCTP_CSUM) {
-			/* no need to seed psuedo checksum for SCTP */
+			/* no need to seed pseudo checksum for SCTP */
 			nskb->ip_summed = CHECKSUM_PARTIAL;
 			nskb->csum_start = (skb_transport_header(nskb) -
 			                    nskb->head);
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 26dc005113a0..bf92a5b68f8b 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -177,13 +177,13 @@ static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn)
  * 3) If the missing report count for TSN t is to be
  * incremented according to [RFC2960] and
  * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set,
- * then the sender MUST futher execute steps 3.1 and
+ * then the sender MUST further execute steps 3.1 and
  * 3.2 to determine if the missing report count for
  * TSN t SHOULD NOT be incremented.
  *
  * 3.3) If 3.1 and 3.2 do not dictate that the missing
  * report count for t should not be incremented, then
- * the sender SOULD increment missing report count for
+ * the sender SHOULD increment missing report count for
  * t (according to [RFC2960] and [SCTP_STEWART_2002]).
  */
 static inline int sctp_cacc_skip(struct sctp_transport *primary,
@@ -843,7 +843,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		case SCTP_CID_ECN_CWR:
 		case SCTP_CID_ASCONF_ACK:
 			one_packet = 1;
-			/* Fall throught */
+			/* Fall through */
 
 		case SCTP_CID_SACK:
 		case SCTP_CID_HEARTBEAT:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b21b218d564f..5f86ee4b54c1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -482,7 +482,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
 	 * If the timer was a heartbeat, we only increment error counts
 	 * when we already have an outstanding HEARTBEAT that has not
 	 * been acknowledged.
-	 * Additionaly, some tranport states inhibit error increments.
+	 * Additionally, some tranport states inhibit error increments.
 	 */
 	if (!is_hb) {
 		asoc->overall_error_count++;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4b4eb7c96bbd..76792083c379 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -551,7 +551,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 		 *
 		 * This means that if we only want to abort associations
 		 * in an authenticated way (i.e AUTH+ABORT), then we
-		 * can't destroy this association just becuase the packet
+		 * can't destroy this association just because the packet
 		 * was malformed.
 		 */
 		if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
@@ -1546,7 +1546,7 @@ cleanup:
 }
 
 /*
- * Handle simultanous INIT.
+ * Handle simultaneous INIT.
  * This means we started an INIT and then we got an INIT request from
  * our peer.
  *
@@ -2079,7 +2079,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	 * RFC 2960, Section 3.3.7
 	 *    If an endpoint receives an ABORT with a format error or for an
 	 *    association that doesn't exist, it MUST silently discard it.
-	 * Becasue the length is "invalid", we can't really discard just
+	 * Because the length is "invalid", we can't really discard just
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
@@ -2120,7 +2120,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	 * RFC 2960, Section 3.3.7
 	 *    If an endpoint receives an ABORT with a format error or for an
 	 *    association that doesn't exist, it MUST silently discard it.
-	 * Becasue the length is "invalid", we can't really discard just
+	 * Because the length is "invalid", we can't really discard just
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
@@ -2381,7 +2381,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	 * RFC 2960, Section 3.3.7
 	 *    If an endpoint receives an ABORT with a format error or for an
 	 *    association that doesn't exist, it MUST silently discard it.
-	 * Becasue the length is "invalid", we can't really discard just
+	 * Because the length is "invalid", we can't really discard just
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
@@ -2448,7 +2448,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
 	 * RFC 2960, Section 3.3.7
 	 *    If an endpoint receives an ABORT with a format error or for an
 	 *    association that doesn't exist, it MUST silently discard it.
-	 * Becasue the length is "invalid", we can't really discard just
+	 * Because the length is "invalid", we can't really discard just
 	 * as we do not know its true length.  So, to be safe, discard the
 	 * packet.
 	 */
@@ -3855,7 +3855,7 @@ gen_shutdown:
 }
 
 /*
- * SCTP-AUTH Section 6.3 Receving authenticated chukns
+ * SCTP-AUTH Section 6.3 Receiving authenticated chukns
  *
  *    The receiver MUST use the HMAC algorithm indicated in the HMAC
  *    Identifier field.  If this algorithm was not specified by the
@@ -4231,7 +4231,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	 *
 	 * This means that if we only want to abort associations
 	 * in an authenticated way (i.e AUTH+ABORT), then we
-	 * can't destroy this association just becuase the packet
+	 * can't destroy this association just because the packet
 	 * was malformed.
 	 */
 	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
@@ -4402,9 +4402,9 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 }
 
 /* Handle protocol violation of an invalid chunk bundling.  For example,
- * when we have an association and we recieve bundled INIT-ACK, or
+ * when we have an association and we receive bundled INIT-ACK, or
  * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle"
- * statement from the specs.  Additinally, there might be an attacker
+ * statement from the specs.  Additionally, there might be an attacker
  * on the path and we may not want to continue this communication.
  */
 static sctp_disposition_t sctp_sf_violation_chunk(
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3951a10605bc..deb82e35a107 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1193,7 +1193,7 @@ out_free:
  * an endpoint that is multi-homed.  Much like sctp_bindx() this call
  * allows a caller to specify multiple addresses at which a peer can be
  * reached.  The way the SCTP stack uses the list of addresses to set up
- * the association is implementation dependant.  This function only
+ * the association is implementation dependent.  This function only
  * specifies that the stack will try to make use of all the addresses in
  * the list when needed.
  *
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index aa72e89c3ee1..dff27d5e22fd 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
 	memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
 
 	/* Per TSVWG discussion with Randy. Allow the application to
-	 * ressemble a fragmented message.
+	 * resemble a fragmented message.
 	 */
 	ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
 
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 17678189d054..f2d1de7f2ffb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -240,7 +240,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 		} else {
 			/*
 			 * If fragment interleave is enabled, we
-			 * can queue this to the recieve queue instead
+			 * can queue this to the receive queue instead
 			 * of the lobby.
 			 */
 			if (sctp_sk(sk)->frag_interleave)
diff --git a/net/socket.c b/net/socket.c
index 5212447c86e7..310d16b1b3c9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2986,7 +2986,7 @@ out:
 
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
  * for some operations; this forces use of the newer bridge-utils that
- * use compatiable ioctls
+ * use compatible ioctls
  */
 static int old_bridge_ioctl(compat_ulong_t __user *argp)
 {
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index bcdae78fdfc6..8d0f7d3c71c8 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1101,7 +1101,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 
 	/* credential is:
 	 *   version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
-	 * at least 5 u32s, and is preceeded by length, so that makes 6.
+	 * at least 5 u32s, and is preceded by length, so that makes 6.
 	 */
 
 	if (argv->iov_len < 5 * 4)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1e336a06d3e6..bf005d3c65ef 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -504,7 +504,7 @@ static int xs_nospace(struct rpc_task *task)
  *   EAGAIN:	The socket was blocked, please call again later to
  *		complete the request
  * ENOTCONN:	Caller needs to invoke connect logic then call again
- *    other:	Some other error occured, the request was not sent
+ *    other:	Some other error occurred, the request was not sent
  */
 static int xs_udp_send_request(struct rpc_task *task)
 {
@@ -590,7 +590,7 @@ static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
  *   EAGAIN:	The socket was blocked, please call again later to
  *		complete the request
  * ENOTCONN:	Caller needs to invoke connect logic then call again
- *    other:	Some other error occured, the request was not sent
+ *    other:	Some other error occurred, the request was not sent
  *
  * XXX: In the case of soft timeouts, should we eventually give up
  *	if sendmsg is not able to make progress?
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 43639ff1cbec..ebf338f7b14e 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2471,7 +2471,7 @@ exit:
  * A pending message being re-assembled must store certain values
  * to handle subsequent fragments correctly. The following functions
  * help storing these values in unused, available fields in the
- * pending message. This makes dynamic memory allocation unecessary.
+ * pending message. This makes dynamic memory allocation unnecessary.
  */
 
 static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c9fa6dfcf287..80025a1b3bfd 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -160,7 +160,7 @@ void tipc_named_withdraw(struct publication *publ)
 
 	buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
 	if (!buf) {
-		warn("Withdrawl distribution failure\n");
+		warn("Withdrawal distribution failure\n");
 		return;
 	}
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1663e1a2efdd..3a43a8304768 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -207,7 +207,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp)
 		/*
 		 * This may look like an off by one error but it is a bit more
 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
-		 * sun_path[108] doesnt as such exist.  However in kernel space
+		 * sun_path[108] doesn't as such exist.  However in kernel space
 		 * we are guaranteed that it is a valid memory location in our
 		 * kernel address buffer.
 		 */
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 11f25c7a7a05..f346395314ba 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -51,7 +51,7 @@
 
 /*
  *	Structures for interfacing with the /proc filesystem.
- *	Router creates its own directory /proc/net/router with the folowing
+ *	Router creates its own directory /proc/net/router with the following
  *	entries:
  *	config		device configuration
  *	status		global device statistics
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3332d5bce317..ab801a1097b2 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -809,7 +809,7 @@ static void handle_channel(struct wiphy *wiphy,
 	if (r) {
 		/*
 		 * We will disable all channels that do not match our
-		 * recieved regulatory rule unless the hint is coming
+		 * received regulatory rule unless the hint is coming
 		 * from a Country IE and the Country IE had no information
 		 * about a band. The IEEE 802.11 spec allows for an AP
 		 * to send only a subset of the regulatory rules allowed,
@@ -838,7 +838,7 @@ static void handle_channel(struct wiphy *wiphy,
 	    request_wiphy && request_wiphy == wiphy &&
 	    request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) {
 		/*
-		 * This gaurantees the driver's requested regulatory domain
+		 * This guarantees the driver's requested regulatory domain
 		 * will always be used as a base for further regulatory
 		 * settings
 		 */
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 406207515b5e..f77e4e75f914 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -31,7 +31,7 @@
  * x25_parse_facilities - Parse facilities from skb into the facilities structs
  *
  * @skb: sk_buff to parse
- * @facilities: Regular facilites, updated as facilities are found
+ * @facilities: Regular facilities, updated as facilities are found
  * @dte_facs: ITU DTE facilities, updated as DTE facilities are found
  * @vc_fac_mask: mask is updated with all facilities found
  *
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index 25a810793968..c541b622ae16 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -31,7 +31,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
 		goto out_no_route;
 
 	if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) {
-		/* This shouldnt happen, if it occurs somehow
+		/* This shouldn't happen, if it occurs somehow
 		 * do something sensible
 		 */
 		goto out_put_route;
@@ -45,7 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
 	}
 
 	/* Remote end sending a call request on an already
-	 * established LCI? It shouldnt happen, just in case..
+	 * established LCI? It shouldn't happen, just in case..
 	 */
 	read_lock_bh(&x25_forward_list_lock);
 	list_for_each(entry, &x25_forward_list) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3d15d3e1b2c4..5d1d60d3ca83 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -894,7 +894,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	u32 *f;
 
 	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
-	if (nlh == NULL) /* shouldnt really happen ... */
+	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
 	f = nlmsg_data(nlh);
@@ -954,7 +954,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
 	u32 *f;
 
 	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
-	if (nlh == NULL) /* shouldnt really happen ... */
+	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
 	f = nlmsg_data(nlh);
@@ -1361,7 +1361,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (!xp)
 		return err;
 
-	/* shouldnt excl be based on nlh flags??
+	/* shouldn't excl be based on nlh flags??
 	 * Aha! this is anti-netlink really i.e  more pfkey derived
 	 * in netlink excl is a flag and you wouldnt need
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
-- 
cgit v1.2.3


From c100c8f4c3c6f2a407bdbaaad2c4f1062e6a473a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 31 Mar 2011 18:59:10 -0700
Subject: appletalk: Fix OOPS in atalk_release().

Commit 60d9f461a20ba59219fdcdc30cbf8e3a4ad3f625 ("appletalk: remove
the BKL") added a dereference of "sk" before checking for NULL in
atalk_release().

Guard the code block completely, rather than partially, with the
NULL check.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 206e771e82d1..956a5302002a 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1051,16 +1051,17 @@ static int atalk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
-	sock_hold(sk);
-	lock_sock(sk);
 	if (sk) {
+		sock_hold(sk);
+		lock_sock(sk);
+
 		sock_orphan(sk);
 		sock->sk = NULL;
 		atalk_destroy_socket(sk);
-	}
-	release_sock(sk);
-	sock_put(sk);
 
+		release_sock(sk);
+		sock_put(sk);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2cab86bee8e7f353e6ac8c15b3eb906643497644 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Thu, 31 Mar 2011 23:42:55 +0000
Subject: sctp: malloc enough room for asconf-ack chunk

Sometime the ASCONF_ACK parameters can equal to the fourfold of
ASCONF parameters, this only happend in some special case:

  ASCONF parameter is :
    Unrecognized Parameter (4 bytes)
  ASCONF_ACK parameter should be:
    Error Cause Indication parameter (8 bytes header)
     + Error Cause (4 bytes header)
       + Unrecognized Parameter (4bytes)

Four 4bytes Unrecognized Parameters in ASCONF chunk will cause panic.

Pid: 0, comm: swapper Not tainted 2.6.38-next+ #22 Bochs Bochs
EIP: 0060:[<c0717eae>] EFLAGS: 00010246 CPU: 0
EIP is at skb_put+0x60/0x70
EAX: 00000077 EBX: c09060e2 ECX: dec1dc30 EDX: c09469c0
ESI: 00000000 EDI: de3c8d40 EBP: dec1dc58 ESP: dec1dc2c
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process swapper (pid: 0, ti=dec1c000 task=c09aef20 task.ti=c0980000)
Stack:
 c09469c0 e1894fa4 00000044 00000004 de3c8d00 de3c8d00 de3c8d44 de3c8d40
 c09060e2 de25dd80 de3c8d40 dec1dc7c e1894fa4 dec1dcb0 00000040 00000004
 00000000 00000800 00000004 00000004 dec1dce0 e1895a2b dec1dcb4 de25d960
Call Trace:
 [<e1894fa4>] ? sctp_addto_chunk+0x4e/0x89 [sctp]
 [<e1894fa4>] sctp_addto_chunk+0x4e/0x89 [sctp]
 [<e1895a2b>] sctp_process_asconf+0x32f/0x3d1 [sctp]
 [<e188d554>] sctp_sf_do_asconf+0xf8/0x173 [sctp]
 [<e1890b02>] sctp_do_sm+0xb8/0x159 [sctp]
 [<e18a2248>] ? sctp_cname+0x0/0x52 [sctp]
 [<e189392d>] sctp_assoc_bh_rcv+0xac/0xe3 [sctp]
 [<e1897d76>] sctp_inq_push+0x2d/0x30 [sctp]
 [<e18a21b2>] sctp_rcv+0x7a7/0x83d [sctp]
 [<c077a95c>] ? ipv4_confirm+0x118/0x125
 [<c073a970>] ? nf_iterate+0x34/0x62
 [<c074789d>] ? ip_local_deliver_finish+0x0/0x194
 [<c074789d>] ? ip_local_deliver_finish+0x0/0x194
 [<c0747992>] ip_local_deliver_finish+0xf5/0x194
 [<c074789d>] ? ip_local_deliver_finish+0x0/0x194
 [<c0747a6e>] NF_HOOK.clone.1+0x3d/0x44
 [<c0747ab3>] ip_local_deliver+0x3e/0x44
 [<c074789d>] ? ip_local_deliver_finish+0x0/0x194
 [<c074775c>] ip_rcv_finish+0x29f/0x2c7
 [<c07474bd>] ? ip_rcv_finish+0x0/0x2c7
 [<c0747a6e>] NF_HOOK.clone.1+0x3d/0x44
 [<c0747cae>] ip_rcv+0x1f5/0x233
 [<c07474bd>] ? ip_rcv_finish+0x0/0x2c7
 [<c071dce3>] __netif_receive_skb+0x310/0x336
 [<c07221f3>] netif_receive_skb+0x4b/0x51
 [<e0a4ed3d>] cp_rx_poll+0x1e7/0x29c [8139cp]
 [<c072275e>] net_rx_action+0x65/0x13a
 [<c0445a54>] __do_softirq+0xa1/0x149
 [<c04459b3>] ? __do_softirq+0x0/0x149
 <IRQ>
 [<c0445891>] ? irq_exit+0x37/0x72
 [<c040a7e9>] ? do_IRQ+0x81/0x95
 [<c07b3670>] ? common_interrupt+0x30/0x38
 [<c0428058>] ? native_safe_halt+0xa/0xc
 [<c040f5d7>] ? default_idle+0x58/0x92
 [<c0408fb0>] ? cpu_idle+0x96/0xb2
 [<c0797989>] ? rest_init+0x5d/0x5f
 [<c09fd90c>] ? start_kernel+0x34b/0x350
 [<c09fd0cb>] ? i386_start_kernel+0xba/0xc1

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index de98665db524..b3434cc7d0cf 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3106,10 +3106,10 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
 
 	/* create an ASCONF_ACK chunk.
 	 * Based on the definitions of parameters, we know that the size of
-	 * ASCONF_ACK parameters are less than or equal to the twice of ASCONF
+	 * ASCONF_ACK parameters are less than or equal to the fourfold of ASCONF
 	 * parameters.
 	 */
-	asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2);
+	asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 4);
 	if (!asconf_ack)
 		goto done;
 
-- 
cgit v1.2.3


From 2fceec13375e5d98ef033c6b0ee03943fc460950 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 1 Apr 2011 21:47:41 -0700
Subject: tcp: len check is unnecessarily devastating, change to WARN_ON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All callers are prepared for alloc failures anyway, so this error
can safely be boomeranged to the callers domain without super
bad consequences. ...At worst the connection might go into a state
where each RTO tries to (unsuccessfully) re-fragment with such
a mis-sized value and eventually dies.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dfa5beb0c1c8..8b0d0167e44a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1003,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	int nlen;
 	u8 flags;
 
-	BUG_ON(len > skb->len);
+	if (WARN_ON(len > skb->len))
+		return -EINVAL;
 
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
-- 
cgit v1.2.3


From 512d06b5b64fb422d90f199b1be188082729edf9 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 4 Apr 2011 15:18:45 +0200
Subject: netfilter: ipset: list:set timeout variant fixes

- the timeout value was actually not set
- the garbage collector was broken

The variant is fixed, the tests to the ipset testsuite are added.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_list_set.c | 53 ++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index a47c32982f06..f4a46c0d25f3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -43,14 +43,19 @@ struct list_set {
 static inline struct set_elem *
 list_set_elem(const struct list_set *map, u32 id)
 {
-	return (struct set_elem *)((char *)map->members + id * map->dsize);
+	return (struct set_elem *)((void *)map->members + id * map->dsize);
+}
+
+static inline struct set_telem *
+list_set_telem(const struct list_set *map, u32 id)
+{
+	return (struct set_telem *)((void *)map->members + id * map->dsize);
 }
 
 static inline bool
 list_set_timeout(const struct list_set *map, u32 id)
 {
-	const struct set_telem *elem =
-		(const struct set_telem *) list_set_elem(map, id);
+	const struct set_telem *elem = list_set_telem(map, id);
 
 	return ip_set_timeout_test(elem->timeout);
 }
@@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id)
 static inline bool
 list_set_expired(const struct list_set *map, u32 id)
 {
-	const struct set_telem *elem =
-		(const struct set_telem *) list_set_elem(map, id);
+	const struct set_telem *elem = list_set_telem(map, id);
 
 	return ip_set_timeout_expired(elem->timeout);
 }
 
-static inline int
-list_set_exist(const struct set_telem *elem)
-{
-	return elem->id != IPSET_INVALID_ID &&
-	       !ip_set_timeout_expired(elem->timeout);
-}
-
 /* Set list without and with timeout */
 
 static int
@@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
 	struct set_telem *e;
 
 	for (; i < map->size; i++) {
-		e = (struct set_telem *)list_set_elem(map, i);
+		e = list_set_telem(map, i);
 		swap(e->id, id);
+		swap(e->timeout, timeout);
 		if (e->id == IPSET_INVALID_ID)
 			break;
-		swap(e->timeout, timeout);
 	}
 }
 
@@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
 		/* Last element replaced: e.g. add new,before,last */
 		ip_set_put_byindex(e->id);
 	if (with_timeout(map->timeout))
-		list_elem_tadd(map, i, id, timeout);
+		list_elem_tadd(map, i, id, ip_set_timeout_set(timeout));
 	else
 		list_elem_add(map, i, id);
 
@@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
 }
 
 static int
-list_set_del(struct list_set *map, ip_set_id_t id, u32 i)
+list_set_del(struct list_set *map, u32 i)
 {
 	struct set_elem *a = list_set_elem(map, i), *b;
 
-	ip_set_put_byindex(id);
+	ip_set_put_byindex(a->id);
 
 	for (; i < map->size - 1; i++) {
 		b = list_set_elem(map, i + 1);
@@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 				 (before == 0 ||
 				  (before > 0 &&
 				   next_id_eq(map, i, refid))))
-				ret = list_set_del(map, id, i);
+				ret = list_set_del(map, i);
 			else if (before < 0 &&
 				 elem->id == refid &&
 				 next_id_eq(map, i, id))
-				ret = list_set_del(map, id, i + 1);
+				ret = list_set_del(map, i + 1);
 		}
 		break;
 	default:
@@ -460,17 +457,15 @@ list_set_gc(unsigned long ul_set)
 	struct list_set *map = set->data;
 	struct set_telem *e;
 	u32 i;
-
-	/* We run parallel with other readers (test element)
-	 * but adding/deleting new entries is locked out */
-	read_lock_bh(&set->lock);
-	for (i = map->size - 1; i >= 0; i--) {
-		e = (struct set_telem *) list_set_elem(map, i);
-		if (e->id != IPSET_INVALID_ID &&
-		    list_set_expired(map, i))
-			list_set_del(map, e->id, i);
+	
+	/* nfnl_lock should be called */
+	write_lock_bh(&set->lock);
+	for (i = 0; i < map->size; i++) {
+		e = list_set_telem(map, i);
+		if (e->id != IPSET_INVALID_ID && list_set_expired(map, i))
+			list_set_del(map, i);
 	}
-	read_unlock_bh(&set->lock);
+	write_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
 	add_timer(&map->gc);
-- 
cgit v1.2.3


From 2f9f28b212a2bd4948c8ceaaec33ce0123632129 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Mon, 4 Apr 2011 15:19:25 +0200
Subject: netfilter: ipset: references are protected by rwlock instead of mutex

The timeout variant of the list:set type must reference the member sets.
However, its garbage collector runs at timer interrupt so the mutex
protection of the references is a no go. Therefore the reference protection
is converted to rwlock.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_bitmap_ip.c    |   3 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c |   3 +-
 net/netfilter/ipset/ip_set_bitmap_port.c  |   3 +-
 net/netfilter/ipset/ip_set_core.c         | 109 ++++++++++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c     |   6 +-
 5 files changed, 71 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index bca96990218d..a113ff066928 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
 	if (map->netmask != 32)
 		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->memsize));
 	if (with_timeout(map->timeout))
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 5e790172deff..00a33242e90c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
 	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map)
 			    + (map->last_ip - map->first_ip + 1) * map->dsize));
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 165f09b1a9cb..6b38eb8f6ed8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
 		goto nla_put_failure;
 	NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port));
 	NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->memsize));
 	if (with_timeout(map->timeout))
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d6b48230a540..e88ac3c3ed07 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -26,6 +26,7 @@
 
 static LIST_HEAD(ip_set_type_list);		/* all registered set types */
 static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
+static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */
 
 static struct ip_set **ip_set_list;		/* all individual sets */
 static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */
@@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 static inline void
 __ip_set_get(ip_set_id_t index)
 {
-	atomic_inc(&ip_set_list[index]->ref);
+	write_lock_bh(&ip_set_ref_lock);
+	ip_set_list[index]->ref++;
+	write_unlock_bh(&ip_set_ref_lock);
 }
 
 static inline void
 __ip_set_put(ip_set_id_t index)
 {
-	atomic_dec(&ip_set_list[index]->ref);
+	write_lock_bh(&ip_set_ref_lock);
+	BUG_ON(ip_set_list[index]->ref == 0);
+	ip_set_list[index]->ref--;
+	write_unlock_bh(&ip_set_ref_lock);
 }
 
 /*
@@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 	struct ip_set *set = ip_set_list[index];
 	int ret = 0;
 
-	BUG_ON(set == NULL || atomic_read(&set->ref) == 0);
+	BUG_ON(set == NULL);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (dim < set->type->dimension ||
@@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del);
  * Find set by name, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
- * The nfnl mutex must already be activated.
  */
 ip_set_id_t
 ip_set_get_byname(const char *name, struct ip_set **set)
@@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
- * The nfnl mutex must already be activated.
  */
 void
 ip_set_put_byindex(ip_set_id_t index)
 {
-	if (ip_set_list[index] != NULL) {
-		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
+	if (ip_set_list[index] != NULL)
 		__ip_set_put(index);
-	}
 }
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
@@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex);
  * can't be destroyed. The set cannot be renamed due to
  * the referencing either.
  *
- * The nfnl mutex must already be activated.
  */
 const char *
 ip_set_name_byindex(ip_set_id_t index)
@@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index)
 	const struct ip_set *set = ip_set_list[index];
 
 	BUG_ON(set == NULL);
-	BUG_ON(atomic_read(&set->ref) == 0);
+	BUG_ON(set->ref == 0);
 
 	/* Referenced, so it's safe */
 	return set->name;
@@ -515,10 +516,7 @@ void
 ip_set_nfnl_put(ip_set_id_t index)
 {
 	nfnl_lock();
-	if (ip_set_list[index] != NULL) {
-		BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0);
-		__ip_set_put(index);
-	}
+	ip_set_put_byindex(index);
 	nfnl_unlock();
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
@@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 /*
  * Communication protocol with userspace over netlink.
  *
- * We already locked by nfnl_lock.
+ * The commands are serialized by the nfnl mutex.
  */
 
 static inline bool
@@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		return -ENOMEM;
 	rwlock_init(&set->lock);
 	strlcpy(set->name, name, IPSET_MAXNAMELEN);
-	atomic_set(&set->ref, 0);
 	set->family = family;
 
 	/*
@@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 
 	/*
 	 * Here, we have a valid, constructed set and we are protected
-	 * by nfnl_lock. Find the first free index in ip_set_list and
-	 * check clashing.
+	 * by the nfnl mutex. Find the first free index in ip_set_list
+	 * and check clashing.
 	 */
 	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
 		/* If this is the same set and requested, ignore error */
@@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	       const struct nlattr * const attr[])
 {
 	ip_set_id_t i;
+	int ret = 0;
 
 	if (unlikely(protocol_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
 
-	/* References are protected by the nfnl mutex */
+	/* Commands are serialized and references are
+	 * protected by the ip_set_ref_lock.
+	 * External systems (i.e. xt_set) must call
+	 * ip_set_put|get_nfnl_* functions, that way we
+	 * can safely check references here.
+	 *
+	 * list:set timer can only decrement the reference
+	 * counter, so if it's already zero, we can proceed
+	 * without holding the lock.
+	 */
+	read_lock_bh(&ip_set_ref_lock);
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < ip_set_max; i++) {
-			if (ip_set_list[i] != NULL &&
-			    (atomic_read(&ip_set_list[i]->ref)))
-				return -IPSET_ERR_BUSY;
+			if (ip_set_list[i] != NULL && ip_set_list[i]->ref) {
+				ret = IPSET_ERR_BUSY;
+				goto out;
+			}
 		}
+		read_unlock_bh(&ip_set_ref_lock);
 		for (i = 0; i < ip_set_max; i++) {
 			if (ip_set_list[i] != NULL)
 				ip_set_destroy_set(i);
 		}
 	} else {
 		i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME]));
-		if (i == IPSET_INVALID_ID)
-			return -ENOENT;
-		else if (atomic_read(&ip_set_list[i]->ref))
-			return -IPSET_ERR_BUSY;
+		if (i == IPSET_INVALID_ID) {
+			ret = -ENOENT;
+			goto out;
+		} else if (ip_set_list[i]->ref) {
+			ret = -IPSET_ERR_BUSY;
+			goto out;
+		}
+		read_unlock_bh(&ip_set_ref_lock);
 
 		ip_set_destroy_set(i);
 	}
 	return 0;
+out:
+	read_unlock_bh(&ip_set_ref_lock);
+	return ret;
 }
 
 /* Flush sets */
@@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set *set;
 	const char *name2;
 	ip_set_id_t i;
+	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
 		     attr[IPSET_ATTR_SETNAME] == NULL ||
@@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
 	if (set == NULL)
 		return -ENOENT;
-	if (atomic_read(&set->ref) != 0)
-		return -IPSET_ERR_REFERENCED;
+
+	read_lock_bh(&ip_set_ref_lock);
+	if (set->ref != 0) {
+		ret = -IPSET_ERR_REFERENCED;
+		goto out;
+	}
 
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
 	for (i = 0; i < ip_set_max; i++) {
 		if (ip_set_list[i] != NULL &&
-		    STREQ(ip_set_list[i]->name, name2))
-			return -IPSET_ERR_EXIST_SETNAME2;
+		    STREQ(ip_set_list[i]->name, name2)) {
+			ret = -IPSET_ERR_EXIST_SETNAME2;
+			goto out;
+		}
 	}
 	strncpy(set->name, name2, IPSET_MAXNAMELEN);
 
-	return 0;
+out:
+	read_unlock_bh(&ip_set_ref_lock);
+	return ret;
 }
 
 /* Swap two sets so that name/index points to the other.
  * References and set names are also swapped.
  *
- * We are protected by the nfnl mutex and references are
- * manipulated only by holding the mutex. The kernel interfaces
+ * The commands are serialized by the nfnl mutex and references are
+ * protected by the ip_set_ref_lock. The kernel interfaces
  * do not hold the mutex but the pointer settings are atomic
  * so the ip_set_list always contains valid pointers to the sets.
  */
@@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
-	u32 from_ref;
 
 	if (unlikely(protocol_failed(attr) ||
 		     attr[IPSET_ATTR_SETNAME] == NULL ||
@@ -899,17 +924,15 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	      from->type->family == to->type->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
 
-	/* No magic here: ref munging protected by the nfnl_lock */
 	strncpy(from_name, from->name, IPSET_MAXNAMELEN);
-	from_ref = atomic_read(&from->ref);
-
 	strncpy(from->name, to->name, IPSET_MAXNAMELEN);
-	atomic_set(&from->ref, atomic_read(&to->ref));
 	strncpy(to->name, from_name, IPSET_MAXNAMELEN);
-	atomic_set(&to->ref, from_ref);
 
+	write_lock_bh(&ip_set_ref_lock);
+	swap(from->ref, to->ref);
 	ip_set_list[from_id] = to;
 	ip_set_list[to_id] = from;
+	write_unlock_bh(&ip_set_ref_lock);
 
 	return 0;
 }
@@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb)
 {
 	if (cb->args[2]) {
 		pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name);
-		__ip_set_put((ip_set_id_t) cb->args[1]);
+		ip_set_put_byindex((ip_set_id_t) cb->args[1]);
 	}
 	return 0;
 }
@@ -1068,7 +1091,7 @@ release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[2]) {
 		pr_debug("release set %s\n", ip_set_list[index]->name);
-		__ip_set_put(index);
+		ip_set_put_byindex(index);
 	}
 
 	/* If we dump all sets, continue with dumping last ones */
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f4a46c0d25f3..e9159e99fc4b 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -366,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 	NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size));
 	if (with_timeout(map->timeout))
 		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
-	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
-		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1));
 	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
 		      htonl(sizeof(*map) + map->size * map->dsize));
 	ipset_nest_end(skb, nested);
@@ -457,8 +456,7 @@ list_set_gc(unsigned long ul_set)
 	struct list_set *map = set->data;
 	struct set_telem *e;
 	u32 i;
-	
-	/* nfnl_lock should be called */
+
 	write_lock_bh(&set->lock);
 	for (i = 0; i < map->size; i++) {
 		e = list_set_telem(map, i);
-- 
cgit v1.2.3


From b4232a22776aa5d063f890d21ca69870dbbe431b Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Mon, 4 Apr 2011 15:21:02 +0200
Subject: netfilter: h323: bug in parsing of ASN1 SEQOF field

Static analyzer of clang found a dead store which appears to be a bug in
reading count of items in SEQOF field, only the lower byte of word is
stored. This may lead to corrupted read and communication shutdown.

The bug has been in the module since it's first inclusion into linux
kernel.

[Patrick: the bug is real, but without practical consequence since the
 largest amount of sequence-of members we parse is 30.]

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_h323_asn1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 867882313e49..bcd5ed6b7130 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f,
 		CHECK_BOUND(bs, 2);
 		count = *bs->cur++;
 		count <<= 8;
-		count = *bs->cur++;
+		count += *bs->cur++;
 		break;
 	case SEMI:
 		BYTE_ALIGN(bs);
-- 
cgit v1.2.3


From a09d19779f3ffac6e16821accc2c1cc4df1b643a Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Mon, 4 Apr 2011 15:25:18 +0200
Subject: IPVS: fix NULL ptr dereference in ip_vs_ctl.c
 ip_vs_genl_dump_daemons()

ipvsadm -ln --daemon will trigger a Null pointer exception because
ip_vs_genl_dump_daemons() uses skb_net() instead of skb_sknet().

To prevent others from NULL ptr a check is made in ip_vs.h skb_net().

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 33733c8872e7..ae47090bf45f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3120,7 +3120,7 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
 				   struct netlink_callback *cb)
 {
-	struct net *net = skb_net(skb);
+	struct net *net = skb_sknet(skb);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	mutex_lock(&__ip_vs_mutex);
-- 
cgit v1.2.3


From 31ad3dd64e689bc79dd819f8f134b9b025240eb8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Apr 2011 16:56:29 +0200
Subject: netfilter: af_info: add network namespace parameter to route hook

This is required to eventually replace the rt6_lookup call in
xt_addrtype.c with nf_afinfo->route().

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter.c                   | 5 +++--
 net/ipv6/netfilter.c                   | 5 +++--
 net/netfilter/nf_conntrack_h323_main.c | 8 ++++----
 net/netfilter/xt_TCPMSS.c              | 2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f3c0b549b8e1..f1035f056503 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
-static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
+static int nf_ip_route(struct net *net, struct dst_entry **dst,
+		       struct flowi *fl)
 {
-	struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4);
+	struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
 	*dst = &rt->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39aaca2b4fd2..e008b9b4a779 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -90,9 +90,10 @@ static int nf_ip6_reroute(struct sk_buff *skb,
 	return 0;
 }
 
-static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl)
+static int nf_ip6_route(struct net *net, struct dst_entry **dst,
+			struct flowi *fl)
 {
-	*dst = ip6_route_output(&init_net, NULL, &fl->u.ip6);
+	*dst = ip6_route_output(net, NULL, &fl->u.ip6);
 	return (*dst)->error;
 }
 
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 533a183e6661..39a453895b4d 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -731,9 +731,9 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 
 		memset(&fl2, 0, sizeof(fl2));
 		fl2.daddr = dst->ip;
-		if (!afinfo->route((struct dst_entry **)&rt1,
+		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
 				   flowi4_to_flowi(&fl1))) {
-			if (!afinfo->route((struct dst_entry **)&rt2,
+			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
 					   flowi4_to_flowi(&fl2))) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
 				    rt1->dst.dev  == rt2->dst.dev)
@@ -755,9 +755,9 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 
 		memset(&fl2, 0, sizeof(fl2));
 		ipv6_addr_copy(&fl2.daddr, &dst->in6);
-		if (!afinfo->route((struct dst_entry **)&rt1,
+		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
 				   flowi6_to_flowi(&fl1))) {
-			if (!afinfo->route((struct dst_entry **)&rt2,
+			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
 					   flowi6_to_flowi(&fl2))) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6e6b46cb1db9..8690125e3b18 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 	rcu_read_lock();
 	ai = nf_get_afinfo(family);
 	if (ai != NULL)
-		ai->route((struct dst_entry **)&rt, &fl);
+		ai->route(&init_net, (struct dst_entry **)&rt, &fl);
 	rcu_read_unlock();
 
 	if (rt != NULL) {
-- 
cgit v1.2.3


From 0fae2e7740aca7e384c5f337f458897e7e337d58 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Apr 2011 17:00:54 +0200
Subject: netfilter: af_info: add 'strict' parameter to limit lookup to .oif

ipv6 fib lookup can set RT6_LOOKUP_F_IFACE flag to restrict search
to an interface, but this flag cannot be set via struct flowi.

Also, it cannot be set via ip6_route_output: this function uses the
passed sock struct to determine if this flag is required
(by testing for nonzero sk_bound_dev_if).

Work around this by passing in an artificial struct sk in case
'strict' argument is true.

This is required to replace the rt6_lookup call in xt_addrtype.c with
nf_afinfo->route().

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter.c                   |  2 +-
 net/ipv6/netfilter.c                   | 12 ++++++++++--
 net/netfilter/nf_conntrack_h323_main.c |  8 ++++----
 net/netfilter/xt_TCPMSS.c              |  2 +-
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f1035f056503..4614babdc45f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -222,7 +222,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 }
 
 static int nf_ip_route(struct net *net, struct dst_entry **dst,
-		       struct flowi *fl)
+		       struct flowi *fl, bool strict __always_unused)
 {
 	struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
 	if (IS_ERR(rt))
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index e008b9b4a779..28bc1f644b7b 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -91,9 +91,17 @@ static int nf_ip6_reroute(struct sk_buff *skb,
 }
 
 static int nf_ip6_route(struct net *net, struct dst_entry **dst,
-			struct flowi *fl)
+			struct flowi *fl, bool strict)
 {
-	*dst = ip6_route_output(net, NULL, &fl->u.ip6);
+	static const struct ipv6_pinfo fake_pinfo;
+	static const struct inet_sock fake_sk = {
+		/* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
+		.sk.sk_bound_dev_if = 1,
+		.pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
+	};
+	const void *sk = strict ? &fake_sk : NULL;
+
+	*dst = ip6_route_output(net, sk, &fl->u.ip6);
 	return (*dst)->error;
 }
 
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 39a453895b4d..18b2ce5c8ced 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -732,9 +732,9 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		memset(&fl2, 0, sizeof(fl2));
 		fl2.daddr = dst->ip;
 		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
-				   flowi4_to_flowi(&fl1))) {
+				   flowi4_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
-					   flowi4_to_flowi(&fl2))) {
+					   flowi4_to_flowi(&fl2), false)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
 				    rt1->dst.dev  == rt2->dst.dev)
 					ret = 1;
@@ -756,9 +756,9 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		memset(&fl2, 0, sizeof(fl2));
 		ipv6_addr_copy(&fl2.daddr, &dst->in6);
 		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
-				   flowi6_to_flowi(&fl1))) {
+				   flowi6_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
-					   flowi6_to_flowi(&fl2))) {
+					   flowi6_to_flowi(&fl2), false)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
 				    rt1->dst.dev == rt2->dst.dev)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8690125e3b18..9e63b43faeed 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 	rcu_read_lock();
 	ai = nf_get_afinfo(family);
 	if (ai != NULL)
-		ai->route(&init_net, (struct dst_entry **)&rt, &fl);
+		ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
 	rcu_read_unlock();
 
 	if (rt != NULL) {
-- 
cgit v1.2.3


From b7225041e93f81e7e38fcdf27fc82044e7695efd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Apr 2011 17:01:43 +0200
Subject: netfilter: xt_addrtype: replace rt6_lookup with nf_afinfo->route

This avoids pulling in the ipv6 module when using (ipv4-only) iptables
-m addrtype.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig       |  1 -
 net/netfilter/xt_addrtype.c | 42 ++++++++++++++++++++++++++++--------------
 2 files changed, 28 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c3f988aa1152..32bff6d86cb2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -652,7 +652,6 @@ comment "Xtables matches"
 config NETFILTER_XT_MATCH_ADDRTYPE
 	tristate '"addrtype" address type match support'
 	depends on NETFILTER_ADVANCED
-	depends on (IPV6 || IPV6=n)
 	---help---
 	  This option allows you to match what routing thinks of an address,
 	  eg. UNICAST, LOCAL, BROADCAST, ...
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 2220b85e9519..b77d383cec78 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype");
 MODULE_ALIAS("ip6t_addrtype");
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
+static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
+			    const struct in6_addr *addr)
 {
+	const struct nf_afinfo *afinfo;
+	struct flowi6 flow;
+	struct rt6_info *rt;
 	u32 ret;
+	int route_err;
 
-	if (!rt)
+	memset(&flow, 0, sizeof(flow));
+	ipv6_addr_copy(&flow.daddr, addr);
+	if (dev)
+		flow.flowi6_oif = dev->ifindex;
+
+	rcu_read_lock();
+
+	afinfo = nf_get_afinfo(NFPROTO_IPV6);
+	if (afinfo != NULL)
+		route_err = afinfo->route(net, (struct dst_entry **)&rt,
+					flowi6_to_flowi(&flow), !!dev);
+	else
+		route_err = 1;
+
+	rcu_read_unlock();
+
+	if (route_err)
 		return XT_ADDRTYPE_UNREACHABLE;
 
 	if (rt->rt6i_flags & RTF_REJECT)
@@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
 		ret |= XT_ADDRTYPE_LOCAL;
 	if (rt->rt6i_flags & RTF_ANYCAST)
 		ret |= XT_ADDRTYPE_ANYCAST;
+
+
+	dst_release(&rt->dst);
 	return ret;
 }
 
@@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev,
 		return false;
 
 	if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
-	     XT_ADDRTYPE_UNREACHABLE) & mask) {
-		struct rt6_info *rt;
-		u32 type;
-		int ifindex = dev ? dev->ifindex : 0;
-
-		rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
-
-		type = xt_addrtype_rt6_to_type(rt);
-
-		dst_release(&rt->dst);
-		return !!(mask & type);
-	}
+	     XT_ADDRTYPE_UNREACHABLE) & mask)
+		return !!(mask & match_lookup_rt6(net, dev, addr));
 	return true;
 }
 
-- 
cgit v1.2.3


From 96120d86fe302c006259baee9061eea9e1b9e486 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Apr 2011 17:06:21 +0200
Subject: netfilter: xt_conntrack: fix inverted conntrack direction test

--ctdir ORIGINAL matches REPLY packets, and vv:

userspace sets "invert_flags &= ~XT_CONNTRACK_DIRECTION" in ORIGINAL
case.

Thus: (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
      !!(info->invert_flags & XT_CONNTRACK_DIRECTION))

yields "1 ^ 0", which is true -> returns false.

Reproducer:
iptables -I OUTPUT 1 -p tcp --syn -m conntrack --ctdir ORIGINAL

Signed-off-by: Florian Westphal <fwestphal@astaro.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_conntrack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2c0086a4751e..481a86fdc409 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 		return info->match_flags & XT_CONNTRACK_STATE;
 	if ((info->match_flags & XT_CONNTRACK_DIRECTION) &&
 	    (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^
-	    !!(info->invert_flags & XT_CONNTRACK_DIRECTION))
+	    !(info->invert_flags & XT_CONNTRACK_DIRECTION))
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-- 
cgit v1.2.3


From fcf8bd3ba5362682f945a3f838070ac5e10ff871 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 1 Apr 2011 15:46:05 +0200
Subject: mac80211: Fix duplicate frames on cooked monitor

Cleaning the ieee80211_rx_data.flags field here is wrong, instead the
flags should be valid accross processing the frame on different
interfaces. Fix this by removing the incorrect flags=0 assignment.

Introduced in commit 554891e63a29af35cc6bb403ef34e319518114d0
(mac80211: move packet flags into packet).

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index aa5cc37b4921..2afeac9c6453 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2541,7 +2541,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx)
 		 * same TID from the same station
 		 */
 		rx->skb = skb;
-		rx->flags = 0;
 
 		CALL_RXH(ieee80211_rx_h_decrypt)
 		CALL_RXH(ieee80211_rx_h_check_more_data)
@@ -2612,6 +2611,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 		.sdata = sta->sdata,
 		.local = sta->local,
 		.queue = tid,
+		.flags = 0,
 	};
 	struct tid_ampdu_rx *tid_agg_rx;
 
-- 
cgit v1.2.3


From 738faca34335cd1d5d87fa7c58703139c7fa15bd Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@amd.com>
Date: Mon, 4 Apr 2011 13:07:26 -0700
Subject: ipv6: Don't pass invalid dst_entry pointer to dst_release().

Make sure dst_release() is not called with error pointer. This is
similar to commit 4910ac6c526d2868adcb5893e0c428473de862b5 ("ipv4:
Don't ip_rt_put() an error pointer in RAW sockets.").

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b0c186862c8..56fa12538d45 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -503,6 +503,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
 	if (IS_ERR(dst)) {
 		err = PTR_ERR(dst);
+		dst = NULL;
 		goto done;
 	}
 	skb = tcp_make_synack(sk, dst, req, rvp);
-- 
cgit v1.2.3


From 77f38e0eeac290827f41fd2215ab82546b8f73b8 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Wed, 6 Apr 2011 09:09:16 -0700
Subject: libceph: fix linger request requeueing

Fix the request transition from linger -> normal request.  The key is to
preserve r_osd and requeue on the same OSD.  Reregister as a normal request,
add the request to the proper queues, then unregister the linger.  Fix the
unregister helper to avoid clearing r_osd (and also simplify the parallel
check in __unregister_request()).

Reported-by: Henry Chang <henry.cy.chang@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3b91d651fe08..9204de484c2e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -579,9 +579,15 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 
 	list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
 				 r_linger_osd) {
-		__unregister_linger_request(osdc, req);
+		/*
+		 * reregister request prior to unregistering linger so
+		 * that r_osd is preserved.
+		 */
+		BUG_ON(!list_empty(&req->r_req_lru_item));
 		__register_request(osdc, req);
-		list_move(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add(&req->r_req_lru_item, &osdc->req_unsent);
+		list_add(&req->r_osd_item, &req->r_osd->o_requests);
+		__unregister_linger_request(osdc, req);
 		dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
 		     osd->o_osd);
 	}
@@ -798,7 +804,7 @@ static void __register_request(struct ceph_osd_client *osdc,
 	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
 	INIT_LIST_HEAD(&req->r_req_lru_item);
 
-	dout("register_request %p tid %lld\n", req, req->r_tid);
+	dout("__register_request %p tid %lld\n", req, req->r_tid);
 	__insert_request(osdc, req);
 	ceph_osdc_get_request(req);
 	osdc->num_requests++;
-- 
cgit v1.2.3


From 0867659fa3c245bf203d837a82e0f6ea5079c2c5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 6 Apr 2011 10:13:32 -0700
Subject: Revert "net/sunrpc: Use static const char arrays"

This reverts commit 411b5e05617593efebc06241dbc56f42150f2abe.

Olga Kornievskaia reports:

Problem: linux client mounting linux server using rc4-hmac-md5
enctype. gssd fails with create a context after receiving a reply from
the server.

Diagnose: putting printout statements in the server kernel and
kerberos libraries revealed that client and server derived different
integrity keys.

Server kernel code was at fault due the the commit

[aglo@skydive linux-pnfs]$ git show 411b5e05617593efebc06241dbc56f42150f2abe

Trond: The problem is that since it relies on virt_to_page(), you cannot
call sg_set_buf() for data in the const section.

Reported-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org	[2.6.36+]
---
 net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 9022f0a6503e..0a9a2ec2e469 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -427,7 +427,7 @@ static int
 context_derive_keys_rc4(struct krb5_ctx *ctx)
 {
 	struct crypto_hash *hmac;
-	static const char sigkeyconstant[] = "signaturekey";
+	char sigkeyconstant[] = "signaturekey";
 	int slen = strlen(sigkeyconstant) + 1;	/* include null terminator */
 	struct hash_desc desc;
 	struct scatterlist sg[1];
-- 
cgit v1.2.3


From 47482f132a689af168fae3055ff1899dfd032d3a Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 6 Apr 2011 13:07:09 -0700
Subject: ipv6: Enable RFS sk_rxhash tracking for ipv6 sockets (v2)

properly record sk_rxhash in ipv6 sockets (v2)

Noticed while working on another project that flows to sockets which I had open
on a test systems weren't getting steered properly when I had RFS enabled.
Looking more closely I found that:

1) The affected sockets were all ipv6
2) They weren't getting steered because sk->sk_rxhash was never set from the
incomming skbs on that socket.

This was occuring because there are several points in the IPv4 tcp and udp code
which save the rxhash value when a new connection is established.  Those calls
to sock_rps_save_rxhash were never added to the corresponding ipv6 code paths.
This patch adds those calls.  Tested by myself to properly enable RFS
functionalty on ipv6.

Change notes:
v2:
	Filtered UDP to only arm RFS on bound sockets (Eric Dumazet)

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 4 +++-
 net/ipv6/udp.c      | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 56fa12538d45..4f49e5dd41bb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1622,6 +1622,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		opt_skb = skb_clone(skb, GFP_ATOMIC);
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		sock_rps_save_rxhash(sk, skb->rxhash);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
 		if (opt_skb)
@@ -1649,7 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 				__kfree_skb(opt_skb);
 			return 0;
 		}
-	}
+	} else
+		sock_rps_save_rxhash(sk, skb->rxhash);
 
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d7037c006e13..15c37746845e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -505,6 +505,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 	int rc;
 	int is_udplite = IS_UDPLITE(sk);
 
+	if (!ipv6_addr_any(&inet6_sk(sk)->daddr))
+		sock_rps_save_rxhash(sk, skb->rxhash);
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto drop;
 
-- 
cgit v1.2.3


From ec80bfcb68a0c46443991991d459a0cde773cdea Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Tue, 5 Apr 2011 03:03:56 +0000
Subject: dsa/mv88e6131: add support for mv88e6085 switch

The mv88e6085 is identical to the mv88e6095, except that all ports are
10/100 Mb/s, so use the existing setup code except for the cpu/dsa speed
selection in _setup_port().

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/mv88e6131.c | 23 +++++++++++++++++++----
 net/dsa/mv88e6xxx.h |  2 ++
 2 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index bb2b41bc854e..a8e4f8c7dca4 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -14,6 +14,13 @@
 #include "dsa_priv.h"
 #include "mv88e6xxx.h"
 
+/*
+ * Switch product IDs
+ */
+#define ID_6085		0x04a0
+#define ID_6095		0x0950
+#define ID_6131		0x1060
+
 static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
 {
 	int ret;
@@ -21,9 +28,11 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
 	ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
 	if (ret >= 0) {
 		ret &= 0xfff0;
-		if (ret == 0x0950)
+		if (ret == ID_6085)
+			return "Marvell 88E6085";
+		if (ret == ID_6095)
 			return "Marvell 88E6095/88E6095F";
-		if (ret == 0x1060)
+		if (ret == ID_6131)
 			return "Marvell 88E6131";
 	}
 
@@ -164,6 +173,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
 
 static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
 {
+	struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
 	int addr = REG_PORT(p);
 	u16 val;
 
@@ -171,10 +181,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
 	 * MAC Forcing register: don't force link, speed, duplex
 	 * or flow control state to any particular values on physical
 	 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
-	 * full duplex.
+	 * (100 Mb/s on 6085) full duplex.
 	 */
 	if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
-		REG_WRITE(addr, 0x01, 0x003e);
+		if (ps->id == ID_6085)
+			REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */
+		else
+			REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */
 	else
 		REG_WRITE(addr, 0x01, 0x0003);
 
@@ -286,6 +299,8 @@ static int mv88e6131_setup(struct dsa_switch *ds)
 	mv88e6xxx_ppu_state_init(ds);
 	mutex_init(&ps->stats_mutex);
 
+	ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
+
 	ret = mv88e6131_switch_reset(ds);
 	if (ret < 0)
 		return ret;
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
index eb0e0aaa9f1b..61156ca26a0d 100644
--- a/net/dsa/mv88e6xxx.h
+++ b/net/dsa/mv88e6xxx.h
@@ -39,6 +39,8 @@ struct mv88e6xxx_priv_state {
 	 * Hold this mutex over snapshot + dump sequences.
 	 */
 	struct mutex	stats_mutex;
+
+	int		id; /* switch product id */
 };
 
 struct mv88e6xxx_hw_stat {
-- 
cgit v1.2.3


From 1b86a58f9d7ce4fe2377687f378fbfb53bdc9b6c Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Thu, 7 Apr 2011 14:04:08 -0700
Subject: ipv4: Fix "Set rt->rt_iif more sanely on output routes."

Commit 1018b5c01636c7c6bda31a719bda34fc631db29a ("Set rt->rt_iif more
sanely on output routes.")  breaks rt_is_{output,input}_route.

This became the cause to return "IP_PKTINFO's ->ipi_ifindex == 0".

To fix it, this does:

1) Add "int rt_route_iif;" to struct rtable

2) For input routes, always set rt_route_iif to same value as rt_iif

3) For output routes, always set rt_route_iif to zero.  Set rt_iif
   as it is done currently.

4) Change rt_is_{output,input}_route() to test rt_route_iif

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c        | 8 ++++++--
 net/ipv4/xfrm4_policy.c | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4b0c81180804..1628be530314 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1891,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
+	rth->rt_route_iif = dev->ifindex;
 	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= init_net.loopback_dev;
 	dev_hold(rth->dst.dev);
@@ -2026,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_key_src	= saddr;
 	rth->rt_src	= saddr;
 	rth->rt_gateway	= daddr;
+	rth->rt_route_iif = in_dev->dev->ifindex;
 	rth->rt_iif 	= in_dev->dev->ifindex;
 	rth->dst.dev	= (out_dev)->dev;
 	dev_hold(rth->dst.dev);
@@ -2202,6 +2204,7 @@ local_input:
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	rth->dst.tclassid = itag;
 #endif
+	rth->rt_route_iif = dev->ifindex;
 	rth->rt_iif	= dev->ifindex;
 	rth->dst.dev	= net->loopback_dev;
 	dev_hold(rth->dst.dev);
@@ -2401,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	rth->rt_mark    = oldflp4->flowi4_mark;
 	rth->rt_dst	= fl4->daddr;
 	rth->rt_src	= fl4->saddr;
-	rth->rt_iif	= 0;
+	rth->rt_route_iif = 0;
+	rth->rt_iif	= oldflp4->flowi4_oif ? : dev_out->ifindex;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
 	rth->dst.dev	= dev_out;
@@ -2716,6 +2720,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_key_dst = ort->rt_key_dst;
 		rt->rt_key_src = ort->rt_key_src;
 		rt->rt_tos = ort->rt_tos;
+		rt->rt_route_iif = ort->rt_route_iif;
 		rt->rt_iif = ort->rt_iif;
 		rt->rt_oif = ort->rt_oif;
 		rt->rt_mark = ort->rt_mark;
@@ -2725,7 +2730,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_type = ort->rt_type;
 		rt->rt_dst = ort->rt_dst;
 		rt->rt_src = ort->rt_src;
-		rt->rt_iif = ort->rt_iif;
 		rt->rt_gateway = ort->rt_gateway;
 		rt->rt_spec_dst = ort->rt_spec_dst;
 		rt->peer = ort->peer;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 13e0e7f659ff..d20a05e970d8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	rt->rt_key_dst = fl4->daddr;
 	rt->rt_key_src = fl4->saddr;
 	rt->rt_tos = fl4->flowi4_tos;
+	rt->rt_route_iif = fl4->flowi4_iif;
 	rt->rt_iif = fl4->flowi4_iif;
 	rt->rt_oif = fl4->flowi4_oif;
 	rt->rt_mark = fl4->flowi4_mark;
-- 
cgit v1.2.3


From 0c184ed9032c58b21f0d90de28c796874b73d6a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= <sjur.brandeland@stericsson.com>
Date: Mon, 11 Apr 2011 10:11:29 +0000
Subject: caif: Bugfix use for_each_safe when removing list nodes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfmuxl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 46f34b2e0478..24f1ffa74b06 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -244,9 +244,9 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 				int phyid)
 {
 	struct cfmuxl *muxl = container_obj(layr);
-	struct list_head *node;
+	struct list_head *node, *next;
 	struct cflayer *layer;
-	list_for_each(node, &muxl->srvl_list) {
+	list_for_each_safe(node, next, &muxl->srvl_list) {
 		layer = list_entry(node, struct cflayer, node);
 		if (cfsrvl_phyid_match(layer, phyid))
 			layer->ctrlcmd(layer, ctrl, phyid);
-- 
cgit v1.2.3


From 4a9f65f6304a00f6473e83b19c1e83caa1e42530 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= <sjur.brandeland@stericsson.com>
Date: Mon, 11 Apr 2011 10:11:30 +0000
Subject: caif: performance bugfix - allow radio stack to prioritize packets.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

    In the CAIF Payload message the Packet Type indication must be set to
    UNCLASSIFIED in order to allow packet prioritization in the modem's
    network stack. Otherwise TCP-Ack is not prioritized in the modems
    transmit queue.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfdgml.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 27dab26ad3b8..054fdb5aeb88 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -13,6 +13,7 @@
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
+
 #define container_obj(layr) ((struct cfsrvl *) layr)
 
 #define DGM_CMD_BIT  0x80
@@ -83,6 +84,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
 
 static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
+	u8 packet_type;
 	u32 zero = 0;
 	struct caif_payload_info *info;
 	struct cfsrvl *service = container_obj(layr);
@@ -94,7 +96,9 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (cfpkt_getlen(pkt) > DGM_MTU)
 		return -EMSGSIZE;
 
-	cfpkt_add_head(pkt, &zero, 4);
+	cfpkt_add_head(pkt, &zero, 3);
+	packet_type = 0x08; /* B9 set - UNCLASSIFIED */
+	cfpkt_add_head(pkt, &packet_type, 1);
 
 	/* Add info for MUX-layer to route the packet out. */
 	info = cfpkt_info(pkt);
-- 
cgit v1.2.3


From aa8673599f1d269b4e4d9b0c0f61fca57bc02699 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 11 Apr 2011 18:59:05 -0700
Subject: llc: Fix length check in llc_fixup_skb().

Fixes bugzilla #32872

The LLC stack pretends to support non-linear skbs but there is a
direct use of skb_tail_pointer() in llc_fixup_skb().

Use pskb_may_pull() to see if data_size bytes remain and can be
accessed linearly in the packet, instead of direct pointer checks.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_input.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 058f1e9a9128..903242111317 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -121,8 +121,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
 		s32 data_size = ntohs(pdulen) - llc_len;
 
 		if (data_size < 0 ||
-		    ((skb_tail_pointer(skb) -
-		      (u8 *)pdu) - llc_len) < data_size)
+		    !pskb_may_pull(skb, data_size))
 			return 0;
 		if (unlikely(pskb_trim_rcsum(skb, data_size)))
 			return 0;
-- 
cgit v1.2.3


From f8e9881c2aef1e982e5abc25c046820cd0b7cf64 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 12 Apr 2011 13:39:14 -0700
Subject: bridge: reset IPCB in br_parse_ip_options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 462fb2af9788a82 (bridge : Sanitize skb before it enters the IP
stack), missed one IPCB init before calling ip_options_compile()

Thanks to Scot Doyle for his tests and bug reports.

Reported-by: Scot Doyle <lkml@scotdoyle.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Acked-by: Bandan Das <bandan.das@stratus.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Cc: Jan Lübbe <jluebbe@debian.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 008ff6c4eecf..f3bc322c5891 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -249,11 +249,9 @@ static int br_parse_ip_options(struct sk_buff *skb)
 		goto drop;
 	}
 
-	/* Zero out the CB buffer if no options present */
-	if (iph->ihl == 5) {
-		memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (iph->ihl == 5)
 		return 0;
-	}
 
 	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
 	if (ip_options_compile(dev_net(dev), opt, skb))
-- 
cgit v1.2.3


From 66944e1c5797562cebe2d1857d46dff60bf9a69e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 11 Apr 2011 22:39:40 +0000
Subject: inetpeer: reduce stack usage

On 64bit arches, we use 752 bytes of stack when cleanup_once() is called
from inet_getpeer().

Lets share the avl stack to save ~376 bytes.

Before patch :

# objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl

0x000006c3 unlink_from_pool [inetpeer.o]:		376
0x00000721 unlink_from_pool [inetpeer.o]:		376
0x00000cb1 inet_getpeer [inetpeer.o]:			376
0x00000e6d inet_getpeer [inetpeer.o]:			376
0x0004 inet_initpeers [inetpeer.o]:			112
# size net/ipv4/inetpeer.o
   text	   data	    bss	    dec	    hex	filename
   5320	    432	     21	   5773	   168d	net/ipv4/inetpeer.o

After patch :

objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl
0x00000c11 inet_getpeer [inetpeer.o]:			376
0x00000dcd inet_getpeer [inetpeer.o]:			376
0x00000ab9 peer_check_expire [inetpeer.o]:		328
0x00000b7f peer_check_expire [inetpeer.o]:		328
0x0004 inet_initpeers [inetpeer.o]:			112
# size net/ipv4/inetpeer.o
   text	   data	    bss	    dec	    hex	filename
   5163	    432	     21	   5616	   15f0	net/ipv4/inetpeer.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Scot Doyle <lkml@scotdoyle.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Reviewed-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index dd1b20eca1a2..9df4e635fb5f 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -354,7 +354,8 @@ static void inetpeer_free_rcu(struct rcu_head *head)
 }
 
 /* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
+			     struct inet_peer __rcu **stack[PEER_MAXDEPTH])
 {
 	int do_free;
 
@@ -368,7 +369,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
 	 */
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
-		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 		struct inet_peer __rcu ***stackptr, ***delp;
 		if (lookup(&p->daddr, stack, base) != p)
 			BUG();
@@ -422,7 +422,7 @@ static struct inet_peer_base *peer_to_base(struct inet_peer *p)
 }
 
 /* May be called with local BH enabled. */
-static int cleanup_once(unsigned long ttl)
+static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
 {
 	struct inet_peer *p = NULL;
 
@@ -454,7 +454,7 @@ static int cleanup_once(unsigned long ttl)
 		 * happen because of entry limits in route cache. */
 		return -1;
 
-	unlink_from_pool(p, peer_to_base(p));
+	unlink_from_pool(p, peer_to_base(p), stack);
 	return 0;
 }
 
@@ -524,7 +524,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 
 	if (base->total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
-		cleanup_once(0);
+		cleanup_once(0, stack);
 
 	return p;
 }
@@ -540,6 +540,7 @@ static void peer_check_expire(unsigned long dummy)
 {
 	unsigned long now = jiffies;
 	int ttl, total;
+	struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 
 	total = compute_total();
 	if (total >= inet_peer_threshold)
@@ -548,7 +549,7 @@ static void peer_check_expire(unsigned long dummy)
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
 					total / inet_peer_threshold * HZ;
-	while (!cleanup_once(ttl)) {
+	while (!cleanup_once(ttl, stack)) {
 		if (jiffies != now)
 			break;
 	}
-- 
cgit v1.2.3


From 192910a6cca5e50e5bd6cbd1da0e7376c7adfe62 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Tue, 12 Apr 2011 13:59:33 -0700
Subject: net: Do not wrap sysctl igmp_max_memberships in IP_MULTICAST

controlling igmp_max_membership is useful even when IP_MULTICAST
is off.
Quagga(an OSPF deamon) uses multicast addresses for all interfaces
using a single socket and hits igmp_max_membership limit when
there are 20 interfaces or more.
Always export sysctl igmp_max_memberships in proc, just like
igmp_max_msf

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1a456652086b..321e6e84dbcc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -311,7 +311,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_do_large_bitmap,
 	},
-#ifdef CONFIG_IP_MULTICAST
 	{
 		.procname	= "igmp_max_memberships",
 		.data		= &sysctl_igmp_max_memberships,
@@ -319,8 +318,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-
-#endif
 	{
 		.procname	= "igmp_max_msf",
 		.data		= &sysctl_igmp_max_msf,
-- 
cgit v1.2.3


From 020318d0d2af51e0fd59ba654ede9b2171558720 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 12 Apr 2011 15:29:54 -0700
Subject: irda: fix locking unbalance in irda_sendmsg

5b40964eadea40509d353318d2c82e8b7bf5e8a5 ("irda: Remove BKL instances
from af_irda.c") introduced a path where we have a locking unbalance.
If we pass invalid flags, we unlock a socket we never locked,
resulting in this...

=====================================
[ BUG: bad unlock balance detected! ]
-------------------------------------
trinity/20101 is trying to release lock (sk_lock-AF_IRDA) at:
[<ffffffffa057f001>] irda_sendmsg+0x207/0x21d [irda]
but there are no more locks to release!

other info that might help us debug this:
no locks held by trinity/20101.

stack backtrace:
Pid: 20101, comm: trinity Not tainted 2.6.39-rc3+ #3
Call Trace:
 [<ffffffffa057f001>] ? irda_sendmsg+0x207/0x21d [irda]
 [<ffffffff81085041>] print_unlock_inbalance_bug+0xc7/0xd2
 [<ffffffffa057f001>] ? irda_sendmsg+0x207/0x21d [irda]
 [<ffffffff81086aca>] lock_release+0xcf/0x18e
 [<ffffffff813ed190>] release_sock+0x2d/0x155
 [<ffffffffa057f001>] irda_sendmsg+0x207/0x21d [irda]
 [<ffffffff813e9f8c>] __sock_sendmsg+0x69/0x75
 [<ffffffff813ea105>] sock_sendmsg+0xa1/0xb6
 [<ffffffff81100ca3>] ? might_fault+0x5c/0xac
 [<ffffffff81086b7c>] ? lock_release+0x181/0x18e
 [<ffffffff81100cec>] ? might_fault+0xa5/0xac
 [<ffffffff81100ca3>] ? might_fault+0x5c/0xac
 [<ffffffff81133b94>] ? fcheck_files+0xb9/0xf0
 [<ffffffff813f387a>] ? copy_from_user+0x2f/0x31
 [<ffffffff813f3b70>] ? verify_iovec+0x52/0xa6
 [<ffffffff813eb4e3>] sys_sendmsg+0x23a/0x2b8
 [<ffffffff81086b7c>] ? lock_release+0x181/0x18e
 [<ffffffff810773c6>] ? up_read+0x28/0x2c
 [<ffffffff814bec3d>] ? do_page_fault+0x360/0x3b4
 [<ffffffff81087043>] ? trace_hardirqs_on_caller+0x10b/0x12f
 [<ffffffff810458aa>] ? finish_task_switch+0xb2/0xe3
 [<ffffffff8104583e>] ? finish_task_switch+0x46/0xe3
 [<ffffffff8108364a>] ? trace_hardirqs_off_caller+0x33/0x90
 [<ffffffff814bbaf9>] ? retint_swapgs+0x13/0x1b
 [<ffffffff81087043>] ? trace_hardirqs_on_caller+0x10b/0x12f
 [<ffffffff810a9dd3>] ? audit_syscall_entry+0x11c/0x148
 [<ffffffff8125609e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff814c22c2>] system_call_fastpath+0x16/0x1b

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index c9890e25cd4c..cc616974a447 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1297,8 +1297,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 	/* Note : socket.c set MSG_EOR on SEQPACKET sockets */
 	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
 			       MSG_NOSIGNAL)) {
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	lock_sock(sk);
-- 
cgit v1.2.3


From bfac3693c426d280b026f6a1b77dc2294ea43fea Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 12 Apr 2011 15:33:23 -0700
Subject: ieee802154: Remove hacked CFLAGS in net/ieee802154/Makefile

It adds -Wall (which the kernel carefully controls already) and of all
things -DDEBUG (which should be set by other means if desired, please
we have dynamic-debug these days).

Kill this noise.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/Makefile | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index ce2d33582859..5761185f884e 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,5 +1,3 @@
 obj-$(CONFIG_IEEE802154) +=	ieee802154.o af_802154.o
 ieee802154-y		:= netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o
 af_802154-y		:= af_ieee802154.o raw.o dgram.o
-
-ccflags-y += -Wall -DDEBUG
-- 
cgit v1.2.3


From ea2d36883ca8e6caab23b6d15bfa80b1d1d81d2f Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 12 Apr 2011 14:38:37 +0000
Subject: net: Disable all TSO features when SG is disabled

The feature flags NETIF_F_TSO and NETIF_F_TSO6 independently enable
TSO for IPv4 and IPv6 respectively.  However, the test in
netdev_fix_features() and its predecessor functions was never updated
to check for NETIF_F_TSO6, possibly because it was originally proposed
that TSO for IPv6 would be dependent on both feature flags.

Now that these feature flags can be changed independently from
user-space and we depend on netdev_fix_features() to fix invalid
feature combinations, it's important to disable them both if
scatter-gather is disabled.  Also disable NETIF_F_TSO_ECN so
user-space sees all TSO features as disabled.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 956d3b006e8b..6401fb588145 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5203,9 +5203,9 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 	}
 
 	/* TSO requires that SG is present as well. */
-	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
-		netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n");
-		features &= ~NETIF_F_TSO;
+	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
+		netdev_info(dev, "Dropping TSO features since no SG feature.\n");
+		features &= ~NETIF_F_ALL_TSO;
 	}
 
 	/* Software GSO depends on SG. */
-- 
cgit v1.2.3


From 31d8b9e099e59f880aa65095951559896d4e20fa Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 12 Apr 2011 14:47:15 +0000
Subject: net: Disable NETIF_F_TSO_ECN when TSO is disabled

NETIF_F_TSO_ECN has no effect when TSO is disabled; this just means
that feature state will be accurately reported to user-space.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6401fb588145..c2ac599fa0f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5208,6 +5208,10 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 		features &= ~NETIF_F_ALL_TSO;
 	}
 
+	/* TSO ECN requires that TSO is present as well. */
+	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
+		features &= ~NETIF_F_TSO_ECN;
+
 	/* Software GSO depends on SG. */
 	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
 		netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
-- 
cgit v1.2.3


From 25f7bf7d0dfb460505cbe42676340e33100aca2e Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Tue, 12 Apr 2011 15:20:48 +0000
Subject: sctp: fix oops when updating retransmit path with DEBUG on

commit fbdf501c9374966a56829ecca3a7f25d2b49a305
  sctp: Do no select unconfirmed transports for retransmissions

Introduced the initial falt.

commit d598b166ced20d9b9281ea3527c0e18405ddb803
  sctp: Make sure we always return valid retransmit path

Solved the problem, but forgot to change the DEBUG statement.
Thus it was still possible to dereference a NULL pointer.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0698cad61763..922fdd7eb573 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1323,6 +1323,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 
 	if (t)
 		asoc->peer.retran_path = t;
+	else
+		t = asoc->peer.retran_path;
 
 	SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
 				 " %p addr: ",
-- 
cgit v1.2.3


From 9494c7c5774d64a84a269aad38c153c4dbff97e6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Tue, 12 Apr 2011 15:22:22 +0000
Subject: sctp: fix oops while removed transport still using as retran path

Since we can not update retran path to unconfirmed transports,
when we remove a peer, the retran path may not be update if the
other transports are all unconfirmed, and we will still using
the removed transport as the retran path. This may cause panic
if retrasnmit happen.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 922fdd7eb573..1a21c571aa03 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -569,6 +569,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
 		sctp_assoc_set_primary(asoc, transport);
 	if (asoc->peer.active_path == peer)
 		asoc->peer.active_path = transport;
+	if (asoc->peer.retran_path == peer)
+		asoc->peer.retran_path = transport;
 	if (asoc->peer.last_data_from == peer)
 		asoc->peer.last_data_from = transport;
 
-- 
cgit v1.2.3


From 0e8a835aa59d08d702af0fcfd296e2218b2e344b Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 13 Apr 2011 13:43:23 +0200
Subject: netfilter: ipset: bitmap:ip,mac type requires "src" for MAC

Enforce that the second "src/dst" parameter of the set match and SET target
must be "src", because we have access to the source MAC only in the packet.
The previous behaviour, that the type required the second parameter
but actually ignored the value was counter-intuitive and confusing.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 00a33242e90c..a274300b6a56 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -343,6 +343,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct ipmac data;
 
+	/* MAC can be src only */
+	if (!(flags & IPSET_DIM_TWO_SRC))
+		return 0;
+
 	data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
 	if (data.id < map->first_ip || data.id > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
-- 
cgit v1.2.3


From eafbd3fde6fc5ada0d61307367e408813b04928a Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Wed, 13 Apr 2011 13:45:57 +0200
Subject: netfilter: ipset: set match and SET target fixes

The SET target with --del-set did not work due to using wrongly
the internal dimension of --add-set instead of --del-set.
Also, the checkentries did not release the set references when
returned an error. Bugs reported by Lennert Buytenhek.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_set.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 061d48cec137..b3babaed7719 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,6 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
+		ip_set_nfnl_put(info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -135,6 +136,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -142,6 +145,10 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	    info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
+		if (info->add_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->add_set.index);
+		if (info->del_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -192,6 +199,7 @@ set_match_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
+		ip_set_nfnl_put(info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -219,7 +227,7 @@ set_target(struct sk_buff *skb, const struct xt_action_param *par)
 	if (info->del_set.index != IPSET_INVALID_ID)
 		ip_set_del(info->del_set.index,
 			   skb, par->family,
-			   info->add_set.dim,
+			   info->del_set.dim,
 			   info->del_set.flags);
 
 	return XT_CONTINUE;
@@ -245,13 +253,19 @@ set_target_checkentry(const struct xt_tgchk_param *par)
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(info->add_set.index);
 			return -ENOENT;
 		}
 	}
 	if (info->add_set.dim > IPSET_DIM_MAX ||
-	    info->del_set.flags > IPSET_DIM_MAX) {
+	    info->del_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
+		if (info->add_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->add_set.index);
+		if (info->del_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->del_set.index);
 		return -ERANGE;
 	}
 
-- 
cgit v1.2.3


From 3e8c806a08c7beecd972e7ce15c570b9aba64baa Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 13 Apr 2011 12:01:14 -0700
Subject: Revert "tcp: disallow bind() to reuse addr/port"

This reverts commit c191a836a908d1dd6b40c503741f91b914de3348.

It causes known regressions for programs that expect to be able to use
SO_REUSEADDR to shutdown a socket, then successfully rebind another
socket to the same ID.

Programs such as haproxy and amavisd expect this to work.

This should fix kernel bugzilla 32832.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c  | 5 ++---
 net/ipv6/inet6_connection_sock.c | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6c0b7f4a3d7d..38f23e721b80 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
 			if (!reuse || !sk2->sk_reuse ||
-			    ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
+			    sk2->sk_state == TCP_LISTEN) {
 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -122,8 +122,7 @@ again:
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
-						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
-						    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
 							spin_unlock(&head->lock);
 							snum = smallest_rover;
 							goto have_snum;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 166054650466..f2c5b0fc0f21 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
 		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) &&
+		     sk2->sk_state == TCP_LISTEN) &&
 		     ipv6_rcv_saddr_equal(sk, sk2))
 			break;
 	}
-- 
cgit v1.2.3


From d1a8016a2d1e75021ecc8715e3c81442d7218eb6 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Wed, 13 Apr 2011 14:31:28 -0400
Subject: NFS: Fix infinite loop in gss_create_upcall()

There can be an infinite loop if gss_create_upcall() is called without
the userspace program running.  To prevent this, we return -EACCES if
we notice that pipe_version hasn't changed (indicating that the pipe
has not been opened).

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f3914d0c5079..339ba64cce1e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -520,7 +520,7 @@ gss_refresh_upcall(struct rpc_task *task)
 		warn_gssd();
 		task->tk_timeout = 15*HZ;
 		rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-		return 0;
+		return -EAGAIN;
 	}
 	if (IS_ERR(gss_msg)) {
 		err = PTR_ERR(gss_msg);
@@ -563,10 +563,12 @@ retry:
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		err = wait_event_interruptible_timeout(pipe_version_waitqueue,
 				pipe_version >= 0, 15*HZ);
+		if (pipe_version < 0) {
+			warn_gssd();
+			err = -EACCES;
+		}
 		if (err)
 			goto out;
-		if (pipe_version < 0)
-			warn_gssd();
 		goto retry;
 	}
 	if (IS_ERR(gss_msg)) {
-- 
cgit v1.2.3


From c65353daf137dd41f3ede3baf62d561fca076228 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 14 Apr 2011 05:55:37 +0000
Subject: ip: ip_options_compile() resilient to NULL skb route

Scot Doyle demonstrated ip_options_compile() could be called with an skb
without an attached route, using a setup involving a bridge, netfilter,
and forged IP packets.

Let's make ip_options_compile() and ip_options_rcv_srr() a bit more
robust, instead of changing bridge/netfilter code.

With help from Hiroaki SHIMODA.

Reported-by: Scot Doyle <lkml@scotdoyle.com>
Tested-by: Scot Doyle <lkml@scotdoyle.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Hiroaki SHIMODA <shimoda.hiroaki@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_options.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 28a736f3442f..2391b24e8251 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -329,7 +329,7 @@ int ip_options_compile(struct net *net,
 					pp_ptr = optptr + 2;
 					goto error;
 				}
-				if (skb) {
+				if (rt) {
 					memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
 					opt->is_changed = 1;
 				}
@@ -371,7 +371,7 @@ int ip_options_compile(struct net *net,
 						goto error;
 					}
 					opt->ts = optptr - iph;
-					if (skb) {
+					if (rt)  {
 						memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
 						timeptr = (__be32*)&optptr[optptr[2]+3];
 					}
@@ -603,7 +603,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	unsigned long orefdst;
 	int err;
 
-	if (!opt->srr)
+	if (!opt->srr || !rt)
 		return 0;
 
 	if (skb->pkt_type != PACKET_HOST)
-- 
cgit v1.2.3


From df5d8c80f1871d9e79af4b0f3656a9528a7d4bab Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 24 Mar 2011 20:38:35 +0530
Subject: 9p: revert tsyncfs related changes

Now that we use write_inode to flush server
cache related to fid, we don't need tsyncfs either fort dotl or dotu
protocols. For dotu this helps to do a more efficient server flush.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 48b8e084e710..d72aac7d25cc 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1220,27 +1220,6 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fsync);
 
-int p9_client_sync_fs(struct p9_fid *fid)
-{
-	int err = 0;
-	struct p9_req_t *req;
-	struct p9_client *clnt;
-
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
-
-	clnt = fid->clnt;
-	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto error;
-	}
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
-	p9_free_req(clnt, req);
-error:
-	return err;
-}
-EXPORT_SYMBOL(p9_client_sync_fs);
-
 int p9_client_clunk(struct p9_fid *fid)
 {
 	int err;
-- 
cgit v1.2.3


From bd8c8ade6b6f109bc3dce14a8d12013f27f2a590 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 24 Mar 2011 23:14:46 +0530
Subject: 9p: Fix sparse error

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/protocol.c     |  3 ++-
 net/9p/trans_common.c |  2 +-
 net/9p/trans_virtio.c | 15 +++++++++++----
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 8a4084fa8b5a..a7e899740041 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -468,7 +468,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 		case 'E':{
 				 int32_t cnt = va_arg(ap, int32_t);
 				 const char *k = va_arg(ap, const void *);
-				 const char *u = va_arg(ap, const void *);
+				 const char __user *u = va_arg(ap,
+							const void __user *);
 				 errcode = p9pdu_writef(pdu, proto_version, "d",
 						 cnt);
 				 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index d47880e971dd..e883172f9aa2 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -66,7 +66,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 	uint32_t pdata_mapped_pages;
 	struct trans_rpage_info  *rpinfo;
 
-	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+	*pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
 
 	if (*pdata_off)
 		first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off),
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e8f046b07182..244e70742183 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -326,8 +326,11 @@ req_retry_pinned:
 			outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
 					pdata_off, rpinfo->rp_data, pdata_len);
 		} else {
-			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
-								req->tc->pkbuf;
+			char *pbuf;
+			if (req->tc->pubuf)
+				pbuf = (__force char *) req->tc->pubuf;
+			else
+				pbuf = req->tc->pkbuf;
 			outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
 					req->tc->pbuf_size);
 		}
@@ -352,8 +355,12 @@ req_retry_pinned:
 			in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
 					pdata_off, rpinfo->rp_data, pdata_len);
 		} else {
-			char *pbuf = req->tc->pubuf ? req->tc->pubuf :
-								req->tc->pkbuf;
+			char *pbuf;
+			if (req->tc->pubuf)
+				pbuf = (__force char *) req->tc->pubuf;
+			else
+				pbuf = req->tc->pkbuf;
+
 			in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
 					pbuf, req->tc->pbuf_size);
 		}
-- 
cgit v1.2.3


From b76225e22ac98070325ee2ba89473c1e1360c4cb Mon Sep 17 00:00:00 2001
From: Harsh Prateek Bora <harsh@linux.vnet.ibm.com>
Date: Thu, 31 Mar 2011 15:49:39 +0530
Subject: net/9p: nwname should be an unsigned int

Signed-off-by: Harsh Prateek Bora <harsh@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric VAn Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c   | 8 ++++----
 net/9p/protocol.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index d72aac7d25cc..77367745be9b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -929,15 +929,15 @@ error:
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
-	int clone)
+struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
+		char **wnames, int clone)
 {
 	int err;
 	struct p9_client *clnt;
 	struct p9_fid *fid;
 	struct p9_qid *wqids;
 	struct p9_req_t *req;
-	int16_t nwqids, count;
+	uint16_t nwqids, count;
 
 	err = 0;
 	wqids = NULL;
@@ -955,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 		fid = oldfid;
 
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n",
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n",
 		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
 
 	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index a7e899740041..b58a501cf3d1 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -265,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'T':{
-				int16_t *nwname = va_arg(ap, int16_t *);
+				uint16_t *nwname = va_arg(ap, uint16_t *);
 				char ***wnames = va_arg(ap, char ***);
 
 				errcode = p9pdu_readf(pdu, proto_version,
@@ -496,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'T':{
-				int16_t nwname = va_arg(ap, int);
+				uint16_t nwname = va_arg(ap, int);
 				const char **wnames = va_arg(ap, const char **);
 
 				errcode = p9pdu_writef(pdu, proto_version, "w",
-- 
cgit v1.2.3


From e3b2854faabd10438f5e7e34e078b099c3375577 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Apr 2011 12:58:56 -0400
Subject: SUNRPC: Fix the SUNRPC Kerberos V RPCSEC_GSS module dependencies

Since kernel 2.6.35, the SUNRPC Kerberos support has had an implicit
dependency on a number of additional crypto modules. The following
patch makes that dependency explicit.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/Kconfig | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 8873fd8ddacd..b2198e65d8bb 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,14 +18,13 @@ config SUNRPC_XPRT_RDMA
 	  If unsure, say N.
 
 config RPCSEC_GSS_KRB5
-	tristate
+	tristate "Secure RPC: Kerberos V mechanism"
 	depends on SUNRPC && CRYPTO
-	prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4)
+	depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
+	depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
+	depends on CRYPTO_ARC4
 	default y
 	select SUNRPC_GSS
-	select CRYPTO_MD5
-	select CRYPTO_DES
-	select CRYPTO_CBC
 	help
 	  Choose Y here to enable Secure RPC using the Kerberos version 5
 	  GSS-API mechanism (RFC 1964).
-- 
cgit v1.2.3


From 468f86134ee515234afe5c5b3f39f266c50e61a5 Mon Sep 17 00:00:00 2001
From: Bryan Schumaker <bjschuma@netapp.com>
Date: Mon, 18 Apr 2011 15:57:32 -0400
Subject: NFSv4.1: Don't update sequence number if rpc_task is not sent

If we fail to contact the gss upcall program, then no message will
be sent to the server.  The client still updated the sequence number,
however, and this lead to NFS4ERR_SEQ_MISMATCH for the next several
RPC calls.

Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprt.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9494c3767356..ce5eb68a9664 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -906,6 +906,7 @@ void xprt_transmit(struct rpc_task *task)
 	}
 
 	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
 	xprt->ops->set_retrans_timeout(task);
-- 
cgit v1.2.3


From 7a74aeb022b34a8fa8ef00545e66cf0568b5ddf6 Mon Sep 17 00:00:00 2001
From: Ville Tervo <ville.tervo@nokia.com>
Date: Thu, 7 Apr 2011 14:59:50 +0300
Subject: Bluetooth: Fix refcount balance for hci connection

hci_io_capa_reply_evt() holds reference for hciconnection. It's useless since
hci_io_capa_request_evt()/hci_simple_pair_complete_evt() already protects the
connection. In addition it leaves connection open after failed SSP pairing.

Signed-off-by: Ville Tervo <ville.tervo@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cebe7588469f..b2570159a044 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2387,8 +2387,6 @@ static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *s
 	if (!conn)
 		goto unlock;
 
-	hci_conn_hold(conn);
-
 	conn->remote_cap = ev->capability;
 	conn->remote_oob = ev->oob_data;
 	conn->remote_auth = ev->authentication;
-- 
cgit v1.2.3


From b79f44c16a4e2181b1d6423afe746745d5e949ff Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Date: Mon, 11 Apr 2011 18:46:55 -0300
Subject: Bluetooth: Fix keeping the command timer running

In the teardown path the reset command is sent to the controller,
this event causes the command timer to be reactivated.

So the timer is removed in two situations, when the adapter isn't
marked as UP and when we know that some command has been sent.

Reported-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2216620ff296..e7dced9080a5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -587,10 +587,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_req_cancel(hdev, ENODEV);
 	hci_req_lock(hdev);
 
-	/* Stop timer, it might be running */
-	del_timer_sync(&hdev->cmd_timer);
-
 	if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
+		del_timer_sync(&hdev->cmd_timer);
 		hci_req_unlock(hdev);
 		return 0;
 	}
@@ -629,6 +627,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 
 	/* Drop last sent command */
 	if (hdev->sent_cmd) {
+		del_timer_sync(&hdev->cmd_timer);
 		kfree_skb(hdev->sent_cmd);
 		hdev->sent_cmd = NULL;
 	}
-- 
cgit v1.2.3


From f21ca5fff6e548833fa5ee8867239a8378623150 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.dentz-von@nokia.com>
Date: Fri, 8 Apr 2011 17:10:41 +0300
Subject: Bluetooth: fix shutdown on SCO sockets

shutdown should wait for SCO link to be properly disconnected before
detroying the socket, otherwise an application using the socket may
assume link is properly disconnected before it really happens which
can be a problem when e.g synchronizing profile switch.

Signed-off-by: Luiz Augusto von Dentz <luiz.dentz-von@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/sco.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 42fdffd1d76c..94954c74f6ae 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -369,6 +369,15 @@ static void __sco_sock_close(struct sock *sk)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
+		if (sco_pi(sk)->conn) {
+			sk->sk_state = BT_DISCONN;
+			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
+			hci_conn_put(sco_pi(sk)->conn->hcon);
+			sco_pi(sk)->conn = NULL;
+		} else
+			sco_chan_del(sk, ECONNRESET);
+		break;
+
 	case BT_CONNECT:
 	case BT_DISCONN:
 		sco_chan_del(sk, ECONNRESET);
-- 
cgit v1.2.3


From a429b51930e64dd355840c37251a563000d7c10b Mon Sep 17 00:00:00 2001
From: Ruiyi Zhang <Ruiyi.zhang@atheros.com>
Date: Mon, 18 Apr 2011 11:04:30 +0800
Subject: Bluetooth: Only keeping SAR bits when retransmitting one frame.

When retrasmitting one frame, only SAR bits in control field should
be kept.

Signed-off-by: Ruiyi Zhang <Ruiyi.zhang@atheros.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ca27f3a41536..2c8dd4494c63 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1051,6 +1051,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 	tx_skb = skb_clone(skb, GFP_ATOMIC);
 	bt_cb(skb)->retries++;
 	control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+	control &= L2CAP_CTRL_SAR;
 
 	if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
 		control |= L2CAP_CTRL_FINAL;
-- 
cgit v1.2.3


From a8a8a0937e22a5fd55aeb22586724ba6bb70aadd Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 19 Apr 2011 15:59:15 +0200
Subject: netfilter: ipset: Fix the order of listing of sets

A restoreable saving of sets requires that list:set type of sets
come last and the code part which should have taken into account
the ordering was broken. The patch fixes the listing order.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e88ac3c3ed07..d87e03bc8ef8 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1022,8 +1022,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	if (cb->args[1] >= ip_set_max)
 		goto out;
 
-	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
 	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+dump_last:
+	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
 		set = ip_set_list[index];
@@ -1038,8 +1039,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		 * so that lists (unions of sets) are dumped last.
 		 */
 		if (cb->args[0] != DUMP_ONE &&
-		    !((cb->args[0] == DUMP_ALL) ^
-		      (set->type->features & IPSET_DUMP_LAST)))
+		    ((cb->args[0] == DUMP_ALL) ==
+		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
 		if (!cb->args[2]) {
@@ -1083,6 +1084,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 			goto release_refcount;
 		}
 	}
+	/* If we dump all sets, continue with dumping last ones */
+	if (cb->args[0] == DUMP_ALL) {
+		cb->args[0] = DUMP_LAST;
+		cb->args[1] = 0;
+		goto dump_last;
+	}
 	goto out;
 
 nla_put_failure:
@@ -1093,11 +1100,6 @@ release_refcount:
 		pr_debug("release set %s\n", ip_set_list[index]->name);
 		ip_set_put_byindex(index);
 	}
-
-	/* If we dump all sets, continue with dumping last ones */
-	if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
-		cb->args[0] = DUMP_LAST;
-
 out:
 	if (nlh) {
 		nlmsg_end(skb, nlh);
-- 
cgit v1.2.3


From c6914a6f261aca0c9f715f883a353ae7ff51fe83 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 19 Apr 2011 20:36:59 -0700
Subject: can: Add missing socket check in can/bcm release.

We can get here with a NULL socket argument passed from userspace,
so we need to handle it accordingly.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/bcm.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 57b1aed79014..8a6a05e7c3c8 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1427,9 +1427,14 @@ static int bcm_init(struct sock *sk)
 static int bcm_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
-	struct bcm_sock *bo = bcm_sk(sk);
+	struct bcm_sock *bo;
 	struct bcm_op *op, *next;
 
+	if (sk == NULL)
+		return 0;
+
+	bo = bcm_sk(sk);
+
 	/* remove bcm_ops, timer, rx_unregister(), etc. */
 
 	unregister_netdevice_notifier(&bo->notifier);
-- 
cgit v1.2.3


From 10022a6c66e199d8f61d9044543f38785713cbbd Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 20 Apr 2011 01:57:15 +0000
Subject: can: add missing socket check in can/raw release

v2: added space after 'if' according code style.

We can get here with a NULL socket argument passed from userspace,
so we need to handle it accordingly.

Thanks to Dave Jones pointing at this issue in net/can/bcm.c

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/raw.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/can/raw.c b/net/can/raw.c
index 649acfa7c70a..0eb39a7fdf64 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -305,7 +305,12 @@ static int raw_init(struct sock *sk)
 static int raw_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
-	struct raw_sock *ro = raw_sk(sk);
+	struct raw_sock *ro;
+
+	if (!sk)
+		return 0;
+
+	ro = raw_sk(sk);
 
 	unregister_netdevice_notifier(&ro->notifier);
 
-- 
cgit v1.2.3


From 243e6df4ed919880d079d717641ad699c6530a03 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Apr 2011 20:44:04 +0200
Subject: mac80211: fix SMPS debugfs locking

The locking with SMPS requests means that the
debugs file should lock the mgd mutex, not the
iflist mutex. Calls to __ieee80211_request_smps()
need to hold that mutex, so add an assertion.

This has always been wrong, but for some reason
never been noticed, probably because the locking
error only happens while unassociated.

Cc: stable@kernel.org [2.6.34+]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c            | 2 ++
 net/mac80211/debugfs_netdev.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 334213571ad0..44049733c4ea 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1504,6 +1504,8 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
 	enum ieee80211_smps_mode old_req;
 	int err;
 
+	lockdep_assert_held(&sdata->u.mgd.mtx);
+
 	old_req = sdata->u.mgd.req_smps;
 	sdata->u.mgd.req_smps = smps_mode;
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index dacace6b1393..9ea7c0d0103f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -177,9 +177,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return -EOPNOTSUPP;
 
-	mutex_lock(&local->iflist_mtx);
+	mutex_lock(&sdata->u.mgd.mtx);
 	err = __ieee80211_request_smps(sdata, smps_mode);
-	mutex_unlock(&local->iflist_mtx);
+	mutex_unlock(&sdata->u.mgd.mtx);
 
 	return err;
 }
-- 
cgit v1.2.3


From a9cf73ea7ff78f52662c8658d93c226effbbedde Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Tue, 19 Apr 2011 22:52:49 +0000
Subject: ipv6: udp: fix the wrong headroom check

At this point, skb->data points to skb_transport_header.
So, headroom check is wrong.

For some case:bridge(UFO is on) + eth device(UFO is off),
there is no enough headroom for IPv6 frag head.
But headroom check is always false.

This will bring about data be moved to there prior to skb->head,
when adding IPv6 frag header to skb.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 15c37746845e..9e305d74b3d4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1335,7 +1335,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Check if there is enough headroom to insert fragment header. */
-	if ((skb_headroom(skb) < frag_hdr_sz) &&
+	if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
 	    pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
 		goto out;
 
-- 
cgit v1.2.3


From e965c05dabdabb85af0187952ccd75e43995c4b3 Mon Sep 17 00:00:00 2001
From: Thomas Egerer <thomas.egerer@secunet.com>
Date: Wed, 20 Apr 2011 22:56:02 +0000
Subject: ipv6: Remove hoplimit initialization to -1

The changes introduced with git-commit a02e4b7d ("ipv6: Demark default
hoplimit as zero.") missed to remove the hoplimit initialization. As a
result, ipv6_get_mtu interprets the return value of dst_metric_raw
(-1) as 255 and answers ping6 with this hoplimit.  This patche removes
the line such that ping6 is answered with the hoplimit value
configured via sysctl.

Signed-off-by: Thomas Egerer <thomas.egerer@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 843406f14d7b..0a5d02ae5ceb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2012,7 +2012,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-- 
cgit v1.2.3


From f01cb5fbea1c1613621f9f32f385e12c1a29dde0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 21 Apr 2011 21:17:25 -0700
Subject: Revert "bridge: Forward reserved group addresses if !STP"

This reverts commit 1e253c3b8a1aeed51eef6fc366812f219b97de65.

It breaks 802.3ad bonding inside of a bridge.

The commit was meant to support transport bridging, and specifically
virtual machines bridged to an ethernet interface connected to a
switch port wiht 802.1x enabled.

But this isn't the way to do it, it breaks too many other things.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2160792e1bc..0c7badad62af 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -164,7 +164,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 			goto drop;
 
 		/* If STP is turned off, then forward */
-		if (p->br->stp_enabled == BR_NO_STP)
+		if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
 			goto forward;
 
 		if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-- 
cgit v1.2.3


From 7494d00c7b826b6ceb79ec33892bd0ef59be5614 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 24 Apr 2011 14:28:45 -0400
Subject: SUNRPC: Allow RPC calls to return ETIMEDOUT instead of EIO

On occasion, it is useful for the NFS layer to distinguish between
soft timeouts and other EIO errors due to (say) encoding errors,
or authentication errors.

The following patch ensures that the default behaviour of the RPC
layer remains to return EIO on soft timeouts (until we have
audited all the callers).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e7a96e478f63..8d83f9d48713 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task)
 		if (clnt->cl_chatty)
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
-		rpc_exit(task, -EIO);
+		if (task->tk_flags & RPC_TASK_TIMEOUT)
+			rpc_exit(task, -ETIMEDOUT);
+		else
+			rpc_exit(task, -EIO);
 		return;
 	}
 
-- 
cgit v1.2.3


From 0972ddb2373d5e127aabdcabd8305eff0242cd0b Mon Sep 17 00:00:00 2001
From: Held Bernhard <berny156@gmx.de>
Date: Sun, 24 Apr 2011 22:07:32 +0000
Subject: net: provide cow_metrics() methods to blackhole dst_ops

Since commit 62fa8a846d7d (net: Implement read-only protection and COW'ing
of metrics.) the kernel throws an oops.

[  101.620985] BUG: unable to handle kernel NULL pointer dereference at
           (null)
[  101.621050] IP: [<          (null)>]           (null)
[  101.621084] PGD 6e53c067 PUD 3dd6a067 PMD 0
[  101.621122] Oops: 0010 [#1] SMP
[  101.621153] last sysfs file: /sys/devices/virtual/ppp/ppp/uevent
[  101.621192] CPU 2
[  101.621206] Modules linked in: l2tp_ppp pppox ppp_generic slhc
l2tp_netlink l2tp_core deflate zlib_deflate twofish_x86_64
twofish_common des_generic cbc ecb sha1_generic hmac af_key
iptable_filter snd_pcm_oss snd_mixer_oss snd_seq snd_seq_device loop
snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec
snd_pcm snd_timer snd i2c_i801 iTCO_wdt psmouse soundcore snd_page_alloc
evdev uhci_hcd ehci_hcd thermal
[  101.621552]
[  101.621567] Pid: 5129, comm: openl2tpd Not tainted 2.6.39-rc4-Quad #3
Gigabyte Technology Co., Ltd. G33-DS3R/G33-DS3R
[  101.621637] RIP: 0010:[<0000000000000000>]  [<          (null)>]   (null)
[  101.621684] RSP: 0018:ffff88003ddeba60  EFLAGS: 00010202
[  101.621716] RAX: ffff88003ddb5600 RBX: ffff88003ddb5600 RCX:
0000000000000020
[  101.621758] RDX: ffffffff81a69a00 RSI: ffffffff81b7ee61 RDI:
ffff88003ddb5600
[  101.621800] RBP: ffff8800537cd900 R08: 0000000000000000 R09:
ffff88003ddb5600
[  101.621840] R10: 0000000000000005 R11: 0000000000014b38 R12:
ffff88003ddb5600
[  101.621881] R13: ffffffff81b7e480 R14: ffffffff81b7e8b8 R15:
ffff88003ddebad8
[  101.621924] FS:  00007f06e4182700(0000) GS:ffff88007fd00000(0000)
knlGS:0000000000000000
[  101.621971] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  101.622005] CR2: 0000000000000000 CR3: 0000000045274000 CR4:
00000000000006e0
[  101.622046] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[  101.622087] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
0000000000000400
[  101.622129] Process openl2tpd (pid: 5129, threadinfo
ffff88003ddea000, task ffff88003de9a280)
[  101.622177] Stack:
[  101.622191]  ffffffff81447efa ffff88007d3ded80 ffff88003de9a280
ffff88007d3ded80
[  101.622245]  0000000000000001 ffff88003ddebbb8 ffffffff8148d5a7
0000000000000212
[  101.622299]  ffff88003dcea000 ffff88003dcea188 ffffffff00000001
ffffffff81b7e480
[  101.622353] Call Trace:
[  101.622374]  [<ffffffff81447efa>] ? ipv4_blackhole_route+0x1ba/0x210
[  101.622415]  [<ffffffff8148d5a7>] ? xfrm_lookup+0x417/0x510
[  101.622450]  [<ffffffff8127672a>] ? extract_buf+0x9a/0x140
[  101.622485]  [<ffffffff8144c6a0>] ? __ip_flush_pending_frames+0x70/0x70
[  101.622526]  [<ffffffff8146fbbf>] ? udp_sendmsg+0x62f/0x810
[  101.622562]  [<ffffffff813f98a6>] ? sock_sendmsg+0x116/0x130
[  101.622599]  [<ffffffff8109df58>] ? find_get_page+0x18/0x90
[  101.622633]  [<ffffffff8109fd6a>] ? filemap_fault+0x12a/0x4b0
[  101.622668]  [<ffffffff813fb5c4>] ? move_addr_to_kernel+0x64/0x90
[  101.622706]  [<ffffffff81405d5a>] ? verify_iovec+0x7a/0xf0
[  101.622739]  [<ffffffff813fc772>] ? sys_sendmsg+0x292/0x420
[  101.622774]  [<ffffffff810b994a>] ? handle_pte_fault+0x8a/0x7c0
[  101.622810]  [<ffffffff810b76fe>] ? __pte_alloc+0xae/0x130
[  101.622844]  [<ffffffff810ba2f8>] ? handle_mm_fault+0x138/0x380
[  101.622880]  [<ffffffff81024af9>] ? do_page_fault+0x189/0x410
[  101.622915]  [<ffffffff813fbe03>] ? sys_getsockname+0xf3/0x110
[  101.622952]  [<ffffffff81450c4d>] ? ip_setsockopt+0x4d/0xa0
[  101.622986]  [<ffffffff813f9932>] ? sockfd_lookup_light+0x22/0x90
[  101.623024]  [<ffffffff814b61fb>] ? system_call_fastpath+0x16/0x1b
[  101.623060] Code:  Bad RIP value.
[  101.623090] RIP  [<          (null)>]           (null)
[  101.623125]  RSP <ffff88003ddeba60>
[  101.623146] CR2: 0000000000000000
[  101.650871] ---[ end trace ca3856a7d8e8dad4 ]---
[  101.651011] __sk_free: optmem leakage (160 bytes) detected.

The oops happens in dst_metrics_write_ptr()
include/net/dst.h:124: return dst->ops->cow_metrics(dst, p);

dst->ops->cow_metrics is NULL and causes the oops.

Provide cow_metrics() methods, like we did in commit 214f45c91bb
(net: provide default_advmss() methods to blackhole dst_ops)

Signed-off-by: Held Bernhard <berny156@gmx.de>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 7 +++++++
 net/ipv6/route.c | 7 +++++++
 2 files changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c1acf69858fd..99e6e4bb1c72 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2690,6 +2690,12 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
+					  unsigned long old)
+{
+	return NULL;
+}
+
 static struct dst_ops ipv4_dst_blackhole_ops = {
 	.family			=	AF_INET,
 	.protocol		=	cpu_to_be16(ETH_P_IP),
@@ -2698,6 +2704,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.default_mtu		=	ipv4_blackhole_default_mtu,
 	.default_advmss		=	ipv4_default_advmss,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
+	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
 };
 
 struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0a5d02ae5ceb..fd0eec6f88c6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -153,6 +153,12 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
 
+static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
+					 unsigned long old)
+{
+	return NULL;
+}
+
 static struct dst_ops ip6_dst_blackhole_ops = {
 	.family			=	AF_INET6,
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
@@ -161,6 +167,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.default_mtu		=	ip6_blackhole_default_mtu,
 	.default_advmss		=	ip6_default_advmss,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
+	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
 };
 
 static const u32 ip6_template_metrics[RTAX_MAX] = {
-- 
cgit v1.2.3


From 3f602b08dec32c418fc391fc838db357aab84f8a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 25 Apr 2011 19:39:24 +0000
Subject: xfrm: Fix replay window size calculation on initialization

On replay initialization, we compute the size of the replay
buffer to see if the replay window fits into the buffer.
This computation lacks a mutliplication by 8 because we need
the size in bit, not in byte. So we might return an error
even though the replay window would fit into the buffer.
This patch fixes this issue.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_replay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index f218385950ca..e8a781422feb 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -532,7 +532,7 @@ int xfrm_init_replay(struct xfrm_state *x)
 
 	if (replay_esn) {
 		if (replay_esn->replay_window >
-		    replay_esn->bmp_len * sizeof(__u32))
+		    replay_esn->bmp_len * sizeof(__u32) * 8)
 			return -EINVAL;
 
 	if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
-- 
cgit v1.2.3


From c0a56e64aec331f33ead29ba493ee184d9bdc840 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 25 Apr 2011 19:40:23 +0000
Subject: esp6: Fix scatterlist initialization

When we use IPsec extended sequence numbers, we may overwrite
the last scatterlist of the associated data by the scatterlist
for the skb. This patch fixes this by placing the scatterlist
for the skb right behind the last scatterlist of the associated
data. esp4 does it already like that.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/esp6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 5aa8ec88f194..59dccfbb5b11 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -371,7 +371,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	asg = esp_req_sg(aead, req);
-	sg = asg + 1;
+	sg = asg + sglists;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-- 
cgit v1.2.3


From 7833aa05b8db63484b43b4b4c389cd4533140afb Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 25 Apr 2011 19:41:21 +0000
Subject: xfrm: Check for the new replay implementation if an esn state is
 inserted

IPsec extended sequence numbers can be used only with the new
anti-replay window implementation. So check if the new implementation
is used if an esn state is inserted and return an error if it is not.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5d1d60d3ca83..c658cb3bc7c3 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -124,6 +124,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 {
 	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
 
+	if ((p->flags & XFRM_STATE_ESN) && !rt)
+		return -EINVAL;
+
 	if (!rt)
 		return 0;
 
-- 
cgit v1.2.3


From e9c549998dc24209847007e1f209f3b6c88d21ba Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Tue, 26 Apr 2011 23:28:26 -0700
Subject: Revert wrong fixes for common misspellings

These changes were incorrectly fixed by codespell. They were now
manually corrected.

Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
---
 net/l2tp/l2tp_ip.c  | 2 +-
 net/sctp/ulpevent.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index fce9bd3bd3fe..5c04f3e42704 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -667,7 +667,7 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP over IP");
 MODULE_VERSION("1.0");
 
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like
+/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
  * enums
  */
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index dff27d5e22fd..61b1f5ada96a 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
 	memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo));
 
 	/* Per TSVWG discussion with Randy. Allow the application to
-	 * resemble a fragmented message.
+	 * reassemble a fragmented message.
 	 */
 	ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags;
 
-- 
cgit v1.2.3


From b3b270054b80e6195b1d2b2ce082239911261839 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Tue, 26 Apr 2011 01:45:41 +0000
Subject: dsa/mv88e6131: fix unknown multicast/broadcast forwarding on
 mv88e6085

The 88e6085 has a few differences from the other devices in the port
control registers, causing unknown multicast/broadcast packets to get
dropped when using the standard port setup.

At the same time update kconfig to clarify that the mv88e6085 is now
supported.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig     |  4 ++--
 net/dsa/mv88e6131.c | 26 +++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 87bb5f4de0e8..c53ded2a98df 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -41,12 +41,12 @@ config NET_DSA_MV88E6XXX_NEED_PPU
 	default n
 
 config NET_DSA_MV88E6131
-	bool "Marvell 88E6095/6095F/6131 ethernet switch chip support"
+	bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
 	select NET_DSA_MV88E6XXX_NEED_PPU
 	select NET_DSA_TAG_DSA
 	---help---
-	  This enables support for the Marvell 88E6095/6095F/6131
+	  This enables support for the Marvell 88E6085/6095/6095F/6131
 	  ethernet switch chips.
 
 config NET_DSA_MV88E6123_61_65
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index 3da418894efc..45f7411e90ba 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -207,8 +207,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
 	 * mode, but do not enable forwarding of unknown unicasts.
 	 */
 	val = 0x0433;
-	if (p == dsa_upstream_port(ds))
+	if (p == dsa_upstream_port(ds)) {
 		val |= 0x0104;
+		/*
+		 * On 6085, unknown multicast forward is controlled
+		 * here rather than in Port Control 2 register.
+		 */
+		if (ps->id == ID_6085)
+			val |= 0x0008;
+	}
 	if (ds->dsa_port_mask & (1 << p))
 		val |= 0x0100;
 	REG_WRITE(addr, 0x04, val);
@@ -251,10 +258,19 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
 	 * If this is the upstream port for this switch, enable
 	 * forwarding of unknown multicast addresses.
 	 */
-	val = 0x0080 | dsa_upstream_port(ds);
-	if (p == dsa_upstream_port(ds))
-		val |= 0x0040;
-	REG_WRITE(addr, 0x08, val);
+	if (ps->id == ID_6085)
+		/*
+		 * on 6085, bits 3:0 are reserved, bit 6 control ARP
+		 * mirroring, and multicast forward is handled in
+		 * Port Control register.
+		 */
+		REG_WRITE(addr, 0x08, 0x0080);
+	else {
+		val = 0x0080 | dsa_upstream_port(ds);
+		if (p == dsa_upstream_port(ds))
+			val |= 0x0040;
+		REG_WRITE(addr, 0x08, val);
+	}
 
 	/*
 	 * Rate Control: disable ingress rate limiting.
-- 
cgit v1.2.3


From a05d2ad1c1f391c7f514a1d1e09b5417968a7d07 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 24 Apr 2011 01:54:57 +0000
Subject: af_unix: Only allow recv on connected seqpacket sockets.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the following oops discovered by Dan Aloni:
> Anyway, the following is the output of the Oops that I got on the
> Ubuntu kernel on which I first detected the problem
> (2.6.37-12-generic). The Oops that followed will be more useful, I
> guess.

>[ 5594.669852] BUG: unable to handle kernel NULL pointer dereference
> at           (null)
> [ 5594.681606] IP: [<ffffffff81550b7b>] unix_dgram_recvmsg+0x1fb/0x420
> [ 5594.687576] PGD 2a05d067 PUD 2b951067 PMD 0
> [ 5594.693720] Oops: 0002 [#1] SMP
> [ 5594.699888] last sysfs file:

The bug was that unix domain sockets use a pseduo packet for
connecting and accept uses that psudo packet to get the socket.
In the buggy seqpacket case we were allowing unconnected
sockets to call recvmsg and try to receive the pseudo packet.

That is always wrong and as of commit 7361c36c5 the pseudo
packet had become enough different from a normal packet
that the kernel started oopsing.

Do for seqpacket_recv what was done for seqpacket_send in 2.5
and only allow it on connected seqpacket sockets.

Cc: stable@kernel.org
Tested-by: Dan Aloni <dan@aloni.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3a43a8304768..b1d75beb7e20 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -524,6 +524,8 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *,
 			      int, int);
 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 				  struct msghdr *, size_t);
+static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
+				  struct msghdr *, size_t, int);
 
 static const struct proto_ops unix_stream_ops = {
 	.family =	PF_UNIX,
@@ -583,7 +585,7 @@ static const struct proto_ops unix_seqpacket_ops = {
 	.setsockopt =	sock_no_setsockopt,
 	.getsockopt =	sock_no_getsockopt,
 	.sendmsg =	unix_seqpacket_sendmsg,
-	.recvmsg =	unix_dgram_recvmsg,
+	.recvmsg =	unix_seqpacket_recvmsg,
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
 };
@@ -1699,6 +1701,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
 }
 
+static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
+			      struct msghdr *msg, size_t size,
+			      int flags)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
+}
+
 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 {
 	struct unix_sock *u = unix_sk(sk);
-- 
cgit v1.2.3


From 7cfd260910b881250cde76ba92ebe3cbf8493a8f Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 1 May 2011 02:04:11 +0000
Subject: ipv4: don't spam dmesg with "Using LC-trie" messages

fib_trie_table() is called during netns creation and
Chromium uses clone(CLONE_NEWNET) to sandbox renderer process.

Don't print anything.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e9013d6c1f51..5fe9b8b41df3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1978,9 +1978,6 @@ struct fib_table *fib_trie_table(u32 id)
 	t = (struct trie *) tb->tb_data;
 	memset(t, 0, sizeof(*t));
 
-	if (id == RT_TABLE_LOCAL)
-		pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
-
 	return tb;
 }
 
-- 
cgit v1.2.3


From 41c31f318a5209922d051e293c61e4724daad11c Mon Sep 17 00:00:00 2001
From: Lifeng Sun <lifongsun@gmail.com>
Date: Wed, 27 Apr 2011 22:04:51 +0000
Subject: networking: inappropriate ioctl operation should return ENOTTY

ioctl() calls against a socket with an inappropriate ioctl operation
are incorrectly returning EINVAL rather than ENOTTY:

  [ENOTTY]
      Inappropriate I/O control operation.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=33992

Signed-off-by: Lifeng Sun <lifongsun@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index c2ac599fa0f6..856b6ee9a1d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4773,7 +4773,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
 		 * is never reached
 		 */
 		WARN_ON(1);
-		err = -EINVAL;
+		err = -ENOTTY;
 		break;
 
 	}
@@ -5041,7 +5041,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		/* Set the per device memory buffer space.
 		 * Not applicable in our case */
 	case SIOCSIFLINK:
-		return -EINVAL;
+		return -ENOTTY;
 
 	/*
 	 *	Unknown or private ioctl.
@@ -5062,7 +5062,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		/* Take care of Wireless Extensions */
 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
 			return wext_handle_ioctl(net, &ifr, cmd, arg);
-		return -EINVAL;
+		return -ENOTTY;
 	}
 }
 
-- 
cgit v1.2.3


From ff538818f4a82c4cf02d2d6bd6ac5c7360b9d41d Mon Sep 17 00:00:00 2001
From: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
Date: Sun, 1 May 2011 01:44:01 +0000
Subject: sysctl: net: call unregister_net_sysctl_table where needed

ctl_table_headers registered with register_net_sysctl_table should
have been unregistered with the equivalent unregister_net_sysctl_table

Signed-off-by: Lucian Adrian Grijincu <lucian.grijincu@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c  | 2 +-
 net/ipv6/addrconf.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5345b0bee6df..cd9ca0811cfa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1680,7 +1680,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
 		return;
 
 	cnf->sysctl = NULL;
-	unregister_sysctl_table(t->sysctl_header);
+	unregister_net_sysctl_table(t->sysctl_header);
 	kfree(t->dev_name);
 	kfree(t);
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1493534116df..a7bda0757053 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4537,7 +4537,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
 
 	t = p->sysctl;
 	p->sysctl = NULL;
-	unregister_sysctl_table(t->sysctl_header);
+	unregister_net_sysctl_table(t->sysctl_header);
 	kfree(t->dev_name);
 	kfree(t);
 }
-- 
cgit v1.2.3


From ca20892db7567c40e8ed0668f46cf0d085d7db6d Mon Sep 17 00:00:00 2001
From: Henry C Chang <henry.cy.chang@gmail.com>
Date: Tue, 3 May 2011 02:29:56 +0000
Subject: libceph: fix ceph_msg_new error path

If memory allocation failed, calling ceph_msg_put() will cause GPF
since some of ceph_msg variables are not initialized first.

Fix Bug #970.

Signed-off-by: Henry C Chang <henry_c_chang@tcloudcomputing.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/messenger.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 05f357828a2f..e15a82ccc05f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2267,6 +2267,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	m->more_to_follow = false;
 	m->pool = NULL;
 
+	/* middle */
+	m->middle = NULL;
+
+	/* data */
+	m->nr_pages = 0;
+	m->page_alignment = 0;
+	m->pages = NULL;
+	m->pagelist = NULL;
+	m->bio = NULL;
+	m->bio_iter = NULL;
+	m->bio_seg = 0;
+	m->trail = NULL;
+
 	/* front */
 	if (front_len) {
 		if (front_len > PAGE_CACHE_SIZE) {
@@ -2286,19 +2299,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags)
 	}
 	m->front.iov_len = front_len;
 
-	/* middle */
-	m->middle = NULL;
-
-	/* data */
-	m->nr_pages = 0;
-	m->page_alignment = 0;
-	m->pages = NULL;
-	m->pagelist = NULL;
-	m->bio = NULL;
-	m->bio_iter = NULL;
-	m->bio_seg = 0;
-	m->trail = NULL;
-
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
 
-- 
cgit v1.2.3


From 4ad12621e442b7a072e81270808f617cb65c5672 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 3 May 2011 09:23:36 -0700
Subject: libceph: fix ceph_osdc_alloc_request error checks

ceph_osdc_alloc_request returns NULL on failure.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osd_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5a80f41c0cba..6b5dda1cb5df 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -470,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					 snapc, ops,
 					 use_mempool,
 					 GFP_NOFS, NULL, NULL);
-	if (IS_ERR(req))
-		return req;
+	if (!req)
+		return NULL;
 
 	/* calculate max write size */
 	calc_layout(osdc, vino, layout, off, plen, req, ops);
-- 
cgit v1.2.3


From 64f3b9e203bd06855072e295557dca1485a2ecba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 May 2011 10:02:26 +0000
Subject: net: ip_expire() must revalidate route

Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path)
added a bug in IP defragmentation handling, in case timeout is fired.

When a frame is defragmented, we use last skb dst field when building
final skb. Its dst is valid, since we are in rcu read section.

But if a timeout occurs, we take first queued fragment to build one ICMP
TIME EXCEEDED message. Problem is all queued skb have weak dst pointers,
since we escaped RCU critical section after their queueing. icmp_send()
might dereference a now freed (and possibly reused) part of memory.

Calling skb_dst_drop() and ip_route_input_noref() to revalidate route is
the only possible choice.

Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a1151b8adf3c..b1d282f11be7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -223,31 +223,30 @@ static void ip_expire(unsigned long arg)
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
 		struct sk_buff *head = qp->q.fragments;
+		const struct iphdr *iph;
+		int err;
 
 		rcu_read_lock();
 		head->dev = dev_get_by_index_rcu(net, qp->iif);
 		if (!head->dev)
 			goto out_rcu_unlock;
 
+		/* skb dst is stale, drop it, and perform route lookup again */
+		skb_dst_drop(head);
+		iph = ip_hdr(head);
+		err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+					   iph->tos, head->dev);
+		if (err)
+			goto out_rcu_unlock;
+
 		/*
-		 * Only search router table for the head fragment,
-		 * when defraging timeout at PRE_ROUTING HOOK.
+		 * Only an end host needs to send an ICMP
+		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
-		if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) {
-			const struct iphdr *iph = ip_hdr(head);
-			int err = ip_route_input(head, iph->daddr, iph->saddr,
-						 iph->tos, head->dev);
-			if (unlikely(err))
-				goto out_rcu_unlock;
-
-			/*
-			 * Only an end host needs to send an ICMP
-			 * "Fragment Reassembly Timeout" message, per RFC792.
-			 */
-			if (skb_rtable(head)->rt_type != RTN_LOCAL)
-				goto out_rcu_unlock;
+		if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
+		    skb_rtable(head)->rt_type != RTN_LOCAL)
+			goto out_rcu_unlock;
 
-		}
 
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-- 
cgit v1.2.3


From a294865978b701e4d0d90135672749531b9a900d Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 6 May 2011 03:27:18 +0000
Subject: dccp: handle invalid feature options length

A length of zero (after subtracting two for the type and len fields) for
the DCCPO_{CHANGE,CONFIRM}_{L,R} options will cause an underflow due to
the subtraction.  The subsequent code may read past the end of the
options value buffer when parsing.  I'm unsure of what the consequences
of this might be, but it's probably not good.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: stable@kernel.org
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/options.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index f06ffcfc8d71..4b2ab657ac8e 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
 			if (pkt_type == DCCP_PKT_DATA)      /* RFC 4340, 6 */
 				break;
+			if (len == 0)
+				goto out_invalid_option;
 			rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
 						    *value, value + 1, len - 1);
 			if (rc)
-- 
cgit v1.2.3


From b9f47a3aaeabdce3b42829bbb27765fa340f76ba Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 4 May 2011 10:04:56 +0000
Subject: tcp_cubic: limit delayed_ack ratio to prevent divide error

TCP Cubic keeps a metric that estimates the amount of delayed
acknowledgements to use in adjusting the window. If an abnormally
large number of packets are acknowledged at once, then the update
could wrap and reach zero. This kind of ACK could only
happen when there was a large window and huge number of
ACK's were lost.

This patch limits the value of delayed ack ratio. The choice of 32
is just a conservative value since normally it should be range of
1 to 4 packets.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 34340c9c95fa..f376b05cca81 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -93,6 +93,7 @@ struct bictcp {
 	u32	ack_cnt;	/* number of acks */
 	u32	tcp_cwnd;	/* estimated tcp cwnd */
 #define ACK_RATIO_SHIFT	4
+#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
 	u16	delayed_ack;	/* estimate the ratio of Packets/ACKs << 4 */
 	u8	sample_cnt;	/* number of samples to decide curr_rtt */
 	u8	found;		/* the exit point is found? */
@@ -398,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 	u32 delay;
 
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
-		ca->delayed_ack += cnt;
+		u32 ratio = ca->delayed_ack;
+
+		ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+		ratio += cnt;
+
+		ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
 	}
 
 	/* Some calls are for duplicates without timetamps */
-- 
cgit v1.2.3


From 3cd7967825a2b3926dc96ae566d986c4420919f7 Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Fri, 15 Apr 2011 13:59:33 +0530
Subject: net/9p: Handle get_user_pages_fast return properly

Use proper data type to handle get_user_pages_fast error condition. Also
do not treat EFAULT error as fatal.

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c       |  2 +-
 net/9p/trans_common.c | 11 +++--------
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 77367745be9b..a9aa2dd66482 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -614,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
-		if (err != -ERESTARTSYS)
+		if (err != -ERESTARTSYS && err != -EFAULT)
 			c->status = Disconnected;
 		goto reterr;
 	}
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index e883172f9aa2..9a70ebdec56e 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -63,7 +63,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 		int nr_pages, u8 rw)
 {
 	uint32_t first_page_bytes = 0;
-	uint32_t pdata_mapped_pages;
+	int32_t pdata_mapped_pages;
 	struct trans_rpage_info  *rpinfo;
 
 	*pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
@@ -75,14 +75,9 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
 	rpinfo = req->tc->private;
 	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
 			nr_pages, rw, &rpinfo->rp_data[0]);
+	if (pdata_mapped_pages <= 0)
+		return pdata_mapped_pages;
 
-	if (pdata_mapped_pages < 0) {
-		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p"
-				"nr_pages:%d\n", pdata_mapped_pages,
-				req->tc->pubuf, nr_pages);
-		pdata_mapped_pages = 0;
-		return -EIO;
-	}
 	rpinfo->rp_nr_pages = pdata_mapped_pages;
 	if (*pdata_off) {
 		*pdata_len = first_page_bytes;
-- 
cgit v1.2.3


From 315c34dae0069d0c67abd714bb846cd466289c7f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 21 Apr 2011 10:55:07 +0200
Subject: netfilter: ctnetlink: fix timestamp support for new conntracks

This patch fixes the missing initialization of the start time if
the timestamp support is enabled.

libnetfilter_conntrack/utils# conntrack -E &
libnetfilter_conntrack/utils# ./conntrack_create
tcp      6 109 ESTABLISHED src=1.1.1.1 dst=2.2.2.2 sport=1025 dport=21 packets=0 bytes=0 [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=21 dport=1025 packets=0 bytes=0 mark=0 delta-time=1303296401 use=2

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 30bf8a167fc8..482e90c61850 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1334,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	struct nf_conn *ct;
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
+	struct nf_conn_tstamp *tstamp;
 
 	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
@@ -1451,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 		__set_bit(IPS_EXPECTED_BIT, &ct->status);
 		ct->master = master_ct;
 	}
+	tstamp = nf_conn_tstamp_find(ct);
+	if (tstamp)
+		tstamp->start = ktime_to_ns(ktime_get_real());
 
 	add_timer(&ct->timeout);
 	nf_conntrack_hash_insert(ct);
-- 
cgit v1.2.3


From 5a6351eecf8c87afed9c883bb6341d09406d74ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 21 Apr 2011 10:57:21 +0200
Subject: netfilter: fix ebtables compat support

commit 255d0dc34068a976 (netfilter: x_table: speedup compat operations)
made ebtables not working anymore.

1) xt_compat_calc_jump() is not an exact match lookup
2) compat_table_info() has a typo in xt_compat_init_offsets() call
3) compat_do_replace() misses a xt_compat_init_offsets() call

Reported-by: dann frazier <dannf@dannf.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebtables.c | 3 ++-
 net/netfilter/x_tables.c        | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 893669caa8de..9707079bc40a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1766,7 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
 	newinfo->entries_size = size;
 
-	xt_compat_init_offsets(AF_INET, info->nentries);
+	xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries);
 	return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
 							entries, newinfo);
 }
@@ -2240,6 +2240,7 @@ static int compat_do_replace(struct net *net, void __user *user,
 
 	xt_compat_lock(NFPROTO_BRIDGE);
 
+	xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);
 	ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);
 	if (ret < 0)
 		goto out_unlock;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a9adf4c6b299..8a025a585d2f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -455,6 +455,7 @@ void xt_compat_flush_offsets(u_int8_t af)
 		vfree(xt[af].compat_tab);
 		xt[af].compat_tab = NULL;
 		xt[af].number = 0;
+		xt[af].cur = 0;
 	}
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
@@ -473,8 +474,7 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 		else
 			return mid ? tmp[mid - 1].delta : 0;
 	}
-	WARN_ON_ONCE(1);
-	return 0;
+	return left ? tmp[left - 1].delta : 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
-- 
cgit v1.2.3


From 103a9778e07bcc0cd34b5c35a87281454eec719e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 21 Apr 2011 10:58:25 +0200
Subject: netfilter: ebtables: only call xt_compat_add_offset once per rule

The optimizations in commit 255d0dc34068a976
(netfilter: x_table: speedup compat operations) assume that
xt_compat_add_offset is called once per rule.

ebtables however called it for each match/target found in a rule.

The match/watcher/target parser already returns the needed delta, so it
is sufficient to move the xt_compat_add_offset call to a more reasonable
location.

While at it, also get rid of the unused COMPAT iterator macros.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebtables.c | 61 ++++++-----------------------------------
 1 file changed, 9 insertions(+), 52 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9707079bc40a..1a92b369c820 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1882,7 +1882,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 	struct xt_match *match;
 	struct xt_target *wt;
 	void *dst = NULL;
-	int off, pad = 0, ret = 0;
+	int off, pad = 0;
 	unsigned int size_kern, entry_offset, match_size = mwt->match_size;
 
 	strlcpy(name, mwt->u.name, sizeof(name));
@@ -1935,13 +1935,6 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 		break;
 	}
 
-	if (!dst) {
-		ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset,
-					off + ebt_compat_entry_padsize());
-		if (ret < 0)
-			return ret;
-	}
-
 	state->buf_kern_offset += match_size + off;
 	state->buf_user_offset += match_size;
 	pad = XT_ALIGN(size_kern) - size_kern;
@@ -2016,50 +2009,6 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
 	return growth;
 }
 
-#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...)          \
-({                                                          \
-	unsigned int __i;                                   \
-	int __ret = 0;                                      \
-	struct compat_ebt_entry_mwt *__watcher;             \
-	                                                    \
-	for (__i = e->watchers_offset;                      \
-	     __i < (e)->target_offset;                      \
-	     __i += __watcher->watcher_size +               \
-	     sizeof(struct compat_ebt_entry_mwt)) {         \
-		__watcher = (void *)(e) + __i;              \
-		__ret = fn(__watcher , ## args);            \
-		if (__ret != 0)                             \
-			break;                              \
-	}                                                   \
-	if (__ret == 0) {                                   \
-		if (__i != (e)->target_offset)              \
-			__ret = -EINVAL;                    \
-	}                                                   \
-	__ret;                                              \
-})
-
-#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...)            \
-({                                                          \
-	unsigned int __i;                                   \
-	int __ret = 0;                                      \
-	struct compat_ebt_entry_mwt *__match;               \
-	                                                    \
-	for (__i = sizeof(struct ebt_entry);                \
-	     __i < (e)->watchers_offset;                    \
-	     __i += __match->match_size +                   \
-	     sizeof(struct compat_ebt_entry_mwt)) {         \
-		__match = (void *)(e) + __i;                \
-		__ret = fn(__match , ## args);              \
-		if (__ret != 0)                             \
-			break;                              \
-	}                                                   \
-	if (__ret == 0) {                                   \
-		if (__i != (e)->watchers_offset)            \
-			__ret = -EINVAL;                    \
-	}                                                   \
-	__ret;                                              \
-})
-
 /* called for all ebt_entry structures. */
 static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 			  unsigned int *total,
@@ -2132,6 +2081,14 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 		}
 	}
 
+	if (state->buf_kern_start == NULL) {
+		unsigned int offset = buf_start - (char *) base;
+
+		ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset);
+		if (ret < 0)
+			return ret;
+	}
+
 	startoff = state->buf_user_offset - startoff;
 
 	BUG_ON(*total < startoff);
-- 
cgit v1.2.3


From 1ae132b0347907ac95b8bc9dba37934f59d2a508 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans@schillstrom.com>
Date: Tue, 3 May 2011 22:09:30 +0200
Subject: IPVS: Change of socket usage to enable name space exit.

If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.

Thanks Eric W Biederman for your advices.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
[horms@verge.net.au: minor edit to changelog]
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_core.c |  2 +-
 net/netfilter/ipvs/ip_vs_sync.c | 58 ++++++++++++++++++++++++++---------------
 2 files changed, 38 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6b2401..a0791dc05a27 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }
 
 static struct pernet_operations ipvs_core_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e85e9c..0cce95310820 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
 	struct socket *sock;
 	int result;
 
-	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	/* First create a socket move it to right name space later */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
 	int result;
 
 	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;
 
@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)
 
 	return sock;
 
-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }
 
@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);
 
 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}
 
 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);
 
@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
@@ -1610,6 +1620,7 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	int retc = -EINVAL;
 
 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
-		kthread_stop(ipvs->master_thread);
+		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (!ipvs->backup_thread)
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
 			task_pid_nr(ipvs->backup_thread));
 
 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(ipvs->backup_thread);
+		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
-	} else {
-		return -EINVAL;
 	}
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
 
-	return 0;
+	return retc;
 }
 
 /*
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 
 static void __ip_vs_sync_cleanup(struct net *net)
 {
-	stop_sync_thread(net, IP_VS_STATE_MASTER);
-	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	int retc;
+
+	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Master Daemon\n");
+
+	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	if (retc && retc != -ESRCH)
+		pr_err("Failed to stop Backup Daemon\n");
 }
 
 static struct pernet_operations ipvs_sync_ops = {
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {
 
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
cgit v1.2.3


From 7a4f0761fce32ff4918a7c23b08db564ad33092d Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans@schillstrom.com>
Date: Tue, 3 May 2011 22:09:31 +0200
Subject: IPVS: init and cleanup restructuring

DESCRIPTION
This patch tries to restore the initial init and cleanup
sequences that was before namspace patch.
Netns also requires action when net devices unregister
which has never been implemented. I.e this patch also
covers when a device moves into a network namespace,
and has to be released.

IMPLEMENTATION
The number of calls to register_pernet_device have been
reduced to one for the ip_vs.ko
Schedulers still have their own calls.

This patch adds a function __ip_vs_service_cleanup()
and an enable flag for the netfilter hooks.

The nf hooks will be enabled when the first service is loaded
and never disabled again, except when a namespace exit starts.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
[horms@verge.net.au: minor edit to changelog]
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_app.c   |  15 +----
 net/netfilter/ipvs/ip_vs_conn.c  |  12 +---
 net/netfilter/ipvs/ip_vs_core.c  | 101 +++++++++++++++++++++++++++++---
 net/netfilter/ipvs/ip_vs_ctl.c   | 120 ++++++++++++++++++++++++++++++++-------
 net/netfilter/ipvs/ip_vs_est.c   |  14 +----
 net/netfilter/ipvs/ip_vs_proto.c |  11 +---
 net/netfilter/ipvs/ip_vs_sync.c  |  13 +----
 7 files changed, 206 insertions(+), 80 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 2dc6de13ac18..51f3af7c4743 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-static int __net_init __ip_vs_app_init(struct net *net)
+int __net_init __ip_vs_app_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit __ip_vs_app_cleanup(struct net *net)
+void __net_exit __ip_vs_app_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }
 
-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_app_init,
-	.exit = __ip_vs_app_cleanup,
-};
-
 int __init ip_vs_app_init(void)
 {
-	int rv;
-
-	rv = register_pernet_subsys(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }
 
 
 void ip_vs_app_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c97bd45975be..d3fd91bbba49 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+void __net_exit __ip_vs_conn_cleanup(struct net *net)
 {
 	/* flush all the connection entries first */
 	ip_vs_conn_flush(net);
 	proc_net_remove(net, "ip_vs_conn");
 	proc_net_remove(net, "ip_vs_conn_sync");
 }
-static struct pernet_operations ipvs_conn_ops = {
-	.init = __ip_vs_conn_init,
-	.exit = __ip_vs_conn_cleanup,
-};
 
 int __init ip_vs_conn_init(void)
 {
 	int idx;
-	int retc;
 
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
-	retc = register_pernet_subsys(&ipvs_conn_ops);
-
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-	return retc;
+	return 0;
 }
 
 void ip_vs_conn_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a0791dc05a27..a74dae6c5dbc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 		return NF_ACCEPT;
 
 	net = skb_net(skb);
+	if (!net_ipvs(net)->enable)
+		return NF_ACCEPT;
+
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6) {
@@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
 
 	net = skb_net(skb);
+
 	pd = ip_vs_proto_data_get(net, cih->protocol);
 	if (!pd)
 		return NF_ACCEPT;
@@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 			      IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
 		return NF_ACCEPT;
 	}
+	/* ipvs enabled in this netns ? */
+	net = skb_net(skb);
+	if (!net_ipvs(net)->enable)
+		return NF_ACCEPT;
+
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	/* Bad... Do not break raw sockets */
@@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}
 
-	net = skb_net(skb);
 	/* Protocol supported? */
 	pd = ip_vs_proto_data_get(net, iph.protocol);
 	if (unlikely(!pd))
@@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-	net = skb_net(skb);
 	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 		   int (*okfn)(struct sk_buff *))
 {
 	int r;
+	struct net *net;
 
 	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
+	/* ipvs enabled in this netns ? */
+	net = skb_net(skb);
+	if (!net_ipvs(net)->enable)
+		return NF_ACCEPT;
+
 	return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
@@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
 		      int (*okfn)(struct sk_buff *))
 {
 	int r;
+	struct net *net;
 
 	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
 		return NF_ACCEPT;
 
+	/* ipvs enabled in this netns ? */
+	net = skb_net(skb);
+	if (!net_ipvs(net)->enable)
+		return NF_ACCEPT;
+
 	return ip_vs_in_icmp_v6(skb, &r, hooknum);
 }
 #endif
@@ -1884,21 +1903,72 @@ static int __net_init __ip_vs_init(struct net *net)
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	/* Hold the beast until a service is registerd */
+	ipvs->enable = 0;
 	ipvs->net = net;
 	/* Counters used for creating unique names */
 	ipvs->gen = atomic_read(&ipvs_netns_cnt);
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
+
+	if (__ip_vs_estimator_init(net) < 0)
+		goto estimator_fail;
+
+	if (__ip_vs_control_init(net) < 0)
+		goto control_fail;
+
+	if (__ip_vs_protocol_init(net) < 0)
+		goto protocol_fail;
+
+	if (__ip_vs_app_init(net) < 0)
+		goto app_fail;
+
+	if (__ip_vs_conn_init(net) < 0)
+		goto conn_fail;
+
+	if (__ip_vs_sync_init(net) < 0)
+		goto sync_fail;
+
 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
 			 sizeof(struct netns_ipvs), ipvs->gen);
 	return 0;
+/*
+ * Error handling
+ */
+
+sync_fail:
+	__ip_vs_conn_cleanup(net);
+conn_fail:
+	__ip_vs_app_cleanup(net);
+app_fail:
+	__ip_vs_protocol_cleanup(net);
+protocol_fail:
+	__ip_vs_control_cleanup(net);
+control_fail:
+	__ip_vs_estimator_cleanup(net);
+estimator_fail:
+	return -ENOMEM;
 }
 
 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
+	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
+	__ip_vs_conn_cleanup(net);
+	__ip_vs_app_cleanup(net);
+	__ip_vs_protocol_cleanup(net);
+	__ip_vs_control_cleanup(net);
+	__ip_vs_estimator_cleanup(net);
 	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }
 
+static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+{
+	EnterFunction(2);
+	net_ipvs(net)->enable = 0;	/* Disable packet reception */
+	__ip_vs_sync_cleanup(net);
+	LeaveFunction(2);
+}
+
 static struct pernet_operations ipvs_core_ops = {
 	.init = __ip_vs_init,
 	.exit = __ip_vs_cleanup,
@@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = {
 	.size = sizeof(struct netns_ipvs),
 };
 
+static struct pernet_operations ipvs_core_dev_ops = {
+	.exit = __ip_vs_dev_cleanup,
+};
+
 /*
  *	Initialize IP Virtual Server
  */
@@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
-	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
-	if (ret < 0)
-		return ret;
-
 	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
@@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void)
 		goto cleanup_conn;
 	}
 
+	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		goto cleanup_sync;
+
+	ret = register_pernet_device(&ipvs_core_dev_ops);
+	if (ret < 0)
+		goto cleanup_sub;
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_sync;
+		goto cleanup_dev;
 	}
 
 	pr_info("ipvs loaded.\n");
+
 	return ret;
 
+cleanup_dev:
+	unregister_pernet_device(&ipvs_core_dev_ops);
+cleanup_sub:
+	unregister_pernet_subsys(&ipvs_core_ops);
 cleanup_sync:
 	ip_vs_sync_cleanup();
   cleanup_conn:
@@ -1964,20 +2047,20 @@ cleanup_sync:
 	ip_vs_control_cleanup();
   cleanup_estimator:
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	unregister_pernet_device(&ipvs_core_dev_ops);
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ae47090bf45f..ea722810faf3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void)
 }
 #endif
 
+
+/*  Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
@@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	write_unlock_bh(&__ip_vs_svc_lock);
 
 	*svc_p = svc;
+	/* Now there is a service - full throttle */
+	ipvs->enable = 1;
 	return 0;
 
 
@@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net)
 	return 0;
 }
 
+/*
+ *	Delete service by {netns} in the service table.
+ *	Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+	EnterFunction(2);
+	/* Check for "full" addressed entries */
+	mutex_lock(&__ip_vs_mutex);
+	ip_vs_flush(net);
+	mutex_unlock(&__ip_vs_mutex);
+	LeaveFunction(2);
+}
+/*
+ * Release dst hold by dst_cache
+ */
+static inline void
+__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+{
+	spin_lock_bh(&dest->dst_lock);
+	if (dest->dst_cache && dest->dst_cache->dev == dev) {
+		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
+			      dev->name,
+			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
+			      ntohs(dest->port),
+			      atomic_read(&dest->refcnt));
+		ip_vs_dst_reset(dest);
+	}
+	spin_unlock_bh(&dest->dst_lock);
+
+}
+/*
+ * Netdev event receiver
+ * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
+ * a device that is "unregister" it must be released.
+ */
+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	unsigned int idx;
+
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
+	EnterFunction(2);
+	mutex_lock(&__ip_vs_mutex);
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (net_eq(svc->net, net)) {
+				list_for_each_entry(dest, &svc->destinations,
+						    n_list) {
+					__ip_vs_dev_reset(dest, dev);
+				}
+			}
+		}
+
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (net_eq(svc->net, net)) {
+				list_for_each_entry(dest, &svc->destinations,
+						    n_list) {
+					__ip_vs_dev_reset(dest, dev);
+				}
+			}
+
+		}
+	}
+
+	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
+		__ip_vs_dev_reset(dest, dev);
+	}
+	mutex_unlock(&__ip_vs_mutex);
+	LeaveFunction(2);
+	return NOTIFY_DONE;
+}
 
 /*
  *	Zero counters in a service or all services
@@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
 
 #endif
 
+static struct notifier_block ip_vs_dst_notifier = {
+	.notifier_call = ip_vs_dst_event,
+};
+
 int __net_init __ip_vs_control_init(struct net *net)
 {
 	int idx;
@@ -3626,7 +3715,7 @@ err:
 	return -ENOMEM;
 }
 
-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
 	free_percpu(ipvs->tot_stats.cpustats);
 }
 
-static struct pernet_operations ipvs_control_ops = {
-	.init = __ip_vs_control_init,
-	.exit = __ip_vs_control_cleanup,
-};
-
 int __init ip_vs_control_init(void)
 {
 	int idx;
@@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}
 
-	ret = register_pernet_subsys(&ipvs_control_ops);
-	if (ret) {
-		pr_err("cannot register namespace.\n");
-		goto err;
-	}
-
 	smp_wmb();	/* Do we really need it now ? */
 
 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
-		goto err_net;
+		goto err_sock;
 	}
 
 	ret = ip_vs_genl_register();
 	if (ret) {
 		pr_err("cannot register Generic Netlink interface.\n");
-		nf_unregister_sockopt(&ip_vs_sockopts);
-		goto err_net;
+		goto err_genl;
 	}
 
+	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+	if (ret < 0)
+		goto err_notf;
+
 	LeaveFunction(2);
 	return 0;
 
-err_net:
-	unregister_pernet_subsys(&ipvs_control_ops);
-err:
+err_notf:
+	ip_vs_genl_unregister();
+err_genl:
+	nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
 	return ret;
 }
 
@@ -3691,7 +3774,6 @@ err:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	unregister_pernet_subsys(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 8c8766ca56ad..508cce98777c 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }
 
-static int __net_init __ip_vs_estimator_init(struct net *net)
+int __net_init __ip_vs_estimator_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit __ip_vs_estimator_exit(struct net *net)
+void __net_exit __ip_vs_estimator_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_estimator_init,
-	.exit = __ip_vs_estimator_exit,
-};
 
 int __init ip_vs_estimator_init(void)
 {
-	int rv;
-
-	rv = register_pernet_subsys(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }
 
 void ip_vs_estimator_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 17484a4416ef..eb86028536fc 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  * per network name-space init
  */
-static int __net_init __ip_vs_protocol_init(struct net *net)
+int __net_init __ip_vs_protocol_init(struct net *net)
 {
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
@@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
@@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 	}
 }
 
-static struct pernet_operations ipvs_proto_ops = {
-	.init = __ip_vs_protocol_init,
-	.exit = __ip_vs_protocol_cleanup,
-};
-
 int __init ip_vs_protocol_init(void)
 {
 	char protocols[64];
@@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
-	return register_pernet_subsys(&ipvs_proto_ops);
 
 	return 0;
 }
@@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;
 
-	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 0cce95310820..e292e5bddc70 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state)
 /*
  * Initialize data struct for each netns
  */
-static int __net_init __ip_vs_sync_init(struct net *net)
+int __net_init __ip_vs_sync_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
@@ -1677,7 +1677,7 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 	return 0;
 }
 
-static void __ip_vs_sync_cleanup(struct net *net)
+void __ip_vs_sync_cleanup(struct net *net)
 {
 	int retc;
 
@@ -1690,18 +1690,11 @@ static void __ip_vs_sync_cleanup(struct net *net)
 		pr_err("Failed to stop Backup Daemon\n");
 }
 
-static struct pernet_operations ipvs_sync_ops = {
-	.init = __ip_vs_sync_init,
-	.exit = __ip_vs_sync_cleanup,
-};
-
-
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_device(&ipvs_sync_ops);
+	return 0;
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
cgit v1.2.3


From 4319cc0cf5bb894b7368008cdf6dd20eb8868018 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Tue, 10 May 2011 09:55:44 +0200
Subject: netfilter: IPv6: initialize TOS field in REJECT target module

The IPv6 header is not zeroed out in alloc_skb so we must initialize
it properly unless we want to see IPv6 packets with random TOS fields
floating around. The current implementation resets the flow label
but this could be changed if deemed necessary.

We stumbled upon this issue when trying to apply a mangle rule to
the RST packet generated by the REJECT target module.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 28e74488a329..a5a4c5dd5396 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -45,6 +45,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	int tcphoff, needs_ack;
 	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
 	struct ipv6hdr *ip6h;
+#define DEFAULT_TOS_VALUE	0x0U
+	const __u8 tclass = DEFAULT_TOS_VALUE;
 	struct dst_entry *dst = NULL;
 	u8 proto;
 	struct flowi6 fl6;
@@ -124,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	skb_put(nskb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(nskb);
 	ip6h = ipv6_hdr(nskb);
-	ip6h->version = 6;
+	*(__be32 *)ip6h =  htonl(0x60000000 | (tclass << 20));
 	ip6h->hop_limit = ip6_dst_hoplimit(dst);
 	ip6h->nexthdr = IPPROTO_TCP;
 	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
-- 
cgit v1.2.3


From 1ed2f73d90fb49bcf5704aee7e9084adb882bfc5 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Date: Tue, 10 May 2011 10:00:21 +0200
Subject: netfilter: IPv6: fix DSCP mangle code

The mask indicates the bits one wants to zero out, so it needs to be
inverted before applying to the original TOS field.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_DSCP.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 0a229191e55b..ae8271652efa 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	u_int8_t orig, nv;
 
 	orig = ipv6_get_dsfield(iph);
-	nv   = (orig & info->tos_mask) ^ info->tos_value;
+	nv   = (orig & ~info->tos_mask) ^ info->tos_value;
 
 	if (orig != nv) {
 		if (!skb_make_writable(skb, sizeof(struct iphdr)))
-- 
cgit v1.2.3


From 93bbce1ad0cd788190dd7d6c17d289f771fe3d0d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 May 2011 12:13:36 +0200
Subject: netfilter: revert a2361c8735e07322023aedc36e4938b35af31eb0

This patch reverts a2361c8735e07322023aedc36e4938b35af31eb0:
"[PATCH] netfilter: xt_conntrack: warn about use in raw table"

Florian Wesphal says:
"... when the packet was sent from the local machine the skb
already has ->nfct attached, and -m conntrack seems to do
the right thing."

Acked-by: Jan Engelhardt <jengelh@medozas.de>
Reported-by: Florian Wesphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_conntrack.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 481a86fdc409..61805d7b38aa 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -272,11 +272,6 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	int ret;
 
-	if (strcmp(par->table, "raw") == 0) {
-		pr_info("state is undetermined at the time of raw table\n");
-		return -EINVAL;
-	}
-
 	ret = nf_ct_l3proto_try_module_get(par->family);
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
-- 
cgit v1.2.3


From 5db1c07ced19b2eec3a149a3c624d88e02e246ae Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Tue, 3 May 2011 21:40:08 +0300
Subject: mac80211: don't start the dynamic ps timer if not associated

When we are disconnecting, we set PS off, but this happens before we
send the deauth/disassoc request.  When the deauth/disassoc frames are
sent, we trigger the dynamic ps timer, which then times out and turns
PS back on.  Thus, PS remains on after disconnecting, causing problems
when associating again.

This can be fixed by preventing the timer to start when we're not
associated anymore.

Signed-off-by: Luciano Coelho <coelho@ti.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ce4596ed1268..bd1224fd216a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -237,6 +237,10 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
 				     &local->dynamic_ps_disable_work);
 	}
 
+	/* Don't restart the timer if we're not disassociated */
+	if (!ifmgd->associated)
+		return TX_CONTINUE;
+
 	mod_timer(&local->dynamic_ps_timer, jiffies +
 		  msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
 
-- 
cgit v1.2.3


From 55aee10dec477254241e4f72968f92e0543b33ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 10 May 2011 12:22:54 -0700
Subject: vlan: fix GVRP at dismantle time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ip link add link eth2 eth2.103 type vlan id 103 gvrp on loose_binding on
ip link set eth2.103 up
rmmod tg3    # driver providing eth2

 BUG: unable to handle kernel NULL pointer dereference at           (null)
 IP: [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp]
 PGD 11d251067 PUD 11b9e0067 PMD 0
 Oops: 0000 [#1] SMP
 last sysfs file: /sys/devices/virtual/net/eth2.104/ifindex
 CPU 0
 Modules linked in: tg3(-) 8021q garp nfsd lockd auth_rpcgss sunrpc libphy sg [last unloaded: x_tables]

 Pid: 11494, comm: rmmod Tainted: G        W   2.6.39-rc6-00261-gfd71257-dirty #580 HP ProLiant BL460c G6
 RIP: 0010:[<ffffffffa0030c9e>]  [<ffffffffa0030c9e>] garp_request_leave+0x3e/0xc0 [garp]
 RSP: 0018:ffff88007a19bae8  EFLAGS: 00010286
 RAX: 0000000000000000 RBX: ffff88011b5e2000 RCX: 0000000000000002
 RDX: 0000000000000000 RSI: 0000000000000175 RDI: ffffffffa0030d5b
 RBP: ffff88007a19bb18 R08: 0000000000000001 R09: ffff88011bd64a00
 R10: ffff88011d34ec00 R11: 0000000000000000 R12: 0000000000000002
 R13: ffff88007a19bc48 R14: ffff88007a19bb88 R15: 0000000000000001
 FS:  0000000000000000(0000) GS:ffff88011fc00000(0063) knlGS:00000000f77d76c0
 CS:  0010 DS: 002b ES: 002b CR0: 000000008005003b
 CR2: 0000000000000000 CR3: 000000011a675000 CR4: 00000000000006f0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process rmmod (pid: 11494, threadinfo ffff88007a19a000, task ffff8800798595c0)
 Stack:
  ffff88007a19bb36 ffff88011c84b800 ffff88011b5e2000 ffff88007a19bc48
  ffff88007a19bb88 0000000000000006 ffff88007a19bb38 ffffffffa003a5f6
  ffff88007a19bb38 670088007a19bba8 ffff88007a19bb58 ffffffffa00397e7
 Call Trace:
  [<ffffffffa003a5f6>] vlan_gvrp_request_leave+0x46/0x50 [8021q]
  [<ffffffffa00397e7>] vlan_dev_stop+0xb7/0xc0 [8021q]
  [<ffffffff8137e427>] __dev_close_many+0x87/0xe0
  [<ffffffff8137e507>] dev_close_many+0x87/0x110
  [<ffffffff8137e630>] rollback_registered_many+0xa0/0x240
  [<ffffffff8137e7e9>] unregister_netdevice_many+0x19/0x60
  [<ffffffffa00389eb>] vlan_device_event+0x53b/0x550 [8021q]
  [<ffffffff8143f448>] ? ip6mr_device_event+0xa8/0xd0
  [<ffffffff81479d03>] notifier_call_chain+0x53/0x80
  [<ffffffff81062539>] __raw_notifier_call_chain+0x9/0x10
  [<ffffffff81062551>] raw_notifier_call_chain+0x11/0x20
  [<ffffffff8137df82>] call_netdevice_notifiers+0x32/0x60
  [<ffffffff8137e69f>] rollback_registered_many+0x10f/0x240
  [<ffffffff8137e85f>] rollback_registered+0x2f/0x40
  [<ffffffff8137e8c8>] unregister_netdevice_queue+0x58/0x90
  [<ffffffff8137e9eb>] unregister_netdev+0x1b/0x30
  [<ffffffffa005d73f>] tg3_remove_one+0x6f/0x10b [tg3]

We should call vlan_gvrp_request_leave() from unregister_vlan_dev(),
not from vlan_dev_stop(), because vlan_gvrp_uninit_applicant()
is called right after unregister_netdevice_queue(). In batch mode,
unregister_netdevice_queue() doesn’t immediately call vlan_dev_stop().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c     | 3 +++
 net/8021q/vlan_dev.c | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 7850412f52b7..0eb1a886b370 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -124,6 +124,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	grp->nr_vlans--;
 
+	if (vlan->flags & VLAN_FLAG_GVRP)
+		vlan_gvrp_request_leave(dev);
+
 	vlan_group_set_device(grp, vlan_id, NULL);
 	if (!grp->killall)
 		synchronize_net();
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e34ea9e5e28b..b2ff6c8d3603 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -487,9 +487,6 @@ static int vlan_dev_stop(struct net_device *dev)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	struct net_device *real_dev = vlan->real_dev;
 
-	if (vlan->flags & VLAN_FLAG_GVRP)
-		vlan_gvrp_request_leave(dev);
-
 	dev_mc_unsync(real_dev, dev);
 	dev_uc_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
-- 
cgit v1.2.3


From e14a599335427f81bbb0008963e59aa9c6449dce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 10 May 2011 12:26:06 -0700
Subject: net: dev_close() should check IFF_UP

Commit 443457242beb (factorize sync-rcu call in
unregister_netdevice_many) mistakenly removed one test from dev_close()

Following actions trigger a BUG :

modprobe bonding
modprobe dummy
ifconfig bond0 up
ifenslave bond0 dummy0
rmmod dummy

dev_close() must not close a non IFF_UP device.

With help from Frank Blaschka and Einar EL Lueck

Reported-by: Frank Blaschka <blaschka@linux.vnet.ibm.com>
Reported-by: Einar EL Lueck <ELELUECK@de.ibm.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 856b6ee9a1d5..92009440d28b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1284,11 +1284,13 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
-	LIST_HEAD(single);
+	if (dev->flags & IFF_UP) {
+		LIST_HEAD(single);
 
-	list_add(&dev->unreg_list, &single);
-	dev_close_many(&single);
-	list_del(&single);
+		list_add(&dev->unreg_list, &single);
+		dev_close_many(&single);
+		list_del(&single);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(dev_close);
-- 
cgit v1.2.3


From 43a4dea4c9d44baae38ddc14b9b6d86fde4c8b88 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 9 May 2011 19:36:38 +0000
Subject: xfrm: Assign the inner mode output function to the dst entry

As it is, we assign the outer modes output function to the dst entry
when we create the xfrm bundle. This leads to two problems on interfamily
scenarios. We might insert ipv4 packets into ip6_fragment when called
from xfrm6_output. The system crashes if we try to fragment an ipv4
packet with ip6_fragment. This issue was introduced with git commit
ad0081e4 (ipv6: Fragment locally generated tunnel-mode IPSec6 packets
as needed). The second issue is, that we might insert ipv4 packets in
netfilter6 and vice versa on interfamily scenarios.

With this patch we assign the inner mode output function to the dst entry
when we create the xfrm bundle. So xfrm4_output/xfrm6_output from the inner
mode is used and the right fragmentation and netfilter functions are called.
We switch then to outer mode with the output_finish functions.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_output.c |  8 ++++++--
 net/ipv4/xfrm4_state.c  |  1 +
 net/ipv6/xfrm6_output.c |  6 +++---
 net/ipv6/xfrm6_state.c  |  1 +
 net/xfrm/xfrm_policy.c  | 14 +++++++++++++-
 5 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 571aa96a175c..2d51840e53a1 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(xfrm4_prepare_output);
 
-static int xfrm4_output_finish(struct sk_buff *skb)
+int xfrm4_output_finish(struct sk_buff *skb)
 {
 #ifdef CONFIG_NETFILTER
 	if (!skb_dst(skb)->xfrm) {
@@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb)
 
 int xfrm4_output(struct sk_buff *skb)
 {
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
 	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
-			    NULL, skb_dst(skb)->dev, xfrm4_output_finish,
+			    NULL, dst->dev,
+			    x->outer_mode->afinfo->output_finish,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 1717c64628d1..805d63ef4340 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.init_tempsel		= __xfrm4_init_tempsel,
 	.init_temprop		= xfrm4_init_temprop,
 	.output			= xfrm4_output,
+	.output_finish		= xfrm4_output_finish,
 	.extract_input		= xfrm4_extract_input,
 	.extract_output		= xfrm4_extract_output,
 	.transport_finish	= xfrm4_transport_finish,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8e688b3de9ab..49a91c5f5623 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(xfrm6_prepare_output);
 
-static int xfrm6_output_finish(struct sk_buff *skb)
+int xfrm6_output_finish(struct sk_buff *skb)
 {
 #ifdef CONFIG_NETFILTER
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
@@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb)
 	if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
 	    ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 		dst_allfrag(skb_dst(skb)))) {
-			return ip6_fragment(skb, xfrm6_output_finish);
+			return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
 	}
-	return xfrm6_output_finish(skb);
+	return x->outer_mode->afinfo->output_finish(skb);
 }
 
 int xfrm6_output(struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index afe941e9415c..248f0b2a7ee9 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -178,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.tmpl_sort		= __xfrm6_tmpl_sort,
 	.state_sort		= __xfrm6_state_sort,
 	.output			= xfrm6_output,
+	.output_finish		= xfrm6_output_finish,
 	.extract_input		= xfrm6_extract_input,
 	.extract_output		= xfrm6_extract_output,
 	.transport_finish	= xfrm6_transport_finish,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 15792d8b6272..b4d745ea8ee1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1406,6 +1406,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 	struct net *net = xp_net(policy);
 	unsigned long now = jiffies;
 	struct net_device *dev;
+	struct xfrm_mode *inner_mode;
 	struct dst_entry *dst_prev = NULL;
 	struct dst_entry *dst0 = NULL;
 	int i = 0;
@@ -1436,6 +1437,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			goto put_states;
 		}
 
+		if (xfrm[i]->sel.family == AF_UNSPEC) {
+			inner_mode = xfrm_ip2inner_mode(xfrm[i],
+							xfrm_af2proto(family));
+			if (!inner_mode) {
+				err = -EAFNOSUPPORT;
+				dst_release(dst);
+				goto put_states;
+			}
+		} else
+			inner_mode = xfrm[i]->inner_mode;
+
 		if (!dst_prev)
 			dst0 = dst1;
 		else {
@@ -1464,7 +1476,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		dst1->lastuse = now;
 
 		dst1->input = dst_discard;
-		dst1->output = xfrm[i]->outer_mode->afinfo->output;
+		dst1->output = inner_mode->afinfo->output;
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-- 
cgit v1.2.3


From 6fa5ddcc675b937f94d05628e8997c07a80c6cb9 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 9 May 2011 19:43:05 +0000
Subject: xfrm: Don't allow esn with disabled anti replay detection

Unlike the standard case, disabled anti replay detection needs some
nontrivial extra treatment on ESN. RFC 4303 states:

Note: If a receiver chooses to not enable anti-replay for an SA, then
the receiver SHOULD NOT negotiate ESN in an SA management protocol.
Use of ESN creates a need for the receiver to manage the anti-replay
window (in order to determine the correct value for the high-order
bits of the ESN, which are employed in the ICV computation), which is
generally contrary to the notion of disabling anti-replay for an SA.

So return an error if an ESN state with disabled anti replay detection
is inserted for now and add the extra treatment later if we need it.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_replay.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index e8a781422feb..47f1b8638df9 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -535,6 +535,9 @@ int xfrm_init_replay(struct xfrm_state *x)
 		    replay_esn->bmp_len * sizeof(__u32) * 8)
 			return -EINVAL;
 
+	if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0)
+		return -EINVAL;
+
 	if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn)
 		x->repl = &xfrm_replay_esn;
 	else
-- 
cgit v1.2.3


From ce8453776d68982cfe93bcb28191af8ccad01f45 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 11 May 2011 18:58:16 -0700
Subject: Revert "Bluetooth: fix shutdown on SCO sockets"

This reverts commit f21ca5fff6e548833fa5ee8867239a8378623150.

Quoth Gustavo F. Padovan:
  "Commit f21ca5fff6e548833fa5ee8867239a8378623150 can cause a NULL
   dereference if we call shutdown in a bluetooth SCO socket and doesn't
   wait the shutdown completion to call close().  Please revert it.  I
   may have a fix for it soon, but we don't have time anymore, so revert
   is the way to go.  ;)"

Requested-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/bluetooth/sco.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 94954c74f6ae..42fdffd1d76c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -369,15 +369,6 @@ static void __sco_sock_close(struct sock *sk)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
-		if (sco_pi(sk)->conn) {
-			sk->sk_state = BT_DISCONN;
-			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
-			hci_conn_put(sco_pi(sk)->conn->hcon);
-			sco_pi(sk)->conn = NULL;
-		} else
-			sco_chan_del(sk, ECONNRESET);
-		break;
-
 	case BT_CONNECT:
 	case BT_DISCONN:
 		sco_chan_del(sk, ECONNRESET);
-- 
cgit v1.2.3


From 1b0bcbcf62884959fa7214eb16c44cff445691c6 Mon Sep 17 00:00:00 2001
From: Pedro Scarapicchia Junior <pedrinho.rep51@gmail.com>
Date: Mon, 9 May 2011 14:10:49 +0000
Subject: net/9p/protocol.c: Fix a memory leak

When p9pdu_readf() is called with "s" attribute, it allocates a pointer that
will store a string. In p9dirent_read(), this pointer is not being released,
leading to out of memory errors.
This patch releases this pointer after string is copyed to dirent->d_name.

Signed-off-by: Pedro Scarapicchia Junior <pedro.scarapiccha@br.flextronics.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/protocol.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index b58a501cf3d1..a873277cb996 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -674,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
 	}
 
 	strcpy(dirent->d_name, nameptr);
+	kfree(nameptr);
 
 out:
 	return fake_pdu.offset;
-- 
cgit v1.2.3


From cb68552858c64db302771469b1202ea09e696329 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 13 May 2011 16:03:24 -0400
Subject: bridge: fix forwarding of IPv6

The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e
    bridge: Reset IPCB when entering IP stack on NF_FORWARD
broke forwarding of IPV6 packets in bridge because it would
call bp_parse_ip_options with an IPV6 packet.

Reported-by: Noah Meyerhans <noahm@debian.org>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f3bc322c5891..74ef4d4846a4 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
-	if (br_parse_ip_options(skb))
+	if (pf == PF_INET && br_parse_ip_options(skb))
 		return NF_DROP;
 
 	/* The physdev module checks on this */
-- 
cgit v1.2.3


From d8083deb4f1aa0977980dfb834fcc336ef38318f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Fri, 13 May 2011 16:03:24 -0400
Subject: bridge: fix forwarding of IPv6

The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e
    bridge: Reset IPCB when entering IP stack on NF_FORWARD
broke forwarding of IPV6 packets in bridge because it would
call bp_parse_ip_options with an IPV6 packet.

Reported-by: Noah Meyerhans <noahm@debian.org>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f3bc322c5891..74ef4d4846a4 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 		nf_bridge->mask |= BRNF_PKT_TYPE;
 	}
 
-	if (br_parse_ip_options(skb))
+	if (pf == PF_INET && br_parse_ip_options(skb))
 		return NF_DROP;
 
 	/* The physdev module checks on this */
-- 
cgit v1.2.3


From 0f08190fe8af3cdb6ba19690eb0fa253ecef4bde Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Date: Sun, 15 May 2011 17:20:29 +0200
Subject: IPVS: fix netns if reading ip_vs_* procfs entries

Without this patch every access to ip_vs in procfs will increase
the netns count i.e. an unbalanced get_net()/put_net().
(ipvsadm commands also use procfs.)
The result is you can't exit a netns if reading ip_vs_* procfs entries.

Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_app.c  | 2 +-
 net/netfilter/ipvs/ip_vs_conn.c | 4 ++--
 net/netfilter/ipvs/ip_vs_ctl.c  | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 51f3af7c4743..059af3120be7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -572,7 +572,7 @@ static const struct file_operations ip_vs_app_fops = {
 	.open	 = ip_vs_app_open,
 	.read	 = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 #endif
 
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d3fd91bbba49..bf28ac2fc99b 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1046,7 +1046,7 @@ static const struct file_operations ip_vs_conn_fops = {
 	.open    = ip_vs_conn_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
 static const char *ip_vs_origin_name(unsigned flags)
@@ -1114,7 +1114,7 @@ static const struct file_operations ip_vs_conn_sync_fops = {
 	.open    = ip_vs_conn_sync_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_net,
 };
 
 #endif
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ea722810faf3..37890f228b19 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2066,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = {
 	.open    = ip_vs_info_open,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
-	.release = seq_release_private,
+	.release = seq_release_net,
 };
 
 #endif
@@ -2109,7 +2109,7 @@ static const struct file_operations ip_vs_stats_fops = {
 	.open = ip_vs_stats_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 
 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
@@ -2178,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
 	.open = ip_vs_stats_percpu_seq_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = single_release_net,
 };
 #endif
 
-- 
cgit v1.2.3


From 6f404e441d169afc90929ef5e451ec9779c1f11a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Mon, 16 May 2011 15:14:21 -0400
Subject: net: Change netdev_fix_features messages loglevel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those reduced to DEBUG can possibly be triggered by unprivileged processes
and are nothing exceptional. Illegal checksum combinations can only be
caused by driver bug, so promote those messages to WARN.

Since GSO without SG will now only cause DEBUG message from
netdev_fix_features(), remove the workaround from register_netdevice().

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 92009440d28b..b624fe4d9bd7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5186,27 +5186,27 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 	/* Fix illegal checksum combinations */
 	if ((features & NETIF_F_HW_CSUM) &&
 	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		netdev_info(dev, "mixed HW and IP checksum settings.\n");
+		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
 	}
 
 	if ((features & NETIF_F_NO_CSUM) &&
 	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		netdev_info(dev, "mixed no checksumming and other settings.\n");
+		netdev_warn(dev, "mixed no checksumming and other settings.\n");
 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
 	}
 
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
-		netdev_info(dev,
-			    "Dropping NETIF_F_SG since no checksum feature.\n");
+		netdev_dbg(dev,
+			"Dropping NETIF_F_SG since no checksum feature.\n");
 		features &= ~NETIF_F_SG;
 	}
 
 	/* TSO requires that SG is present as well. */
 	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
-		netdev_info(dev, "Dropping TSO features since no SG feature.\n");
+		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
 		features &= ~NETIF_F_ALL_TSO;
 	}
 
@@ -5216,7 +5216,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 
 	/* Software GSO depends on SG. */
 	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
-		netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
 		features &= ~NETIF_F_GSO;
 	}
 
@@ -5226,13 +5226,13 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
 		if (!((features & NETIF_F_GEN_CSUM) ||
 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-			netdev_info(dev,
+			netdev_dbg(dev,
 				"Dropping NETIF_F_UFO since no checksum offload features.\n");
 			features &= ~NETIF_F_UFO;
 		}
 
 		if (!(features & NETIF_F_SG)) {
-			netdev_info(dev,
+			netdev_dbg(dev,
 				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 			features &= ~NETIF_F_UFO;
 		}
@@ -5414,12 +5414,6 @@ int register_netdevice(struct net_device *dev)
 	dev->features |= NETIF_F_SOFT_FEATURES;
 	dev->wanted_features = dev->features & dev->hw_features;
 
-	/* Avoid warning from netdev_fix_features() for GSO without SG */
-	if (!(dev->wanted_features & NETIF_F_SG)) {
-		dev->wanted_features &= ~NETIF_F_GSO;
-		dev->features &= ~NETIF_F_GSO;
-	}
-
 	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
 	 * vlan_dev_init() will do the dev->features check, so these features
 	 * are enabled only if supported by underlying device.
-- 
cgit v1.2.3