From fb04883371f2cb7867d24783e7d590036dc9b548 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 19 May 2011 15:44:27 +0200
Subject: netfilter: add more values to enum ip_conntrack_info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Following error is raised (and other similar ones) :

net/ipv4/netfilter/nf_nat_standalone.c: In function ‘nf_nat_fn’:
net/ipv4/netfilter/nf_nat_standalone.c:119:2: warning: case value ‘4’
not in enumerated type ‘enum ip_conntrack_info’

gcc barfs on adding two enum values and getting a not enumerated
result :

case IP_CT_RELATED+IP_CT_IS_REPLY:

Add missing enum values

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c             | 6 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c            | 2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +-
 net/ipv4/netfilter/nf_nat_core.c               | 2 +-
 net/ipv4/netfilter/nf_nat_rule.c               | 2 +-
 net/ipv4/netfilter/nf_nat_standalone.c         | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d609ac3cb9a4..5c9e97c79017 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	 * error messages (RELATED) and information requests (see below) */
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
 	    (ctinfo == IP_CT_RELATED ||
-	     ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY))
+	     ctinfo == IP_CT_RELATED_REPLY))
 		return XT_CONTINUE;
 
 	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
@@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 			ct->mark = hash;
 			break;
 		case IP_CT_RELATED:
-		case IP_CT_RELATED+IP_CT_IS_REPLY:
+		case IP_CT_RELATED_REPLY:
 			/* FIXME: we don't handle expectations at the
 			 * moment.  they can arrive on a different node than
 			 * the master connection (e.g. FTP passive mode) */
 		case IP_CT_ESTABLISHED:
-		case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
+		case IP_CT_ESTABLISHED_REPLY:
 			break;
 		default:
 			break;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index d2ed9dc74ebc..9931152a78b5 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	nat = nfct_nat(ct);
 
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+			    ctinfo == IP_CT_RELATED_REPLY));
 
 	/* Source address is 0.0.0.0 - locally generated packet that is
 	 * probably not supposed to be masqueraded.
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 5a03c02af999..db10075dd88e 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
 
 	/* This is where we call the helper: as the packet goes out. */
 	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
 		goto out;
 
 	help = nfct_help(ct);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 9c71b2755ce3..3346de5d94d0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 
 	/* Must be RELATED */
 	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
-		     skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+		     skb->nfctinfo == IP_CT_RELATED_REPLY);
 
 	/* Redirects on non-null nats must be dropped, else they'll
 	   start talking to each other without our translation, and be
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 21c30426480b..733c9abc1cbd 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+			    ctinfo == IP_CT_RELATED_REPLY));
 	NF_CT_ASSERT(par->out != NULL);
 
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 7317bdf1d457..483b76d042da 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum,
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
-	case IP_CT_RELATED+IP_CT_IS_REPLY:
+	case IP_CT_RELATED_REPLY:
 		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
 							   hooknum, skb))
@@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum,
 	default:
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
+			     ctinfo == IP_CT_ESTABLISHED_REPLY);
 	}
 
 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
-- 
cgit v1.2.3


From d9be76f38526dccf84062e3ac3ed3a6a97698565 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 29 May 2011 23:42:29 +0300
Subject: netfilter: nf_nat: fix crash in nf_nat_csum

Fix crash in nf_nat_csum when mangling packets
in OUTPUT hook where skb->dev is not defined, it is set
later before POSTROUTING. Problem happens for CHECKSUM_NONE.
We can check device from rt but using CHECKSUM_PARTIAL
should be safe (skb_checksum_help).

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 99cfa28b6d38..ebc5f8894f99 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    skb->dev->features & NETIF_F_V4_CSUM) {
+		    (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
 			skb->ip_summed = CHECKSUM_PARTIAL;
 			skb->csum_start = skb_headroom(skb) +
 					  skb_network_offset(skb) +
-- 
cgit v1.2.3


From 88ed01d17b44bc2bed4ad4835d3b1099bff3dd71 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 2 Jun 2011 15:08:45 +0200
Subject: netfilter: nf_conntrack: fix ct refcount leak in l4proto->error()

This patch fixes a refcount leak of ct objects that may occur if
l4proto->error() assigns one conntrack object to one skbuff. In
that case, we have to skip further processing in nf_conntrack_in().

With this patch, we can also fix wrong return values (-NF_ACCEPT)
for special cases in ICMP[v6] that should not bump the invalid/error
statistic counters.

Reported-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7404bde95994..ab5b27a2916f 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	/* Update skb to refer to this connection */
 	skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
 	skb->nfctinfo = *ctinfo;
-	return -NF_ACCEPT;
+	return NF_ACCEPT;
 }
 
 /* Small and modified version of icmp_rcv */
-- 
cgit v1.2.3


From d232b8dded624af3e346b13807a591c63b601c44 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 27 May 2011 20:36:51 -0400
Subject: netfilter: use unsigned variables for packet lengths in ip[6]_queue.

Netlink message lengths can't be negative, so use unsigned variables.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ip_queue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d2c1311cb28d..f7f9bd7ba12d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -402,7 +402,8 @@ ipq_dev_drop(int ifindex)
 static inline void
 __ipq_rcv_skb(struct sk_buff *skb)
 {
-	int status, type, pid, flags, nlmsglen, skblen;
+	int status, type, pid, flags;
+	unsigned int nlmsglen, skblen;
 	struct nlmsghdr *nlh;
 
 	skblen = skb->len;
-- 
cgit v1.2.3


From fe6fe792faec3fc2d2db39b69651682b8c4e7fcb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 8 Jun 2011 06:07:07 +0000
Subject: net: pmtu_expires fixes

commit 2c8cec5c10bc (ipv4: Cache learned PMTU information in inetpeer)
added some racy peer->pmtu_expires accesses.

As its value can be changed by another cpu/thread, we should be more
careful, reading its value once.

Add peer_pmtu_expired() and peer_pmtu_cleaned() helpers

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 78 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 34 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 52b0b956508b..045f0ec6a4a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1316,6 +1316,23 @@ reject_redirect:
 	;
 }
 
+static bool peer_pmtu_expired(struct inet_peer *peer)
+{
+	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+	return orig &&
+	       time_after_eq(jiffies, orig) &&
+	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
+static bool peer_pmtu_cleaned(struct inet_peer *peer)
+{
+	unsigned long orig = ACCESS_ONCE(peer->pmtu_expires);
+
+	return orig &&
+	       cmpxchg(&peer->pmtu_expires, orig, 0) == orig;
+}
+
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *)dst;
@@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 						rt_genid(dev_net(dst->dev)));
 			rt_del(hash, rt);
 			ret = NULL;
-		} else if (rt->peer &&
-			   rt->peer->pmtu_expires &&
-			   time_after_eq(jiffies, rt->peer->pmtu_expires)) {
-			unsigned long orig = rt->peer->pmtu_expires;
-
-			if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
-				dst_metric_set(dst, RTAX_MTU,
-					       rt->peer->pmtu_orig);
+		} else if (rt->peer && peer_pmtu_expired(rt->peer)) {
+			dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
 		}
 	}
 	return ret;
@@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
 
 static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
 {
-	unsigned long expires = peer->pmtu_expires;
+	unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
 
+	if (!expires)
+		return;
 	if (time_before(jiffies, expires)) {
 		u32 orig_dst_mtu = dst_mtu(dst);
 		if (peer->pmtu_learned < orig_dst_mtu) {
@@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 		rt_bind_peer(rt, rt->rt_dst, 1);
 	peer = rt->peer;
 	if (peer) {
+		unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
+
 		if (mtu < ip_rt_min_pmtu)
 			mtu = ip_rt_min_pmtu;
-		if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
-			unsigned long pmtu_expires;
+		if (!pmtu_expires || mtu < peer->pmtu_learned) {
 
 			pmtu_expires = jiffies + ip_rt_mtu_expires;
 			if (!pmtu_expires)
@@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 			rt_bind_peer(rt, rt->rt_dst, 0);
 
 		peer = rt->peer;
-		if (peer && peer->pmtu_expires)
+		if (peer) {
 			check_peer_pmtu(dst, peer);
 
-		if (peer && peer->redirect_learned.a4 &&
-		    peer->redirect_learned.a4 != rt->rt_gateway) {
-			if (check_peer_redir(dst, peer))
-				return NULL;
+			if (peer->redirect_learned.a4 &&
+			    peer->redirect_learned.a4 != rt->rt_gateway) {
+				if (check_peer_redir(dst, peer))
+					return NULL;
+			}
 		}
 
 		rt->rt_peer_genid = rt_peer_genid();
@@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb)
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 
 	rt = skb_rtable(skb);
-	if (rt &&
-	    rt->peer &&
-	    rt->peer->pmtu_expires) {
-		unsigned long orig = rt->peer->pmtu_expires;
-
-		if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig)
-			dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
-	}
+	if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
+		dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			       sizeof(u32) * RTAX_MAX);
 		dst_init_metrics(&rt->dst, peer->metrics, false);
 
-		if (peer->pmtu_expires)
-			check_peer_pmtu(&rt->dst, peer);
+		check_peer_pmtu(&rt->dst, peer);
 		if (peer->redirect_learned.a4 &&
 		    peer->redirect_learned.a4 != rt->rt_gateway) {
 			rt->rt_gateway = peer->redirect_learned.a4;
@@ -2775,7 +2783,8 @@ static int rt_fill_info(struct net *net,
 	struct rtable *rt = skb_rtable(skb);
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
-	long expires;
+	long expires = 0;
+	const struct inet_peer *peer = rt->peer;
 	u32 id = 0, ts = 0, tsage = 0, error;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -2823,15 +2832,16 @@ static int rt_fill_info(struct net *net,
 		NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark);
 
 	error = rt->dst.error;
-	expires = (rt->peer && rt->peer->pmtu_expires) ?
-		rt->peer->pmtu_expires - jiffies : 0;
-	if (rt->peer) {
+	if (peer) {
 		inet_peer_refcheck(rt->peer);
-		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
-		if (rt->peer->tcp_ts_stamp) {
-			ts = rt->peer->tcp_ts;
-			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
+		id = atomic_read(&peer->ip_id_count) & 0xffff;
+		if (peer->tcp_ts_stamp) {
+			ts = peer->tcp_ts;
+			tsage = get_seconds() - peer->tcp_ts_stamp;
 		}
+		expires = ACCESS_ONCE(peer->pmtu_expires);
+		if (expires)
+			expires -= jiffies;
 	}
 
 	if (rt_is_input_route(rt)) {
-- 
cgit v1.2.3


From 96d7303e9cfb6a9bc664174a4dfdb6fa689284fe Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Sun, 5 Jun 2011 20:48:47 +0000
Subject: ipv4: Fix packet size calculation for raw IPsec packets in
 __ip_append_data

We assume that transhdrlen is positive on the first fragment
which is wrong for raw packets. So we don't add exthdrlen to the
packet size for raw packets. This leads to a reallocation on IPsec
because we have not enough headroom on the skb to place the IPsec
headers. This patch fixes this by adding exthdrlen to the packet
size whenever the send queue of the socket is empty. This issue was
introduced with git commit 1470ddf7 (inet: Remove explicit write
references to sk/inet in ip_append_data)

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 98af3697c718..a8024eaa0e87 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -799,7 +799,9 @@ static int __ip_append_data(struct sock *sk,
 	int csummode = CHECKSUM_NONE;
 	struct rtable *rt = (struct rtable *)cork->dst;
 
-	exthdrlen = transhdrlen ? rt->dst.header_len : 0;
+	skb = skb_peek_tail(queue);
+
+	exthdrlen = !skb ? rt->dst.header_len : 0;
 	length += exthdrlen;
 	transhdrlen += exthdrlen;
 	mtu = cork->fragsize;
@@ -825,8 +827,6 @@ static int __ip_append_data(struct sock *sk,
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
-	skb = skb_peek_tail(queue);
-
 	cork->length += length;
 	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-- 
cgit v1.2.3


From 63f6fe92c6b3cbf4c0bbbea4b31fdd3d68e21e4d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Thu, 16 Jun 2011 17:16:37 +0200
Subject: netfilter: ip_tables: fix compile with debug

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 764743843503..24e556e83a3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name)
 	const struct xt_entry_target *t;
 
 	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip check failed %p %s.\n", e, par->match->name);
+		duprintf("ip check failed %p %s.\n", e, name);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 58d5a0257d2fd89fbe4451f704193cc95b0a9c97 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 16 Jun 2011 17:24:17 +0200
Subject: netfilter: ipt_ecn: fix protocol check in ecn_mt_check()

Check for protocol inversion in ecn_mt_check() and remove the
unnecessary runtime check for IPPROTO_TCP in ecn_mt().

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_ecn.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index af6e9c778345..aaa85be1b2d8 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -76,8 +76,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return false;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
-			return false;
 		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}
@@ -97,7 +95,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
-	    ip->proto != IPPROTO_TCP) {
+	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
 		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From db898aa2ef6529fa80891e754d353063611c77de Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 16 Jun 2011 17:24:55 +0200
Subject: netfilter: ipt_ecn: fix inversion for IP header ECN match

Userspace allows to specify inversion for IP header ECN matches, the
kernel silently accepts it, but doesn't invert the match result.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_ecn.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index aaa85be1b2d8..2b57e52c746c 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -25,7 +25,8 @@ MODULE_LICENSE("GPL");
 static inline bool match_ip(const struct sk_buff *skb,
 			    const struct ipt_ecn_info *einfo)
 {
-	return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
+	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
+	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
 }
 
 static inline bool match_tcp(const struct sk_buff *skb,
-- 
cgit v1.2.3


From 2c38de4c1f8da799bdca0e4bb40ca13f2174d3e8 Mon Sep 17 00:00:00 2001
From: Nicolas Cavallari <cavallar@lri.fr>
Date: Thu, 16 Jun 2011 17:27:04 +0200
Subject: netfilter: fix looped (broad|multi)cast's MAC handling

By default, when broadcast or multicast packet are sent from a local
application, they are sent to the interface then looped by the kernel
to other local applications, going throught netfilter hooks in the
process.

These looped packet have their MAC header removed from the skb by the
kernel looping code. This confuse various netfilter's netlink queue,
netlink log and the legacy ip_queue, because they try to extract a
hardware address from these packets, but extracts a part of the IP
header instead.

This patch prevent NFQUEUE, NFLOG and ip_QUEUE to include a MAC header
if there is none in the packet.

Signed-off-by: Nicolas Cavallari <cavallar@lri.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_queue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index f7f9bd7ba12d..5c9b9d963918 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	else
 		pmsg->outdev_name[0] = '\0';
 
-	if (entry->indev && entry->skb->dev) {
+	if (entry->indev && entry->skb->dev &&
+	    entry->skb->mac_header != entry->skb->network_header) {
 		pmsg->hw_type = entry->skb->dev->type;
 		pmsg->hw_addrlen = dev_parse_header(entry->skb,
 						    pmsg->hw_addr);
-- 
cgit v1.2.3


From 42c1edd345c8412d96e7a362ee06feb7be73bb6c Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 16 Jun 2011 17:29:22 +0200
Subject: netfilter: nf_nat: avoid double seq_adjust for loopback

	Avoid double seq adjustment for loopback traffic
because it causes silent repetition of TCP data. One
example is passive FTP with DNAT rule and difference in the
length of IP addresses.

	This patch adds check if packet is sent and
received via loopback device. As the same conntrack is
used both for outgoing and incoming direction, we restrict
seq adjustment to happen only in POSTROUTING.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index db10075dd88e..de9da21113a1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
 		return ret;
 	}
 
-	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
 		typeof(nf_nat_seq_adjust_hook) seq_adjust;
 
 		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
-- 
cgit v1.2.3


From 1eddceadb0d6441cd39b2c38705a8f5fec86e770 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 17 Jun 2011 03:45:15 +0000
Subject: net: rfs: enable RFS before first data packet is received
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le jeudi 16 juin 2011 à 23:38 -0400, David Miller a écrit :
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Fri, 17 Jun 2011 00:50:46 +0100
>
> > On Wed, 2011-06-15 at 04:15 +0200, Eric Dumazet wrote:
> >> @@ -1594,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
> >>  			goto discard;
> >>
> >>  		if (nsk != sk) {
> >> +			sock_rps_save_rxhash(nsk, skb->rxhash);
> >>  			if (tcp_child_process(sk, nsk, skb)) {
> >>  				rsk = nsk;
> >>  				goto reset;
> >>
> >
> > I haven't tried this, but it looks reasonable to me.
> >
> > What about IPv6?  The logic in tcp_v6_do_rcv() looks very similar.
>
> Indeed ipv6 side needs the same fix.
>
> Eric please add that part and resubmit.  And in fact I might stick
> this into net-2.6 instead of net-next-2.6
>

OK, here is the net-2.6 based one then, thanks !

[PATCH v2] net: rfs: enable RFS before first data packet is received

First packet received on a passive tcp flow is not correctly RFS
steered.

One sock_rps_record_flow() call is missing in inet_accept()

But before that, we also must record rxhash when child socket is setup.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@conan.davemloft.net>
---
 net/ipv4/af_inet.c  | 1 +
 net/ipv4/tcp_ipv4.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net/ipv4')

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9c1926027a26..eae1f676f870 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -676,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	lock_sock(sk2);
 
+	sock_rps_record_flow(sk2);
 	WARN_ON(!((1 << sk2->sk_state) &
 		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7d6671e33b8..708dc203b034 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 			goto discard;
 
 		if (nsk != sk) {
+			sock_rps_save_rxhash(nsk, skb->rxhash);
 			if (tcp_child_process(sk, nsk, skb)) {
 				rsk = nsk;
 				goto reset;
-- 
cgit v1.2.3


From eeb1497277d6b1a0a34ed36b97e18f2bd7d6de0d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 17 Jun 2011 16:25:39 -0400
Subject: inet_diag: fix inet_diag_bc_audit()

A malicious user or buggy application can inject code and trigger an
infinite loop in inet_diag_bc_audit()

Also make sure each instruction is aligned on 4 bytes boundary, to avoid
unaligned accesses.

Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 6ffe94ca5bc9..3267d3898437 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc)
 			return 0;
 		if (cc == len)
 			return 1;
-		if (op->yes < 4)
+		if (op->yes < 4 || op->yes & 3)
 			return 0;
 		len -= op->yes;
 		bc  += op->yes;
@@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc)
 
 static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 {
-	const unsigned char *bc = bytecode;
+	const void *bc = bytecode;
 	int  len = bytecode_len;
 
 	while (len > 0) {
-		struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc;
+		const struct inet_diag_bc_op *op = bc;
 
 //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
 		switch (op->code) {
@@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 		case INET_DIAG_BC_S_LE:
 		case INET_DIAG_BC_D_GE:
 		case INET_DIAG_BC_D_LE:
-			if (op->yes < 4 || op->yes > len + 4)
-				return -EINVAL;
 		case INET_DIAG_BC_JMP:
-			if (op->no < 4 || op->no > len + 4)
+			if (op->no < 4 || op->no > len + 4 || op->no & 3)
 				return -EINVAL;
 			if (op->no < len &&
 			    !valid_cc(bytecode, bytecode_len, len - op->no))
 				return -EINVAL;
 			break;
 		case INET_DIAG_BC_NOP:
-			if (op->yes < 4 || op->yes > len + 4)
-				return -EINVAL;
 			break;
 		default:
 			return -EINVAL;
 		}
+		if (op->yes < 4 || op->yes > len + 4 || op->yes & 3)
+			return -EINVAL;
 		bc  += op->yes;
 		len -= op->yes;
 	}
-- 
cgit v1.2.3


From 9aa3c94ce59066f545521033007abb6441706068 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 18 Jun 2011 11:59:18 -0700
Subject: ipv4: fix multicast losses

Knut Tidemann found that first packet of a multicast flow was not
correctly received, and bisected the regression to commit b23dd4fe42b4
(Make output route lookup return rtable directly.)

Special thanks to Knut, who provided a very nice bug report, including
sample programs to demonstrate the bug.

Reported-and-bisectedby: Knut Tidemann <knut.andre.tidemann@jotron.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 045f0ec6a4a0..aa13ef105110 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1902,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
-	err = 0;
-	if (IS_ERR(rth))
-		err = PTR_ERR(rth);
+	return IS_ERR(rth) ? PTR_ERR(rth) : 0;
 
 e_nobufs:
 	return -ENOBUFS;
-- 
cgit v1.2.3


From 8ad2475e3555346fbd738e77da12578b97d10505 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sun, 19 Jun 2011 22:31:20 +0000
Subject: ipv4, ping: Remove duplicate icmp.h include

Remove the duplicate inclusion of net/icmp.h from net/ipv4/ping.c

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9aaa67165f42..39b403f854c6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -41,7 +41,6 @@
 #include <linux/proc_fs.h>
 #include <net/sock.h>
 #include <net/ping.h>
-#include <net/icmp.h>
 #include <net/udp.h>
 #include <net/route.h>
 #include <net/inet_common.h>
-- 
cgit v1.2.3