From 27b437c8b7d519aac70a0254c2e04c29eff565a2 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 13 Jul 2006 19:26:39 -0700
Subject: [NET]: Update frag_list in pskb_trim

When pskb_trim has to defer to ___pskb_trim to trim the frag_list part
of the packet, the frag_list is not updated to reflect the trimming.
This will usually work fine until you hit something that uses the
packet length or tail from the frag_list. Examples include esp_output
and ip_fragment.

Another problem caused by this is that you can end up with a linear
packet that still has a frag_list attached. It would be possible to get
away with that if we audited everything to make sure skb->len is always
consulted before walking down onto the frag_list; in fact the same
could be done for the paged part as well, to avoid copying the data
area of the skb. For now, though, let's do the conservative fix and
update the frag_list.

Many thanks to Marco Berizzi for helping me to track down this bug.
This 4-year-old bug took 3 months to track down. Marco was very
patient indeed :)

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/core/skbuff.c | 91 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 26 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 44f6a181a754..476aa3978504 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -257,11 +257,11 @@ nodata:
 }
 
 
-static void skb_drop_fraglist(struct sk_buff *skb)
+static void skb_drop_list(struct sk_buff **listp)
 {
-	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+	struct sk_buff *list = *listp;
 
-	skb_shinfo(skb)->frag_list = NULL;
+	*listp = NULL;
 
 	do {
 		struct sk_buff *this = list;
@@ -270,6 +270,11 @@ static void skb_drop_fraglist(struct sk_buff *skb)
 	} while (list);
 }
 
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+	skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
 static void skb_clone_fraglist(struct sk_buff *skb)
 {
 	struct sk_buff *list;
@@ -830,41 +835,75 @@ free_skb:
 
 int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 {
+	struct sk_buff **fragp;
+	struct sk_buff *frag;
 	int offset = skb_headlen(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int i;
+	int err;
+
+	if (skb_cloned(skb) &&
+	    unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+		return err;
 
 	for (i = 0; i < nfrags; i++) {
 		int end = offset + skb_shinfo(skb)->frags[i].size;
-		if (end > len) {
-			if (skb_cloned(skb)) {
-				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-					return -ENOMEM;
-			}
-			if (len <= offset) {
-				put_page(skb_shinfo(skb)->frags[i].page);
-				skb_shinfo(skb)->nr_frags--;
-			} else {
-				skb_shinfo(skb)->frags[i].size = len - offset;
-			}
+
+		if (end < len) {
+			offset = end;
+			continue;
 		}
-		offset = end;
+
+		if (len > offset)
+			skb_shinfo(skb)->frags[i++].size = len - offset;
+
+		skb_shinfo(skb)->nr_frags = i;
+
+		for (; i < nfrags; i++)
+			put_page(skb_shinfo(skb)->frags[i].page);
+
+		if (skb_shinfo(skb)->frag_list)
+			skb_drop_fraglist(skb);
+		break;
 	}
 
-	if (offset < len) {
+	for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+	     fragp = &frag->next) {
+		int end = offset + frag->len;
+
+		if (skb_shared(frag)) {
+			struct sk_buff *nfrag;
+
+			nfrag = skb_clone(frag, GFP_ATOMIC);
+			if (unlikely(!nfrag))
+				return -ENOMEM;
+
+			nfrag->next = frag->next;
+			frag = nfrag;
+			*fragp = frag;
+		}
+
+		if (end < len) {
+			offset = end;
+			continue;
+		}
+
+		if (end > len &&
+		    unlikely((err = pskb_trim(frag, len - offset))))
+			return err;
+
+		if (frag->next)
+			skb_drop_list(&frag->next);
+		break;
+	}
+
+	if (len > skb_headlen(skb)) {
 		skb->data_len -= skb->len - len;
 		skb->len = len;
 	} else {
-		if (len <= skb_headlen(skb)) {
-			skb->len = len;
-			skb->data_len = 0;
-			skb->tail = skb->data + len;
-			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
-				skb_drop_fraglist(skb);
-		} else {
-			skb->data_len -= skb->len - len;
-			skb->len = len;
-		}
+		skb->len = len;
+		skb->data_len = 0;
+		skb->tail = skb->data + len;
 	}
 
 	return 0;
--
cgit v1.2.3
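The invariant ___pskb_trim has to preserve is that skb->len equals the
head length plus everything still reachable through the fragments and
the frag_list. To see what goes wrong when only the top-level length is
updated, here is a small userspace model of the frag_list half of the
problem. Every name in it is a made-up stand-in (struct packet, struct
frag and trim() loosely model sk_buff, skb_shinfo(skb)->frag_list and
___pskb_trim); it is an illustration, not kernel code.

/* Toy model: a packet is head data plus a list of fragments, and
 * pkt->len must equal head_len plus the sum of the fragment lengths. */
#include <stdio.h>
#include <stdlib.h>

struct frag {
	struct frag *next;
	unsigned int len;
};

struct packet {
	unsigned int len;	/* total length: head + fragments */
	unsigned int head_len;	/* bytes in the linear head */
	struct frag *frags;
};

/* What a consumer that walks the fragment list actually sees. */
static unsigned int walked_len(const struct packet *pkt)
{
	unsigned int total = pkt->head_len;

	for (const struct frag *f = pkt->frags; f; f = f->next)
		total += f->len;
	return total;
}

/* The fixed behaviour: shorten or drop fragments so that both
 * views of the length agree again after trimming. */
static void trim(struct packet *pkt, unsigned int len)
{
	unsigned int offset = pkt->head_len;
	struct frag **fp = &pkt->frags;

	for (struct frag *f = *fp; f; f = *fp) {
		if (offset + f->len <= len) {	/* survives whole */
			offset += f->len;
			fp = &f->next;
		} else if (offset < len) {	/* cut short */
			f->len = len - offset;
			offset = len;
			fp = &f->next;
		} else {			/* dropped entirely */
			*fp = f->next;
			free(f);
		}
	}
	if (len < pkt->head_len)
		pkt->head_len = len;
	pkt->len = len;
}

int main(void)
{
	struct frag *f = malloc(sizeof(*f));

	f->next = NULL;
	f->len = 100;
	struct packet pkt = { .len = 150, .head_len = 50, .frags = f };

	trim(&pkt, 80);
	printf("pkt.len=%u walked=%u\n", pkt.len, walked_len(&pkt));
	free(pkt.frags);
	return 0;
}

Built with gcc -std=c99, this prints pkt.len=80 walked=80. Skip the
fragment handling in trim() and walked_len() keeps reporting 150
against a pkt.len of 80, which is precisely the stale state that
esp_output and ip_fragment tripped over before the patch.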
From 53602f92dd3691616478a40738353694bcfef171 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Fri, 14 Jul 2006 14:49:32 -0700
Subject: [IPV4]: Clear skb cb on IP input

When data arrives at IP through loopback (and possibly other devices),
the skb control block can still hold debris left there by lower layers,
so the field needs to be cleared before it confuses the route code.
This was seen when running netem over loopback, but there are probably
other device cases.

Maybe this should go into stable?

Signed-off-by: Stephen Hemminger
Signed-off-by: David S. Miller
---
 net/ipv4/ip_input.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e1a7dba2fa8a..184c78ca79e6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -428,6 +428,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 	}
 
+	/* Remove any debris in the socket control block */
+	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+
 	return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
--
cgit v1.2.3

From b3a6251915df9e3d80d4a0d32bd8d24223906688 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Fri, 14 Jul 2006 16:32:27 -0700
Subject: [PKT_SCHED] HTB: initialize upper bound properly

The upper bound for the HTB time diff needs to be scaled to PSCHED
units rather than assumed to be in microseconds. The mbuffer field is
used as the upper bound in TDIFF_SAFE().

Signed-off-by: Stephen Hemminger
Acked-by: Jamal Hadi Salim
Signed-off-by: David S. Miller
---
 net/sched/sch_htb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 34afe41fa2f3..cd0a973b1128 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -196,7 +196,7 @@ struct htb_class
     struct qdisc_rate_table *rate;  /* rate table of the class itself */
     struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
     long buffer,cbuffer;            /* token bucket depth/rate */
-    long mbuffer;                   /* max wait time */
+    psched_tdiff_t mbuffer;         /* max wait time */
     long tokens,ctokens;            /* current number of tokens */
     psched_time_t t_c;              /* checkpoint time */
 };
@@ -1601,7 +1601,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	/* set class to be in HTB_CAN_SEND state */
 	cl->tokens = hopt->buffer;
 	cl->ctokens = hopt->cbuffer;
-	cl->mbuffer = 60000000; /* 1min */
+	cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */
 	PSCHED_GET_TIME(cl->t_c);
 
 	cl->cmode = HTB_CAN_SEND;
--
cgit v1.2.3
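The HTB fix above is purely a units problem: mbuffer is compared
against time deltas measured in PSCHED ticks, so a constant stored as a
raw microsecond count is only correct when one tick happens to equal
one microsecond. The sketch below works the arithmetic in userspace.
HZ, PSCHED_TICKS_PER_USEC and jiffies_to_ticks are assumed stand-ins
for illustration (the real scale factor depends on which PSCHED clock
source the kernel is built with); none of them are the kernel's
definitions.

#include <stdio.h>

#define HZ 1000				/* assumed jiffy rate */
#define PSCHED_TICKS_PER_USEC 16	/* assumed clock granularity */

/* Rough analogue of PSCHED_JIFFIE2US(): jiffies -> scheduler ticks. */
static long long jiffies_to_ticks(long jiffies)
{
	return (long long)jiffies * (1000000 / HZ) * PSCHED_TICKS_PER_USEC;
}

int main(void)
{
	long long old_bound = 60000000;			 /* "1 min" in usecs */
	long long new_bound = jiffies_to_ticks(60 * HZ); /* 1 min in ticks */

	printf("old bound: %lld ticks = ~%lld s of real time\n",
	       old_bound, old_bound / (1000000LL * PSCHED_TICKS_PER_USEC));
	printf("new bound: %lld ticks = ~%lld s of real time\n",
	       new_bound, new_bound / (1000000LL * PSCHED_TICKS_PER_USEC));
	return 0;
}

On the assumed 16-ticks-per-usec clock the old constant covers only
about 3 seconds instead of a minute, so the "max wait time" bound fired
far too early; on a plain one-tick-per-usec clock the two values
coincide, which is why the bug was easy to miss.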
From 0610d11b53ad15200618e38e4511373e3ed09e8a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger
Date: Fri, 14 Jul 2006 16:34:22 -0700
Subject: [VLAN]: __vlan_hwaccel_rx can use the faster compare_ether_addr

The inline function compare_ether_addr is faster than memcmp. Also,
there is no need to drag in proc_fs.h: the only reference to
proc_dir_entry is through a pointer, so the full definition is not
needed here.

Signed-off-by: Stephen Hemminger
Acked-by: Ben Greear
Signed-off-by: David S. Miller
---
 include/linux/if_vlan.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index eef0876d8307..383627ad328f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -23,8 +23,8 @@ struct vlan_collection;
 struct vlan_dev_info;
 struct hlist_node;
 
-#include <linux/proc_fs.h> /* for proc_dir_entry */
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 
 #define VLAN_HLEN	4	/* The additional bytes (on top of the Ethernet header)
				 * that VLAN requires.
@@ -185,7 +185,8 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb,
		 * This allows the VLAN to have a different MAC than the underlying
		 * device, and still route correctly.
		 */
-		if (!memcmp(eth_hdr(skb)->h_dest, skb->dev->dev_addr, ETH_ALEN))
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					skb->dev->dev_addr))
			skb->pkt_type = PACKET_HOST;
		break;
	};
--
cgit v1.2.3
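A footnote on the VLAN patch: the win from compare_ether_addr comes
from specialization. An Ethernet address is exactly ETH_ALEN == 6 bytes
and, inside a packet header, at least 16-bit aligned, so equality can
be tested with three 16-bit XORs and two ORs instead of memcmp's
general-purpose byte loop. The userspace sketch below mirrors that
idea; it uses memcpy where the kernel helper loads u16s directly
(relying on alignment the kernel can guarantee and this sketch cannot),
so treat it as an illustration rather than the kernel source.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define ETH_ALEN 6

/* Returns 0 when the addresses are equal, nonzero otherwise; this is
 * the same convention as memcmp, so the call site keeps its !-test. */
static unsigned compare_ether_addr(const uint8_t *addr1,
				   const uint8_t *addr2)
{
	uint16_t a[3], b[3];

	memcpy(a, addr1, ETH_ALEN);	/* stand-in for aligned u16 loads */
	memcpy(b, addr2, ETH_ALEN);
	return (a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]);
}

int main(void)
{
	const uint8_t dev_addr[ETH_ALEN] = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
	const uint8_t h_dest[ETH_ALEN]   = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };

	if (!compare_ether_addr(h_dest, dev_addr))
		printf("addresses match: would set PACKET_HOST\n");
	return 0;
}

The branch-free XOR/OR form also avoids memcmp's early-exit branches,
which for a fixed 6-byte compare tend to cost more than they save.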