From 33f99dc7fd948bbc808a24a0989c167f8973b643 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:04:37 +0000 Subject: ipv4: Fix packet size calculation in __ip_append_data Git commit 59104f06 (ip: take care of last fragment in ip_append_data) added a check to see if we exceed the mtu when we add trailer_len. However, the mtu is already subtracted by the trailer length when the xfrm transfomation bundles are set up. So IPsec packets with mtu size get fragmented, or if the DF bit is set the packets will not be send even though they match the mtu perfectly fine. This patch actually reverts commit 59104f06. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a8024eaa0e87..6b894d430e61 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,12 +888,9 @@ alloc_new_skb: * because we have no idea what fragment will be * the last. */ - if (datalen == length + fraggap) { + if (datalen == length + fraggap) alloclen += rt->dst.trailer_len; - /* make sure mtu is not reached */ - if (datalen > mtu - fragheaderlen - rt->dst.trailer_len) - datalen -= ALIGN(rt->dst.trailer_len, 8); - } + if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, -- cgit v1.2.3 From 353e5c9abd900de3d1a40925386ffe4abf76111e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:05:37 +0000 Subject: ipv4: Fix IPsec slowpath fragmentation problem ip_append_data() builds packets based on the mtu from dst_mtu(rt->dst.path). On IPsec the effective mtu is lower because we need to add the protocol headers and trailers later when we do the IPsec transformations. So after the IPsec transformations the packet might be too big, which leads to a slowpath fragmentation then. This patch fixes this by building the packets based on the lower IPsec mtu from dst_mtu(&rt->dst) and adapts the exthdr handling to this. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6b894d430e61..4a7e16b5d3f3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -802,8 +802,6 @@ static int __ip_append_data(struct sock *sk, skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - length += exthdrlen; - transhdrlen += exthdrlen; mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -883,6 +881,8 @@ alloc_new_skb: else alloclen = fraglen; + alloclen += exthdrlen; + /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be @@ -923,11 +923,11 @@ alloc_new_skb: /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen + exthdrlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); - data += fragheaderlen; + data += fragheaderlen + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( @@ -1061,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ *rtp = NULL; cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; -- cgit v1.2.3 From c146066ab80267c3305de5dda6a4083f06df9265 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:19:32 +0000 Subject: ipv4: Don't use ufo handling on later transformed packets We might call ip_ufo_append_data() for packets that will be IPsec transformed later. This function should be used just for real udp packets. So we check for rt->dst.header_len which is only nonzero on IPsec handling and call ip_ufo_append_data() just if rt->dst.header_len is zero. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_output.c') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4a7e16b5d3f3..84f26e8e6c60 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); -- cgit v1.2.3