From 345f6b8beb20338a2d792bf5974d01d457abee53 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 11 Jul 2006 23:16:34 +0100 Subject: [PATCH] softmac: do shared key auth in workqueue Johann Uhrmann reported a bcm43xx crash and Michael Buesch tracked it down to a problem with the new shared key auth code (recursive calls into the driver) This patch (effectively Michael's patch with a couple of small modifications) solves the problem by sending the authentication challenge response frame from a workqueue entry. I also removed a lone \n from the bcm43xx messages relating to authentication mode - this small change was previously discussed but not patched in. Signed-off-by: Daniel Drake Acked-by: Johannes Berg Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- net/ieee80211/softmac/ieee80211softmac_auth.c | 28 +++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ieee80211/softmac/ieee80211softmac_auth.c b/net/ieee80211/softmac/ieee80211softmac_auth.c index ebc33ca6e692..4cef39e171d0 100644 --- a/net/ieee80211/softmac/ieee80211softmac_auth.c +++ b/net/ieee80211/softmac/ieee80211softmac_auth.c @@ -116,6 +116,16 @@ ieee80211softmac_auth_queue(void *data) kfree(auth); } +/* Sends a response to an auth challenge (for shared key auth). */ +static void +ieee80211softmac_auth_challenge_response(void *_aq) +{ + struct ieee80211softmac_auth_queue_item *aq = _aq; + + /* Send our response */ + ieee80211softmac_send_mgt_frame(aq->mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); +} + /* Handle the auth response from the AP * This should be registered with ieee80211 as handle_auth */ @@ -197,24 +207,30 @@ ieee80211softmac_auth_resp(struct net_device *dev, struct ieee80211_auth *auth) case IEEE80211SOFTMAC_AUTH_SHARED_CHALLENGE: /* Check to make sure we have a challenge IE */ data = (u8 *)auth->info_element; - if(*data++ != MFIE_TYPE_CHALLENGE){ + if (*data++ != MFIE_TYPE_CHALLENGE) { printkl(KERN_NOTICE PFX "Shared Key Authentication failed due to a missing challenge.\n"); break; } /* Save the challenge */ spin_lock_irqsave(&mac->lock, flags); net->challenge_len = *data++; - if(net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) + if (net->challenge_len > WLAN_AUTH_CHALLENGE_LEN) net->challenge_len = WLAN_AUTH_CHALLENGE_LEN; - if(net->challenge != NULL) + if (net->challenge != NULL) kfree(net->challenge); net->challenge = kmalloc(net->challenge_len, GFP_ATOMIC); memcpy(net->challenge, data, net->challenge_len); aq->state = IEEE80211SOFTMAC_AUTH_SHARED_RESPONSE; - spin_unlock_irqrestore(&mac->lock, flags); - /* Send our response */ - ieee80211softmac_send_mgt_frame(mac, aq->net, IEEE80211_STYPE_AUTH, aq->state); + /* We reuse the work struct from the auth request here. + * It is safe to do so as each one is per-request, and + * at this point (dealing with authentication response) + * we have obviously already sent the initial auth + * request. */ + cancel_delayed_work(&aq->work); + INIT_WORK(&aq->work, &ieee80211softmac_auth_challenge_response, (void *)aq); + schedule_work(&aq->work); + spin_unlock_irqrestore(&mac->lock, flags); return 0; case IEEE80211SOFTMAC_AUTH_SHARED_PASS: kfree(net->challenge); -- cgit v1.2.3 From ca0084fa90533687b6317e6d084141da87c74d5c Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Fri, 14 Jul 2006 18:51:41 -0400 Subject: [PATCH] ieee80211: TKIP requires CRC32 ieee80211_crypt_tkip will not work without CRC32. LD .tmp_vmlinux1 net/built-in.o: In function `ieee80211_tkip_encrypt': net/ieee80211/ieee80211_crypt_tkip.c:349: undefined reference to `crc32_le' Reported by Toralf Foerster Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: John W. Linville --- net/ieee80211/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index dbb08528ddf5..f7e84e9d13ad 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -58,6 +58,7 @@ config IEEE80211_CRYPT_TKIP depends on IEEE80211 && NET_RADIO select CRYPTO select CRYPTO_MICHAEL_MIC + select CRC32 ---help--- Include software based cipher suites in support of IEEE 802.11i (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled -- cgit v1.2.3 From 643162258e57180a33e0ef7f08f0d986fbb5b4b9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 28 Jul 2006 18:12:09 +0900 Subject: [IPV6] ADDRCONF: Check payload length for IFA_LOCAL attribute in RTM_{ADD,DEL}MSG message Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2316a4315a18..81702b9ba5be 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2853,7 +2853,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } @@ -2877,7 +2878,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } -- cgit v1.2.3 From 0778769d392b5b80410673f53e4f946574ebacf7 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:10 +0900 Subject: [IPV6] ADDRCONF: Allow user-space to specify address lifetime Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 81702b9ba5be..c0641887bdeb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1869,15 +1869,21 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; @@ -1889,10 +1895,29 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1945,7 +1970,8 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2870,6 +2896,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2886,7 +2913,18 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (rta[IFA_CACHEINFO-1]) { + struct ifa_cacheinfo *ci; + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + valid_lft = ci->ifa_valid; + prefered_lft = ci->ifa_prefered; + } + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); + } /* Maximum length of ifa_cacheinfo attributes */ -- cgit v1.2.3 From 8f27ebb9823b7f6b7a67ab325b515f75ba51bf4c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 28 Jul 2006 18:12:11 +0900 Subject: [IPV6] ADDRCONF: Do not verify an address with infinity lifetime We also do not try regenarating new temporary address corresponding to an address with infinite preferred lifetime. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c0641887bdeb..93a40a8ade88 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2797,12 +2797,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; -- cgit v1.2.3 From 6c223828058bc45f070d35b63d4a819a8df0146d Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:12 +0900 Subject: [IPV6] ADDRCONF: Support get operation of single address Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93a40a8ade88..3ef3fe20283f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3165,6 +3165,62 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; + + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; + + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; + + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto out; + } + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + in6_ifa_put(ifa); + return err; +out_free: + kfree_skb(skb); + goto out; +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; @@ -3407,7 +3463,8 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, -- cgit v1.2.3 From 081bba5b3ace5698eccf2f1a378cd4a9a4c98a85 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 28 Jul 2006 18:12:13 +0900 Subject: [IPV6] ADDRCONF: NLM_F_REPLACE support for RTM_NEWADDR Based on MIPL2 kernel patch. Signed-off-by: Noriaki YAKAMIYA Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3ef3fe20283f..8ea1e36bf8eb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2894,6 +2894,55 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } +static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) +{ + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags = IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags = IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + + addrconf_verify(0); + + return 0; +} + static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { @@ -2926,6 +2975,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) prefered_lft = ci->ifa_prefered; } + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; + } + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, prefered_lft, valid_lft); -- cgit v1.2.3 From 679e898a4742d4a4a47430b67fd68a789a73dcfd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 30 Jul 2006 20:19:11 -0700 Subject: [XFRM]: Fix protocol field value for outgoing IPv6 GSO packets Signed-off-by: Patrick McHardy Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/xfrm6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0eea60ea9ebc..c8c8b44a0f58 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -125,7 +125,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) if (!skb_is_gso(skb)) return xfrm6_output_finish2(skb); - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (unlikely(IS_ERR(segs))) -- cgit v1.2.3 From 497c615abad7ee81994dd592194535aea2aad617 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 20:19:33 -0700 Subject: [IPV6]: Audit all ip6_dst_lookup/ip6_dst_store calls The current users of ip6_dst_lookup can be divided into two classes: 1) The caller holds no locks and is in user-context (UDP). 2) The caller does not want to lookup the dst cache at all. The second class covers everyone except UDP because most people do the cache lookup directly before calling ip6_dst_lookup. This patch adds ip6_sk_dst_lookup for the first class. Similarly ip6_dst_store users can be divded into those that need to take the socket dst lock and those that don't. This patch adds __ip6_dst_store for those (everyone except UDP/datagram) that don't need an extra lock. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 4 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 120 ++++++++++++++++++++++++++------------- net/ipv6/tcp_ipv6.c | 4 +- net/ipv6/udp.c | 2 +- 6 files changed, 88 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f3d4d7cd0bf..610c722ac27f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -230,7 +230,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -863,7 +863,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5a0ba58b86cc..ac85e9c532c2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -658,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5c950cc79d80..bf491077b822 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3bc74ce78800..5e74a37695f7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -723,48 +723,51 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) { - int err = 0; + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - * case is not very simple. Take into account, - * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) - * - * 1. If route was host route, check that - * cached destination is current. - * If it is network route, we still may - * check its validity using saved pointer - * to the last used address: daddr_cache. - * We do not want to save whole address now, - * (because main consumer of this service - * is tcp, which has not this problem), - * so that the last trick works only on connected - * sockets. - * 2. oif also should be the same. - */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, - &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, - np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. If route was host route, check that + * cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. + */ + if (((rt->rt6i_dst.plen != 128 || + !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -773,7 +776,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } @@ -786,8 +788,48 @@ out_err_release: return err; } +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} EXPORT_SYMBOL_GPL(ip6_dst_lookup); +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 923989d0520d..b76fd7fba5f5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -270,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -947,7 +947,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccc57f434cd3..3d54f246411e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,7 +782,7 @@ do_udp_sendmsg: connected = 0; } - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; if (final_p) -- cgit v1.2.3 From f4d26fb336f3c08066bffbe907d3104be4fb91a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 20:20:28 -0700 Subject: [NET]: Fix ___pskb_trim when entire frag_list needs dropping When the trim point is within the head and there is no paged data, ___pskb_trim fails to drop the first element in the frag_list. This patch fixes this by moving the len <= offset case out of the page data loop. This patch also adds a missing kfree_skb on the frag that we just cloned. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 476aa3978504..d236f02c6467 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -846,7 +846,11 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; - for (i = 0; i < nfrags; i++) { + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; if (end < len) { @@ -854,9 +858,9 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) continue; } - if (len > offset) - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_shinfo(skb)->frags[i++].size = len - offset; +drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) @@ -864,7 +868,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) if (skb_shinfo(skb)->frag_list) skb_drop_fraglist(skb); - break; + goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); @@ -879,6 +883,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) return -ENOMEM; nfrag->next = frag->next; + kfree_skb(frag); frag = nfrag; *fragp = frag; } @@ -897,6 +902,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) break; } +done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; -- cgit v1.2.3 From 118075b3cdc90e0815362365f3fc64d672ace0d6 Mon Sep 17 00:00:00 2001 From: James Morris Date: Sun, 30 Jul 2006 20:21:45 -0700 Subject: [TCP]: fix memory leak in net/ipv4/tcp_probe.c::tcpprobe_read() Based upon a patch by Jesper Juhl. Signed-off-by: James Morris Acked-by: Stephen Hemminger Acked-by: Jesper Juhl Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d7d517a3a238..b3435324b573 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt; + int error = 0, cnt = 0; unsigned char *tbuf; if (!buf || len < 0) -- cgit v1.2.3 From 3687b1dc6fe83a500ba4d3235704594f6a111a2d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 30 Jul 2006 20:35:54 -0700 Subject: [TCP]: SNMPv2 tcpAttemptFails counter error Refer to RFC2012, tcpAttemptFails is defined as following: tcpAttemptFails OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of times TCP connections have made a direct transition to the CLOSED state from either the SYN-SENT state or the SYN-RCVD state, plus the number of times TCP connections have made a direct transition to the LISTEN state from the SYN-RCVD state." ::= { tcp 7 } When I lookup into RFC793, I found that the state change should occured under following condition: 1. SYN-SENT -> CLOSED a) Received ACK,RST segment when SYN-SENT state. 2. SYN-RCVD -> CLOSED b) Received SYN segment when SYN-RCVD state(came from LISTEN). c) Received RST segment when SYN-RCVD state(came from SYN-SENT). d) Received SYN segment when SYN-RCVD state(came from SYN-SENT). 3. SYN-RCVD -> LISTEN e) Received RST segment when SYN-RCVD state(came from LISTEN). In my test, those direct state transition can not be counted to tcpAttemptFails. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 -- net/ipv4/tcp_minisocks.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6f39e814291..4b04c3edd4a9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) It can f.e. if SYNs crossed. */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -874,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ccb7cb22b15..624e2b2c7f53 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; + } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b76fd7fba5f5..b843a650be71 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -427,7 +427,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -831,7 +830,6 @@ drop: if (req) reqsk_free(req); - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; /* don't send reset */ } -- cgit v1.2.3 From 792d1932e319ff8ba01361e7d151b1794c55c31f Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 30 Jul 2006 20:43:26 -0700 Subject: [NET]: Network Event Notifier Mechanism. This patch uses notifier blocks to implement a network event notifier mechanism. Clients register their callback function by calling register_netevent_notifier() like this: static struct notifier_block nb = { .notifier_call = my_callback_func }; ... register_netevent_notifier(&nb); Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- net/core/netevent.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 net/core/netevent.c (limited to 'net') diff --git a/net/core/netevent.c b/net/core/netevent.c new file mode 100644 index 000000000000..35d02c38554e --- /dev/null +++ b/net/core/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker + * Steve Wise + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include +#include + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); -- cgit v1.2.3 From 8d71740c56a9058acc4378504a356d543ff1308b Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Sun, 30 Jul 2006 20:43:36 -0700 Subject: [NET]: Core net changes to generate netevents Generate netevents for: - neighbour changes - routing redirects - pmtu changes Signed-off-by: Tom Tucker Signed-off-by: Steve Wise Signed-off-by: David S. Miller --- net/core/Makefile | 2 +- net/core/neighbour.c | 14 ++++++++------ net/ipv4/route.c | 8 ++++++++ net/ipv6/route.c | 7 +++++++ 4 files changed, 24 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/Makefile b/net/core/Makefile index e9bd2467d5a9..2645ba428d48 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7ad681f5e712..5130d2efdbbe 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -754,6 +755,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); + notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -762,6 +764,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); + notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -819,6 +822,8 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -926,9 +931,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; int update_isrouter = 0; @@ -948,9 +951,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify = old & NUD_VALID; -#endif goto out; } @@ -1022,9 +1023,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -1056,6 +1055,9 @@ out: (neigh->flags & ~NTF_ROUTER); } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2dc6dbb28467..19bd49d69d9f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -104,6 +104,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct netevent_redirect netevent; if (!in_dev) return; @@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } + + netevent.old = &rth->u.dst; + netevent.new = &rt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87c39c978cd0..4b163711f3a8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -742,6 +743,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; } dst->metrics[RTAX_MTU-1] = mtu; + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1155,6 +1157,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct rt6_info *rt, *nrt = NULL; int strict; struct fib6_node *fn; + struct netevent_redirect netevent; /* * Get the "current" route for this destination and @@ -1252,6 +1255,10 @@ restart: if (ip6_ins_rt(nrt, NULL, NULL, NULL)) goto out; + netevent.old = &rt->u.dst; + netevent.new = &nrt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); + if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt, NULL, NULL, NULL); return; -- cgit v1.2.3 From a280b89982f48e9a32c6410a37419b12ca88af6b Mon Sep 17 00:00:00 2001 From: James Morris Date: Sun, 30 Jul 2006 20:46:38 -0700 Subject: [SECURITY] secmark: nul-terminate secdata The patch below fixes a problem in the iptables SECMARK target, where the user-supplied 'selctx' string may not be nul-terminated. From initial analysis, it seems that the strlen() called from selinux_string_to_sid() could run until it arbitrarily finds a zero, and possibly cause a kernel oops before then. The impact of this appears limited because the operation requires CAP_NET_ADMIN, which is essentially always root. Also, the module is not yet in wide use. Signed-off-by: James Morris Signed-off-by: Stephen Smalley Signed-off-by: David S. Miller --- net/netfilter/xt_SECMARK.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c2ce9c4011cc..de9537ad9a7c 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { -- cgit v1.2.3 From 52499afe40387524e9f46ef9ce4695efccdd2ed9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 31 Jul 2006 22:32:09 -0700 Subject: [TCP]: Process linger2 timeout consistently. Based upon guidance from Alexey Kuznetsov. When linger2 is active, we check to see if the fin_wait2 timeout is longer than the timewait. If it is, we schedule the keepalive timer for the difference between the timewait timeout and the fin_wait2 timeout. When this orphan socket is seen by tcp_keepalive_timer() it will try to transform this fin_wait2 socket into a fin_wait2 mini-socket, again if linger2 is active. Not all paths were setting this initial keepalive timer correctly. The tcp input path was doing it correctly, but tcp_close() wasn't, potentially making the socket linger longer than it really needs to. Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6a2d9223d07..7b621e44b124 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1659,7 +1659,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; -- cgit v1.2.3 From 8af2745645243b5e5b031504a643bf2158571dc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Jul 2006 22:35:23 -0700 Subject: [NET]: Add netdev_alloc_skb(). Add a dev_alloc_skb variant that takes a struct net_device * paramater. For now that paramater is unused, but I'll use it to allocate the skb from node-local memory in a follow-up patch. Also there have been some other plans mentioned on the list that can use it. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/skbuff.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d236f02c6467..71487b915d67 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -256,6 +256,29 @@ nodata: goto out; } +/** + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + if (likely(skb)) + skb_reserve(skb, NET_SKB_PAD); + return skb; +} static void skb_drop_list(struct sk_buff **listp) { @@ -2048,6 +2071,7 @@ EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); EXPORT_SYMBOL(__alloc_skb); +EXPORT_SYMBOL(__netdev_alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); -- cgit v1.2.3 From b10866fd7dd9ae9b8dd03646d28702a76d624474 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 31 Jul 2006 23:46:18 -0700 Subject: [NETFILTER]: SIP helper: expect RTP streams in both directions Since we don't know in which direction the first packet will arrive, we need to create one expectation for each direction, which is currently prevented by max_expected beeing set to 1. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index fc87ce0da40d..4f222d6be009 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -442,7 +442,7 @@ static int __init init(void) sip[i].tuple.src.u.udp.port = htons(ports[i]); sip[i].mask.src.u.udp.port = 0xFFFF; sip[i].mask.dst.protonum = 0xFF; - sip[i].max_expected = 1; + sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ sip[i].me = THIS_MODULE; sip[i].help = sip_help; -- cgit v1.2.3 From 3ab720881b6e36bd5190a3a11cee8d8d067c4ad7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 31 Jul 2006 23:47:31 -0700 Subject: [NETFILTER]: xt_hashlimit/xt_string: missing string validation The hashlimit table name and the textsearch algorithm need to be terminated, the textsearch pattern length must not exceed the maximum size. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_hashlimit.c | 3 +++ net/netfilter/xt_string.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 92980ab8ce48..6b662449e825 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -508,6 +508,9 @@ hashlimit_checkentry(const char *tablename, if (!r->cfg.expire) return 0; + if (r->name[sizeof(r->name) - 1] != '\0') + return 0; + /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before ip_tables.c grabs ipt_mutex. * We also cannot grab the hashtable spinlock, since htable_create will diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0ebb6ac2c8c7..d8e3891b5f8b 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -55,7 +55,10 @@ static int checkentry(const char *tablename, /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return 0; - + if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') + return 0; + if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) + return 0; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) -- cgit v1.2.3 From b60dfc6c20bd5f19de0083362ce377c89b1e5a24 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Aug 2006 00:00:12 -0700 Subject: [NET]: Kill the WARN_ON() calls for checksum fixups. We have a more complete solution in the works, involving the seperation of CHECKSUM_HW on input vs. output, and having netfilter properly do incremental checksums. But that is a very involved patch and is thus 2.6.19 material. What we have now is infinitely better than the past, wherein all TSO packets were dropped due to corrupt checksums as soon at the NAT module was loaded. At least now, the checksums do get fixed up, it just isn't the cleanest nor most optimal solution. Signed-off-by: David S. Miller --- net/core/dev.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4d2b5167d7f5..5b630cece707 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,11 +1166,6 @@ int skb_checksum_help(struct sk_buff *skb, int inward) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - /* Let GSO fix up the checksum. */ goto out_set_summed; } @@ -1220,11 +1215,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_HW)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); -- cgit v1.2.3 From 32c524d1c48b62be49fa1b1dd93fed10792debc0 Mon Sep 17 00:00:00 2001 From: Wei Dong Date: Wed, 2 Aug 2006 13:39:57 -0700 Subject: [IPV6]: SNMPv2 "ipv6IfStatsInHdrErrors" counter error When I tested Linux kernel 2.6.17.7 about statistics "ipv6IfStatsInHdrErrors", found that this counter couldn't increase correctly. The criteria is RFC2465: ipv6IfStatsInHdrErrors OBJECT-TYPE SYNTAX Counter3 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of input datagrams discarded due to errors in their IPv6 headers, including version number mismatch, other format errors, hop count exceeded, errors discovered in processing their IPv6 options, etc." ::= { ipv6IfStatsEntry 2 } When I send TTL=0 and TTL=1 a packet to a router which need to be forwarded, router just sends an ICMPv6 message to tell the sender that TIME_EXCEED and HOPLIMITS, but no increments for this counter(in the function ip6_forward). Signed-off-by: Wei Dong Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5e74a37695f7..70c9234b70e7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -356,6 +356,7 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; -- cgit v1.2.3 From dafee490858f79e144c5e6cdd84ceb9efa20a3f1 Mon Sep 17 00:00:00 2001 From: Wei Dong Date: Wed, 2 Aug 2006 13:41:21 -0700 Subject: [IPV6]: SNMPv2 "ipv6IfStatsOutFragCreates" counter error When I tested linux kernel 2.6.71.7 about statistics "ipv6IfStatsOutFragCreates", and found that it couldn't increase correctly. The criteria is RFC 2465: ipv6IfStatsOutFragCreates OBJECT-TYPE SYNTAX Counter32 MAX-ACCESS read-only STATUS current DESCRIPTION "The number of output datagram fragments that have been generated as a result of fragmentation at this output interface." ::= { ipv6IfStatsEntry 15 } I think there are two issues in Linux kernel. 1st: RFC2465 specifies the counter is "The number of output datagram fragments...". I think increasing this counter after output a fragment successfully is better. And it should not be increased even though a fragment is created but failed to output. 2nd: If we send a big ICMP/ICMPv6 echo request to a host, and receive ICMP/ICMPv6 echo reply consisted of some fragments. As we know that in Linux kernel first fragmentation occurs in ICMP layer(maybe saying transport layer is better), but this is not the "real" fragmentation,just do some "pre-fragment" -- allocate space for date, and form a frag_list, etc. The "real" fragmentation happens in IP layer -- set offset and MF flag and so on. So I think in "fast path" for ip_fragment/ip6_fragment, if we send a fragment which "pre-fragment" by upper layer we should also increase "ipv6IfStatsOutFragCreates". Signed-off-by: Wei Dong Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++++--- net/ipv6/ip6_output.c | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7c9f9a6421b8..9bf307a29783 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -526,6 +526,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); + if (!err) + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -649,9 +651,6 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); - iph->tot_len = htons(len + hlen); ip_send_check(iph); @@ -659,6 +658,8 @@ slow_path: err = output(skb2); if (err) goto fail; + + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 70c9234b70e7..69451af6abe7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -596,6 +596,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); + if(!err) + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -707,12 +710,11 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); -- cgit v1.2.3 From 2b7e24b66d31d677d76b49918e711eb360c978b6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 2 Aug 2006 14:07:58 -0700 Subject: [NET]: skb_queue_lock_key() is no longer used. Signed-off-by: Adrian Bunk Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/skbuff.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 71487b915d67..022d8894c11d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,13 +70,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; -/* - * lockdep: lock class key used by skb_queue_head_init(): - */ -struct lock_class_key skb_queue_lock_key; - -EXPORT_SYMBOL(skb_queue_lock_key); - /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always -- cgit v1.2.3 From dc49c1f94e3469d94b952e8f5160dd4ccd791d79 Mon Sep 17 00:00:00 2001 From: Catherine Zhang Date: Wed, 2 Aug 2006 14:12:06 -0700 Subject: [AF_UNIX]: Kernel memory leak fix for af_unix datagram getpeersec patch From: Catherine Zhang This patch implements a cleaner fix for the memory leak problem of the original unix datagram getpeersec patch. Instead of creating a security context each time a unix datagram is sent, we only create the security context when the receiver requests it. This new design requires modification of the current unix_getsecpeer_dgram LSM hook and addition of two new hooks, namely, secid_to_secctx and release_secctx. The former retrieves the security context and the latter releases it. A hook is required for releasing the security context because it is up to the security module to decide how that's done. In the case of Selinux, it's a simple kfree operation. Acked-by: Stephen Smalley Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 9 +++++++-- net/unix/af_unix.c | 17 +++++------------ 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84f43a3c9098..2d05c4133d3e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; - u32 seclen; + u32 seclen, secid; int err; - err = security_socket_getpeersec_dgram(skb, &secdata, &seclen); + err = security_socket_getpeersec_dgram(NULL, skb, &secid); + if (err) + return; + + err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6f2909279268..de6ec519272e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -128,23 +128,17 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK -static void unix_get_peersec_dgram(struct sk_buff *skb) +static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - int err; - - err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb), - UNIXSECLEN(skb)); - if (err) - *(UNIXSECDATA(skb)) = NULL; + memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secdata = *UNIXSECDATA(skb); - scm->seclen = *UNIXSECLEN(skb); + scm->secid = *UNIXSID(skb); } #else -static inline void unix_get_peersec_dgram(struct sk_buff *skb) +static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -1322,8 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); - - unix_get_peersec_dgram(skb); + unix_get_secdata(siocb->scm, skb); skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); -- cgit v1.2.3 From 9bbf28a1ff7b9d4e7df57829c25638721984277b Mon Sep 17 00:00:00 2001 From: Patrick Caulfield Date: Wed, 2 Aug 2006 14:14:44 -0700 Subject: [DECNET]: Fix for routing bug This patch fixes a bug in the DECnet routing code where we were selecting a loopback device in preference to an outward facing device even when the destination was known non-local. This patch should fix the problem. Signed-off-by: Patrick Caulfield Signed-off-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1355614ec11b..743e9fcf7c5a 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -925,8 +925,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; - if (dn_dev_islocal(dev_out, oldflp->fld_src)) - break; + if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + continue; + if ((dev_out->flags & IFF_LOOPBACK) && + oldflp->fld_dst && + !dn_dev_islocal(dev_out, oldflp->fld_dst)) + continue; + break; } read_unlock(&dev_base_lock); if (dev_out == NULL) -- cgit v1.2.3 From e6eb307d48c81d688804f8b39a0a3ddde3cd3458 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Wed, 2 Aug 2006 14:21:19 -0700 Subject: [I/OAT]: Remove CPU hotplug lock from net_dma_rebalance Remove the lock_cpu_hotplug()/unlock_cpu_hotplug() calls from net_dma_rebalance The lock_cpu_hotplug()/unlock_cpu_hotplug() sequence in net_dma_rebalance is both incorrect (as pointed out by David Miller) because lock_cpu_hotplug() may sleep while the net_dma_event_lock spinlock is held, and unnecessary (as pointed out by Andrew Morton) as spin_lock() disables preemption which protects from CPU hotplug events. Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- net/core/dev.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5b630cece707..f25d7ecaf035 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3419,12 +3419,9 @@ static void net_dma_rebalance(void) unsigned int cpu, i, n; struct dma_chan *chan; - lock_cpu_hotplug(); - if (net_dma_count == 0) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); - unlock_cpu_hotplug(); return; } @@ -3444,8 +3441,6 @@ static void net_dma_rebalance(void) i++; } rcu_read_unlock(); - - unlock_cpu_hotplug(); } /** -- cgit v1.2.3 From 29bbd72d6ee1dbf2d9f00d022f8e999aa528fb3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 2 Aug 2006 15:02:31 -0700 Subject: [NET]: Fix more per-cpu typos Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/ipv4/tcp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f25d7ecaf035..d95e2626d944 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3421,7 +3421,7 @@ static void net_dma_rebalance(void) if (net_dma_count == 0) { for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; } @@ -3434,7 +3434,7 @@ static void net_dma_rebalance(void) + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); while(n) { - per_cpu(softnet_data.net_dma, cpu) = chan; + per_cpu(softnet_data, cpu).net_dma = chan; cpu = next_cpu(cpu, cpu_online_map); n--; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7b621e44b124..934396bb1376 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else -- cgit v1.2.3 From e0ab53deaa91293a7958d63d5a2cf4c5645ad6f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Jul 2006 17:22:50 -0400 Subject: RPC: Ensure that we disconnect TCP socket when client requests error out If we're part way through transmitting a TCP request, and the client errors, then we need to disconnect and reconnect the TCP socket in order to avoid confusing the server. Signed-off-by: Trond Myklebust (cherry picked from 031a50c8b9ea82616abd4a4e18021a25848941ce commit) --- net/sunrpc/clnt.c | 52 ++++++++++++++++++++++++++++----------------------- net/sunrpc/xprt.c | 21 +++------------------ net/sunrpc/xprtsock.c | 29 +++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4ba271f892c8..d6409e757219 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -921,26 +921,43 @@ call_transmit(struct rpc_task *task) task->tk_status = xprt_prepare_transmit(task); if (task->tk_status != 0) return; + task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ if (rpc_task_need_encode(task)) { - task->tk_rqstp->rq_bytes_sent = 0; + BUG_ON(task->tk_rqstp->rq_bytes_sent != 0); call_encode(task); /* Did the encode result in an error condition? */ if (task->tk_status != 0) - goto out_nosend; + return; } - task->tk_action = call_transmit_status; xprt_transmit(task); if (task->tk_status < 0) return; - if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); - } - return; -out_nosend: - /* release socket write lock before attempting to handle error */ - xprt_abort_transmit(task); + /* + * On success, ensure that we call xprt_end_transmit() before sleeping + * in order to allow access to the socket to other RPC requests. + */ + call_transmit_status(task); + if (task->tk_msg.rpc_proc->p_decode != NULL) + return; + task->tk_action = rpc_exit_task; + rpc_wake_up_task(task); +} + +/* + * 5a. Handle cleanup after a transmission + */ +static void +call_transmit_status(struct rpc_task *task) +{ + task->tk_action = call_status; + /* + * Special case: if we've been waiting on the socket's write_space() + * callback, then don't call xprt_end_transmit(). + */ + if (task->tk_status == -EAGAIN) + return; + xprt_end_transmit(task); rpc_task_force_reencode(task); } @@ -992,18 +1009,7 @@ call_status(struct rpc_task *task) } /* - * 6a. Handle transmission errors. - */ -static void -call_transmit_status(struct rpc_task *task) -{ - if (task->tk_status != -EAGAIN) - rpc_task_force_reencode(task); - call_status(task); -} - -/* - * 6b. Handle RPC timeout + * 6a. Handle RPC timeout * We do not release the request slot, so we keep using the * same XID for all retransmits. */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 313b68d892c6..e8c2bc4977f3 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -707,12 +707,9 @@ out_unlock: return err; } -void -xprt_abort_transmit(struct rpc_task *task) +void xprt_end_transmit(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; - - xprt_release_write(xprt, task); + xprt_release_write(task->tk_xprt, task); } /** @@ -761,8 +758,6 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = -ENOTCONN; else if (!req->rq_received) rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); - - xprt->ops->release_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return; } @@ -772,18 +767,8 @@ void xprt_transmit(struct rpc_task *task) * schedq, and being picked up by a parallel run of rpciod(). */ task->tk_status = status; - - switch (status) { - case -ECONNREFUSED: + if (status == -ECONNREFUSED) rpc_sleep_on(&xprt->sending, task, NULL, NULL); - case -EAGAIN: - case -ENOTCONN: - return; - default: - break; - } - xprt_release_write(xprt, task); - return; } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee678ed13b6f..441bd53f5eca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -413,6 +413,33 @@ static int xs_tcp_send_request(struct rpc_task *task) return status; } +/** + * xs_tcp_release_xprt - clean up after a tcp transmission + * @xprt: transport + * @task: rpc task + * + * This cleans up if an error causes us to abort the transmission of a request. + * In this case, the socket may need to be reset in order to avoid confusing + * the server. + */ +static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct rpc_rqst *req; + + if (task != xprt->snd_task) + return; + if (task == NULL) + goto out_release; + req = task->tk_rqstp; + if (req->rq_bytes_sent == 0) + goto out_release; + if (req->rq_bytes_sent == req->rq_snd_buf.len) + goto out_release; + set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); +out_release: + xprt_release_xprt(xprt, task); +} + /** * xs_close - close a socket * @xprt: transport @@ -1250,7 +1277,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, - .release_xprt = xprt_release_xprt, + .release_xprt = xs_tcp_release_xprt, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, -- cgit v1.2.3 From 5c3e985a2c1908aa97221d3806f85ce7e2fbfa88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Jul 2006 17:37:40 -0400 Subject: SUNRPC: Fix obvious refcounting bugs in rpc_pipefs. Doh! Signed-off-by: Trond Myklebust (cherry picked from 496f408f2f0e7ee5481a7c2222189be6c4f5aa6c commit) --- net/sunrpc/rpc_pipe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index dc6cb93c8830..a3bd2db2e024 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -667,10 +667,11 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) RPCAUTH_info, RPCAUTH_EOF); if (error) goto err_depopulate; + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_depopulate: rpc_depopulate(dentry); __rpc_rmdir(dir, dentry); @@ -731,10 +732,11 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->flags = flags; rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); + dget(dentry); out: mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); - return dget(dentry); + return dentry; err_dput: dput(dentry); dentry = ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 782a6675119c76c071e74e2ddd98268f47770cba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 Aug 2006 23:54:41 +1000 Subject: [PATCH] Send wireless netlink events with a clean slate Drivers expect to be able to call wireless_send_event in arbitrary contexts. On the other hand, netlink really doesn't like being invoked in an IRQ context. So we need to postpone the sending of netlink skb's to a tasklet. Signed-off-by: Herbert Xu Signed-off-by: John W. Linville --- net/core/wireless.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/wireless.c b/net/core/wireless.c index d2bc72d318f7..de0bde4b51dd 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -82,6 +82,7 @@ #include /* for __init */ #include /* ARPHRD_ETHER */ #include /* compare_ether_addr */ +#include #include /* Pretty obvious */ #include /* New driver API */ @@ -1842,6 +1843,18 @@ int wireless_rtnetlink_set(struct net_device * dev, */ #ifdef WE_EVENT_RTNETLINK +static struct sk_buff_head wireless_nlevent_queue; + +static void wireless_nlevent_process(unsigned long data) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wireless_nlevent_queue))) + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); +} + +static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); + /* ---------------------------------------------------------------- */ /* * Fill a rtnetlink message with our event data. @@ -1904,8 +1917,17 @@ static inline void rtmsg_iwinfo(struct net_device * dev, return; } NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + skb_queue_tail(&wireless_nlevent_queue, skb); + tasklet_schedule(&wireless_nlevent_tasklet); +} + +static int __init wireless_nlevent_init(void) +{ + skb_queue_head_init(&wireless_nlevent_queue); + return 0; } + +subsys_initcall(wireless_nlevent_init); #endif /* WE_EVENT_RTNETLINK */ /* ---------------------------------------------------------------- */ -- cgit v1.2.3 From bea1b42e1bb184cb75e6bbd95c83e4478dde4ab9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 3 Aug 2006 16:24:02 -0700 Subject: [BRIDGE]: netlink status fix Fix code that passes back netlink status messages about bridge changes. Submitted by Aji_Srinivas@emc.com Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 06abb6634f5b..53086fb75089 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -85,7 +85,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) goto err_out; err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); - if (err) + if (err < 0) goto err_kfree; NETLINK_CB(skb).dst_group = RTNLGRP_LINK; -- cgit v1.2.3 From b9e2cc0f0e47ad351349156018ef8a365e9c6d25 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 3 Aug 2006 16:36:51 -0700 Subject: [PKT_SCHED]: Return ENOENT if qdisc module is unavailable Return ENOENT if qdisc module is unavailable Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c7844bacbbcb..a19eff12cf78 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -430,7 +430,7 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) } #endif - err = -EINVAL; + err = -ENOENT; if (ops == NULL) goto err_out; -- cgit v1.2.3 From 30a584d944fbd599d4a8f470f75bf7af1a15b466 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 3 Aug 2006 16:38:49 -0700 Subject: [LLX]: SOCK_DGRAM interface fixes The datagram interface of LLC is broken in a couple of ways. These were discovered when trying to use it to build an out-of-kernel version of STP. First it didn't pass the source address of the received packet in recvfrom(). It needs to copy the source address of received LLC packets into the socket control block. At the same time fix a security issue because there was uninitialized data leakage. Every recvfrom call was just copying out old data. Second, LLC should not merge multiple packets in one receive call on datagram sockets. LLC should preserve packet boundaries on SOCK_DGRAM. This fix goes against the old historical comments about UNIX98 semantics but without this fix SOCK_DGRAM is broken and useless. So either ANK's interpretation was incorect or UNIX98 standard was wrong. Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/af_llc.c | 20 ++++++++------------ net/llc/llc_sap.c | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index d6cfe84d521b..2652ead96c64 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -784,24 +784,20 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; - if (used + offset < skb->len) - continue; - if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } + + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + + /* Partial read */ + if (used + offset < skb->len) + continue; } while (len > 0); - /* - * According to UNIX98, msg_name/msg_namelen are ignored - * on connected socket. -ANK - * But... af_llc still doesn't have separate sets of methods for - * SOCK_DGRAM and SOCK_STREAM :-( So we have to do this test, will - * eventually fix this tho :-) -acme - */ - if (sk->sk_type == SOCK_DGRAM) - goto copy_uaddr; out: release_sock(sk); return copied; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 20c4eb5c1ac6..42eb0c3a9780 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -51,10 +51,10 @@ void llc_save_primitive(struct sock *sk, struct sk_buff* skb, u8 prim) { struct sockaddr_llc *addr; - if (skb->sk->sk_type == SOCK_STREAM) /* See UNIX98 */ - return; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); + + memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; -- cgit v1.2.3 From d254bcdbf2199d9e2a52dbe4592e79ef3a456096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 4 Aug 2006 16:57:42 -0700 Subject: [TCP]: Fixes IW > 2 cases when TCP is application limited MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever a transfer is application limited, we are allowed at least initial window worth of data per window unless cwnd is previously less than that. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 738dad9f7d49..104af5d5bcbc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3541,7 +3541,8 @@ void tcp_cwnd_application_limited(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ - u32 win_used = max(tp->snd_cwnd_used, 2U); + u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk)); + u32 win_used = max(tp->snd_cwnd_used, init_win); if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; -- cgit v1.2.3 From 558e10a57db10de355ee97712d2b6df49e9b7849 Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Sat, 5 Aug 2006 21:15:58 -0700 Subject: [LAPB]: Fix windowsize check In bug #6954, Norbert Reinartz reported the following issue: "Function lapb_setparms() in file net/lapb/lapb_iface.c checks if the given parameters are valid. If the given window size is in the range of 8 .. 127, lapb_setparms() fails and returns an error value of LAPB_INVALUE, even if bit LAPB_EXTENDED in parms->mode is set. If bit LAPB_EXTENDED in parms->mode is set and the window size is in the range of 8 .. 127, the first check "(parms->mode & LAPB_EXTENDED)" results true and the second check "(parms->window < 1 || parms->window > 127)" results false. Both checks in conjunction result to false, thus the third check "(parms->window < 1 || parms->window > 7)" is done by fault. This third check results true, so that we leave lapb_setparms() by 'goto out_put'. Seems that this bug doesn't cause any problems, because lapb_setparms() isn't used to change the default values of LAPB. We are using kernel lapb in our software project and also change the default parameters of lapb, so we found this bug" He also pasted a fix, that I've transformated into a patch: Signed-off-by: Diego Calleja Signed-off-by: David S. Miller --- net/lapb/lapb_iface.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index d504eed416f6..7e6bc41eeb21 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -238,11 +238,13 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) goto out_put; if (lapb->state == LAPB_STATE_0) { - if (((parms->mode & LAPB_EXTENDED) && - (parms->window < 1 || parms->window > 127)) || - (parms->window < 1 || parms->window > 7)) - goto out_put; - + if (parms->mode & LAPB_EXTENDED) { + if (parms->window < 1 || parms->window > 127) + goto out_put; + } else { + if (parms->window < 1 || parms->window > 7) + goto out_put; + } lapb->mode = parms->mode; lapb->window = parms->window; } -- cgit v1.2.3 From 2f34931fdc78b4895553aaa33748939cc7697c99 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Sat, 5 Aug 2006 12:14:29 -0700 Subject: [PATCH] knfsd: fix race related problem when adding items to and svcrpc auth cache If we don't find the item we are lookng for, we allocate a new one, and then grab the lock again and search to see if it has been added while we did the alloc. If it had been added we need to 'cache_put' the newly created item that we are never going to use. But as it hasn't been initialised properly, putting it can cause an oops. So move the ->init call earlier to that it will always be fully initilised if we have to put it. Thanks to Philipp Matthias Hahn for reporting the problem. Signed-off-by: Neil Brown Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/sunrpc/cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7026b0866b7b..00cb388ece03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -71,7 +71,12 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, new = detail->alloc(); if (!new) return NULL; + /* must fully initialise 'new', else + * we might get lose if we need to + * cache_put it soon. + */ cache_init(new); + detail->init(new, key); write_lock(&detail->hash_lock); @@ -85,7 +90,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return tmp; } } - detail->init(new, key); new->next = *head; *head = new; detail->entries++; -- cgit v1.2.3 From 7b2e497a06c0e93719fda88820e057b635e8fae2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Aug 2006 16:09:04 -0700 Subject: [NET]: Assign skb->dev in netdev_alloc_skb Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 022d8894c11d..c54f3664bce5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -268,8 +268,10 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, struct sk_buff *skb; skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); - if (likely(skb)) + if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } return skb; } -- cgit v1.2.3 From 8b5cc5ef40c83c6ea4c90b203bb2c8b17edfa11b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 7 Aug 2006 20:09:20 -0700 Subject: [IPX]: Header length validation needed This patch will linearize and check there is enough data. It handles the pprop case as well as avoiding a whole audit of the routing code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index aa34ff4b707c..c13e86b14f69 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1646,7 +1646,8 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty ipx_pktsize = ntohs(ipx->ipx_pktsize); /* Too small or invalid header? */ - if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len) + if (ipx_pktsize < sizeof(struct ipxhdr) || + !pskb_may_pull(skb, ipx_pktsize)) goto drop; if (ipx->ipx_checksum != IPX_NO_CHECKSUM && -- cgit v1.2.3 From 8d1502de27c46b365b5c86e17d173083d3d6c9ac Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Mon, 7 Aug 2006 20:44:22 -0700 Subject: [IPV4]: Limit rt cache size properly. From: Kirill Korotaev During OpenVZ stress testing we found that UDP traffic with random src can generate too much excessive rt hash growing leading finally to OOM and kernel panics. It was found that for 4GB i686 system (having 1048576 total pages and 225280 normal zone pages) kernel allocates the following route hash: syslog: IP route cache hash table entries: 262144 (order: 8, 1048576 bytes) => ip_rt_max_size = 4194304 entries, i.e. max rt size is 4194304 * 256b = 1Gb of RAM > normal_zone Attached the patch which removes HASH_HIGHMEM flag from alloc_large_system_hash() call. Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 19bd49d69d9f..b873cbcdd0b8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3157,7 +3157,7 @@ int __init ip_rt_init(void) rhash_entries, (num_physpages >= 128 * 1024) ? 15 : 17, - HASH_HIGHMEM, + 0, &rt_hash_log, &rt_hash_mask, 0); -- cgit v1.2.3 From aaf580601ff244df82324fff380ed6740f27ef03 Mon Sep 17 00:00:00 2001 From: Chen-Li Tien Date: Mon, 7 Aug 2006 20:49:07 -0700 Subject: [PKTGEN]: Fix oops when used with balance-tlb bonding Signed-off-by: Chen-Li Tien Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 67ed14ddabd2..b1743375eb6d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32); skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.iph = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); -- cgit v1.2.3 From 69d8c28c9578ce78b3dc1b9be36926d962282898 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Aug 2006 20:52:10 -0700 Subject: [PKTGEN]: Make sure skb->{nh,h} are initialized in fill_packet_ipv6() too. Mirror the bug fix from fill_packet_ipv4() Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b1743375eb6d..6a7320b39ed0 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2462,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.ipv6h = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); -- cgit v1.2.3 From bd37a088596ccdb2b2dd3299e25e333bca7a9a34 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Aug 2006 21:04:15 -0700 Subject: [TCP]: SNMPv2 tcpOutSegs counter error Do not count retransmitted segments. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5c08ea20a18d..507adefbc17c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -466,7 +466,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -2157,10 +2158,9 @@ int tcp_connect(struct sock *sk) skb_shinfo(buff)->gso_size = 0; skb_shinfo(buff)->gso_type = 0; buff->csum = 0; + tp->snd_nxt = tp->write_seq; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2170,6 +2170,12 @@ int tcp_connect(struct sock *sk) sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + + /* We change tp->snd_nxt after the tcp_transmit_skb() call + * in order to make this packet get counted in tcpOutSegs. + */ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ -- cgit v1.2.3 From 70f8e78e150425b01c1099087ad3decacf7e4ccf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Aug 2006 16:47:37 -0700 Subject: [RTNETLINK]: Fix IFLA_ADDRESS handling. The ->set_mac_address handlers expect a pointer to a sockaddr which contains the MAC address, whereas IFLA_ADDRESS provides just the MAC address itself. So whip up a sockaddr to wrap around the netlink attribute for the ->set_mac_address call. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 20e5bb73f147..30cc1ba6ed5c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -394,6 +394,9 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } if (ida[IFLA_ADDRESS - 1]) { + struct sockaddr *sa; + int len; + if (!dev->set_mac_address) { err = -EOPNOTSUPP; goto out; @@ -405,7 +408,17 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) goto out; - err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1])); + len = sizeof(sa_family_t) + dev->addr_len; + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; + goto out; + } + sa->sa_family = dev->type; + memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), + dev->addr_len); + err = dev->set_mac_address(dev, sa); + kfree(sa); if (err) goto out; send_addr_notify = 1; -- cgit v1.2.3 From 7b1ba8de569460894efa892457af7a37c0d574f9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 8 Aug 2006 16:48:51 -0700 Subject: [IPX]: Another nonlinear receive fix Need to check some more cases in IPX receive. If the skb is purely fragments, the IPX header needs to be extracted. The function pskb_may_pull() may in theory invalidate all the pointers in the skb, so references to ipx header must be refreshed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index c13e86b14f69..401964204866 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1642,14 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out; - ipx = ipx_hdr(skb); - ipx_pktsize = ntohs(ipx->ipx_pktsize); + if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) + goto drop; + + ipx_pktsize = ntohs(ipxhdr(skb)->ipx_pktsize); /* Too small or invalid header? */ if (ipx_pktsize < sizeof(struct ipxhdr) || !pskb_may_pull(skb, ipx_pktsize)) goto drop; + ipx = ipx_hdr(skb); if (ipx->ipx_checksum != IPX_NO_CHECKSUM && ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize)) goto drop; -- cgit v1.2.3 From 7c91767a6b701543c93ebcd611dab61deff3dad1 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Wed, 9 Aug 2006 02:25:54 -0700 Subject: [NET]: add_timer -> mod_timer() in dst_run_gc() Patch from Dmitry Mishin : Replace add_timer() by mod_timer() in dst_run_gc in order to avoid BUG message. CPU1 CPU2 dst_run_gc() entered dst_run_gc() entered spin_lock(&dst_lock) ..... del_timer(&dst_gc_timer) fail to get lock .... mod_timer() <--- puts timer back to the list add_timer(&dst_gc_timer) <--- BUG because timer is in list already. Found during OpenVZ internal testing. At first we thought that it is OpenVZ specific as we added dst_run_gc(0) call in dst_dev_event(), but as Alexey pointed to me it is possible to trigger this condition in mainstream kernel. F.e. timer has fired on CPU2, but the handler was preeempted by an irq before dst_lock is tried. Meanwhile, someone on CPU1 adds an entry to gc list and starts the timer. If CPU2 was preempted long enough, this timer can expire simultaneously with resuming timer handler on CPU1, arriving exactly to the situation described. Signed-off-by: Dmitry Mishin Signed-off-by: Kirill Korotaev Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/core/dst.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 470c05bc4cb2..1a5e49da0e77 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_INC; dst_gc_timer_expires = DST_GC_MIN; } - dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - add_timer(&dst_gc_timer); + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); -- cgit v1.2.3 From 06aebfb7faa13258af5230ff3d1587ece6c0250e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 Aug 2006 16:52:04 -0700 Subject: [IPV6]: The ifa lock is a BH lock The ifa lock is expected to be taken in BH context (by addrconf timers) so we must disable BH when accessing it from user context. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8ea1e36bf8eb..0c5042e7380d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1909,11 +1909,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); if (!IS_ERR(ifp)) { - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = jiffies; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); -- cgit v1.2.3 From fff642570dc47ab76491fe81ee6599269c4eb13e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Aug 2006 17:36:15 -0700 Subject: [IPX]: Fix typo, ipxhdr() --> ipx_hdr() Noticed by Dave Jones. Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 401964204866..bef3f61569f7 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1645,7 +1645,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) goto drop; - ipx_pktsize = ntohs(ipxhdr(skb)->ipx_pktsize); + ipx_pktsize = ntohs(ipx_hdr(skb)->ipx_pktsize); /* Too small or invalid header? */ if (ipx_pktsize < sizeof(struct ipxhdr) || -- cgit v1.2.3 From 18b6fe64d4d1f6e0a2c71429a5e5074f43e29203 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 13 Aug 2006 18:05:09 -0700 Subject: [TCP]: Fix botched memory leak fix to tcpprobe_read(). Somehow I clobbered James's original fix and only my subsequent compiler warning change went in for that changeset. Get the real fix in there. Noticed by Jesper Juhl. Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index b3435324b573..dab37d2f65fc 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -130,11 +130,12 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf, error = wait_event_interruptible(tcpw.wait, __kfifo_len(tcpw.fifo) != 0); if (error) - return error; + goto out_free; cnt = kfifo_get(tcpw.fifo, tbuf, len); error = copy_to_user(buf, tbuf, cnt); +out_free: vfree(tbuf); return error ? error : cnt; -- cgit v1.2.3 From 97c802a113989800430a981b6f36b14c62163d37 Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Sun, 13 Aug 2006 18:05:35 -0700 Subject: [NETFILTER]: xt_string: fix negation The xt_string match is broken with ! negation. This resolves a portion of netfilter bugzilla #497. Signed-off-by: Phil Oester Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index d8e3891b5f8b..275330fcdaaa 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -37,7 +37,7 @@ static int match(const struct sk_buff *skb, return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config, &state) - != UINT_MAX) && !conf->invert; + != UINT_MAX) ^ conf->invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *) m) -- cgit v1.2.3 From 1c7628bd7a458faf7c96ef521f6d3a5ea9b106b8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 13 Aug 2006 18:06:02 -0700 Subject: [NETFILTER]: xt_hashlimit: fix limit off-by-one Hashlimit doesn't account for the first packet, which is inconsistent with the limit match. Reported by ryan.castellucci@gmail.com, netfilter bugzilla #500. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_hashlimit.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 6b662449e825..3bd2368e1fc9 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -454,15 +454,12 @@ hashlimit_match(const struct sk_buff *skb, dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - - spin_unlock_bh(&hinfo->lock); - return 1; + } else { + /* update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now); } - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - - rateinfo_recalc(dh, now); if (dh->rateinfo.credit >= dh->rateinfo.cost) { /* We're underlimit. */ dh->rateinfo.credit -= dh->rateinfo.cost; -- cgit v1.2.3 From d49c73c729e2ef644558a1f441c044bfacdc9744 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 13 Aug 2006 18:55:53 -0700 Subject: [IPSEC]: Validate properly in xfrm_dst_check() If dst->obsolete is -1, this is a signal from the bundle creator that we want the XFRM dst and the dsts that it references to be validated on every use. I misunderstood this intention when I changed xfrm_dst_check() to always return NULL. Now, when we purge a dst entry, by running dst_free() on it. This will set the dst->obsolete to a positive integer, and we want to return NULL in that case so that the socket does a relookup for the route. Thus, if dst->obsolete<0, let stale_bundle() validate the state, else always return NULL. In general, we need to do things more intelligently here because we flush too much state during rule changes. Herbert Xu has some ideas wherein the key manager gives us some help in this area. We can also use smarter state management algorithms inside of the kernel as well. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f35bc676128c..3da67ca2c3ce 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1134,12 +1134,33 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } EXPORT_SYMBOL(__xfrm_route_forward); +/* Optimize later using cookies and generation ids. */ + static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* If it is marked obsolete, which is how we even get here, - * then we have purged it from the policy bundle list and we - * did that for a good reason. + /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + * to "-1" to force all XFRM destinations to get validated by + * dst_ops->check on every use. We do this because when a + * normal route referenced by an XFRM dst is obsoleted we do + * not go looking around for all parent referencing XFRM dsts + * so that we can invalidate them. It is just too much work. + * Instead we make the checks here on every use. For example: + * + * XFRM dst A --> IPv4 dst X + * + * X is the "xdst->route" of A (X is also the "dst->path" of A + * in this example). If X is marked obsolete, "A" will not + * notice. That's what we are validating here via the + * stale_bundle() check. + * + * When a policy's bundle is pruned, we dst_free() the XFRM + * dst which causes it's ->obsolete field to be set to a + * positive non-zero integer. If an XFRM dst has been pruned + * like this, we want to force a new route lookup. */ + if (dst->obsolete < 0 && !stale_bundle(dst)) + return dst; + return NULL; } -- cgit v1.2.3 From 7ee66fcb94cb8be77d5f34cce7d315d11759f9c1 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 13 Aug 2006 18:56:26 -0700 Subject: [LLC]: multicast receive device match Fix from Aji_Srinivas@emc.com, STP packets are incorrectly received on all LLC datagram sockets, whichever interface they are bound to. The llc_sap datagram receive logic sends packets with a unicast destination MAC to one socket bound to that SAP and MAC, and multicast packets to all sockets bound to that SAP. STP packets are multicast, and we do need to know on which interface they were received. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/llc/llc_sap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 42eb0c3a9780..61cb8cf7d153 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -330,6 +330,9 @@ static void llc_sap_mcast(struct llc_sap *sap, if (llc->laddr.lsap != laddr->lsap) continue; + if (llc->dev != skb->dev) + continue; + skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) break; -- cgit v1.2.3 From 0eff66e625306a794ecba4b29ed12f7a147ce219 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 13 Aug 2006 18:57:28 -0700 Subject: [NETFILTER]: {arp,ip,ip6}_tables: proper error recovery in init path Neither of {arp,ip,ip6}_tables cleans up behind itself when something goes wrong during initialization. Noticed by Rennie deGraaf Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 27 ++++++++++++++++++++------- net/ipv4/netfilter/ip_tables.c | 33 +++++++++++++++++++++++++-------- net/ipv6/netfilter/ip6_tables.c | 34 +++++++++++++++++++++++++--------- 3 files changed, 70 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 80c73ca90116..df4854cf598b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1170,21 +1170,34 @@ static int __init arp_tables_init(void) { int ret; - xt_proto_init(NF_ARP); + ret = xt_proto_init(NF_ARP); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&arpt_standard_target); - xt_register_target(&arpt_error_target); + ret = xt_register_target(&arpt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&arpt_error_target); + if (ret < 0) + goto err3; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err4; printk("arp_tables: (C) 2002 David S. Miller\n"); return 0; + +err4: + xt_unregister_target(&arpt_error_target); +err3: + xt_unregister_target(&arpt_standard_target); +err2: + xt_proto_fini(NF_ARP); +err1: + return ret; } static void __exit arp_tables_fini(void) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fc5bdd5eb7d3..f316ff5fd8a6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2239,22 +2239,39 @@ static int __init ip_tables_init(void) { int ret; - xt_proto_init(AF_INET); + ret = xt_proto_init(AF_INET); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ipt_standard_target); - xt_register_target(&ipt_error_target); - xt_register_match(&icmp_matchstruct); + ret = xt_register_target(&ipt_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ipt_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err5; printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp_matchstruct); +err4: + xt_unregister_target(&ipt_error_target); +err3: + xt_unregister_target(&ipt_standard_target); +err2: + xt_proto_fini(AF_INET); +err1: + return ret; } static void __exit ip_tables_fini(void) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f26898b00347..c9d6b23cd3f7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1398,23 +1398,39 @@ static int __init ip6_tables_init(void) { int ret; - xt_proto_init(AF_INET6); + ret = xt_proto_init(AF_INET6); + if (ret < 0) + goto err1; /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(&ip6t_standard_target); - xt_register_target(&ip6t_error_target); - xt_register_match(&icmp6_matchstruct); + ret = xt_register_target(&ip6t_standard_target); + if (ret < 0) + goto err2; + ret = xt_register_target(&ip6t_error_target); + if (ret < 0) + goto err3; + ret = xt_register_match(&icmp6_matchstruct); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ip6t_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - xt_proto_fini(AF_INET6); - return ret; - } + if (ret < 0) + goto err5; printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_match(&icmp6_matchstruct); +err4: + xt_unregister_target(&ip6t_error_target); +err3: + xt_unregister_target(&ip6t_standard_target); +err2: + xt_proto_fini(AF_INET6); +err1: + return ret; } static void __exit ip6_tables_fini(void) -- cgit v1.2.3 From dcb7cd97f133f7cfbd181149a1e60215a869f895 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Sun, 13 Aug 2006 18:57:54 -0700 Subject: [NETFILTER]: ulog: fix panic on SMP kernels Fix kernel panic on various SMP machines. The culprit is a null ub->skb in ulog_send(). If ulog_timer() has already been scheduled on one CPU and is spinning on the lock, and ipt_ulog_packet() flushes the queue on another CPU by calling ulog_send() right before it exits, there will be no skbuff when ulog_timer() acquires the lock and calls ulog_send(). Cancelling the timer in ulog_send() doesn't help because it has already been scheduled and is running on the first CPU. Similar problem exists in ebt_ulog.c and nfnetlink_log.c. Signed-off-by: Mark Huang Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_ulog.c | 3 +++ net/ipv4/netfilter/ipt_ULOG.c | 5 +++++ net/netfilter/nfnetlink_log.c | 3 +++ 3 files changed, 11 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 02693a230dc1..9f950db3b76f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -74,6 +74,9 @@ static void ulog_send(unsigned int nlgroup) if (timer_pending(&ub->timer)) del_timer(&ub->timer); + if (!ub->skb) + return; + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d7dd7fe7051c..d46fd677fa11 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -115,6 +115,11 @@ static void ulog_send(unsigned int nlgroupnum) del_timer(&ub->timer); } + if (!ub->skb) { + DEBUGP("ipt_ULOG: ulog_send: nothing to send\n"); + return; + } + /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 61cdda4e5d3b..b59d3b2bde21 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst) if (timer_pending(&inst->timer)) del_timer(&inst->timer); + if (!inst->skb) + return 0; + if (inst->qlen > 1) inst->lastnlh->nlmsg_type = NLMSG_DONE; -- cgit v1.2.3 From e9fa4f7bd291c29a785666e2fa5a9cf3241ee6c3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 20:12:58 -0700 Subject: [INET]: Use pskb_trim_unique when trimming paged unique skbs The IPv4/IPv6 datagram output path was using skb_trim to trim paged packets because they know that the packet has not been cloned yet (since the packet hasn't been given to anything else in the system). This broke because skb_trim no longer allows paged packets to be trimmed. Paged packets must be given to one of the pskb_trim functions instead. This patch adds a new pskb_trim_unique function to cover the IPv4/IPv6 datagram output path scenario and replaces the corresponding skb_trim calls with it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 4 ++-- net/ipv6/ip6_output.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9bf307a29783..4c20f5546893 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -947,7 +947,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; @@ -1142,7 +1142,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, data, fraggap, 0); skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 69451af6abe7..4fb47a252913 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1095,7 +1095,7 @@ alloc_new_skb: skb_prev->csum = csum_sub(skb_prev->csum, skb->csum); data += fraggap; - skb_trim(skb_prev, maxfraglen); + pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; if (copy < 0) { -- cgit v1.2.3 From 7ea49ed73c8d0d0bdf7c11fc18c61572d2d22176 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 14 Aug 2006 17:08:36 -0700 Subject: [VLAN]: Make sure bonding packet drop checks get done in hwaccel RX path. Since __vlan_hwaccel_rx() is essentially bypassing the netif_receive_skb() call that would have occurred if we did the VLAN decapsulation in software, we are missing the skb_bond() call and the assosciated checks it does. Export those checks via an inline function, skb_bond_should_drop(), and use this in __vlan_hwaccel_rx(). Signed-off-by: David S. Miller --- net/core/dev.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d95e2626d944..9fe96cde3e19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1619,26 +1619,10 @@ static inline struct net_device *skb_bond(struct sk_buff *skb) struct net_device *dev = skb->dev; if (dev->master) { - /* - * On bonding slaves other than the currently active - * slave, suppress duplicates except for 802.3ad - * ETH_P_SLOW and alb non-mcast/bcast. - */ - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if (dev->master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - goto keep; - } - - if (dev->master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - goto keep; - + if (skb_bond_should_drop(skb)) { kfree_skb(skb); return NULL; } -keep: skb->dev = dev->master; } -- cgit v1.2.3 From 855751125093f758871b70da2951d8b92b6368cc Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 15 Aug 2006 00:03:01 -0700 Subject: [NET]: Fix potential stack overflow in net/core/utils.c On High end systems (1024 or so cpus) this can potentially cause stack overflow. Fix the stack usage. Signed-off-by: Suresh Siddha Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/utils.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/utils.c b/net/core/utils.c index 4f96f389243d..e31c90e05594 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -130,12 +130,13 @@ void __init net_random_init(void) static int net_random_reseed(void) { int i; - unsigned long seed[NR_CPUS]; + unsigned long seed; - get_random_bytes(seed, sizeof(seed)); for_each_possible_cpu(i) { struct nrnd_state *state = &per_cpu(net_rand_state,i); - __net_srandom(state, seed[i]); + + get_random_bytes(&seed, sizeof(seed)); + __net_srandom(state, seed); } return 0; } -- cgit v1.2.3 From deb47c66e12a645f7eec9b1c153c05ed47989439 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 15 Aug 2006 00:04:56 -0700 Subject: [NETFILTER]: xt_physdev build fix It needs netfilter_bridge.h for brnf_deferred_hooks Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/netfilter/xt_physdev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index a9f4f6f3c628..63a965467465 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 640c41c77a96dbbfb74d40ae86ab75b759afb911 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 15 Aug 2006 00:06:56 -0700 Subject: [IPV6] lockdep: annotate __icmpv6_socket Split off __icmpv6_socket's sk->sk_dst_lock class, because it gets used from softirqs, which is safe for __icmpv6_sockets (because they never get directly used via userspace syscalls), but unsafe for normal sockets. Has no effect on non-lockdep kernels. Signed-off-by: Ingo Molnar Acked-by: Herbert Xu Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1044b6fce0d5..3d6e9a351150 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -712,6 +712,11 @@ discard_it: return 0; } +/* + * Special lock-class for __icmpv6_socket: + */ +static struct lock_class_key icmpv6_socket_sk_dst_lock_key; + int __init icmpv6_init(struct net_proto_family *ops) { struct sock *sk; @@ -730,6 +735,14 @@ int __init icmpv6_init(struct net_proto_family *ops) sk = per_cpu(__icmpv6_socket, i)->sk; sk->sk_allocation = GFP_ATOMIC; + /* + * Split off their lock-class, because sk->sk_dst_lock + * gets used from softirqs, which is safe for + * __icmpv6_socket (because those never get directly used + * via userspace syscalls), but unsafe for normal sockets. + */ + lockdep_set_class(&sk->sk_dst_lock, + &icmpv6_socket_sk_dst_lock_key); /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. -- cgit v1.2.3 From bb699cbca0096aa3f5f750264ec0af080732375a Mon Sep 17 00:00:00 2001 From: Michal Ruzicka Date: Tue, 15 Aug 2006 00:20:17 -0700 Subject: [IPV4]: Possible leak of multicast source filter sctructure There is a leak of a socket's multicast source filter list structure on closing a socket with a multicast source filter set on an interface that does not exist any more. Signed-off-by: Michal Ruzicka Acked-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9f4b752f5a33..e981369ebe13 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2199,13 +2199,13 @@ void ip_mc_drop_socket(struct sock *sk) struct in_device *in_dev; inet->mc_list = iml->next; - if ((in_dev = inetdev_by_index(iml->multi.imr_ifindex)) != NULL) { - (void) ip_mc_leave_src(sk, iml, in_dev); + in_dev = inetdev_by_index(iml->multi.imr_ifindex); + (void) ip_mc_leave_src(sk, iml, in_dev); + if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } sock_kfree_s(sk, iml, sizeof(*iml)); - } rtnl_unlock(); } -- cgit v1.2.3 From b9c6e3e96669ade31afd3a39f17393e577b609c5 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Tue, 15 Aug 2006 02:02:33 -0700 Subject: [ATM]: Compile error on ARM atm_proc_exit() is declared as __exit, and thus in .exit.text. On some architectures (ARM) .exit.text is discarded at compile time, and since atm_proc_exit() is called by some other __init functions, it results in a link error. Signed-off-by: Kevin Hilman Signed-off-by: David S. Miller --- net/atm/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/proc.c b/net/atm/proc.c index 3f95b0886a6a..91fe5f53ff11 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -507,7 +507,7 @@ err_out: goto out; } -void __exit atm_proc_exit(void) +void atm_proc_exit(void) { atm_proc_dirs_remove(); } -- cgit v1.2.3 From c0956bd25161bff45304d482cda51ca4b3b572f1 Mon Sep 17 00:00:00 2001 From: Ralf Hildebrandt Date: Tue, 15 Aug 2006 02:12:43 -0700 Subject: [PKT_SCHED] cls_u32: Fix typo. Signed-off-by: Ralf Hildebrandt Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index eea366966740..0a6cfa0005be 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -796,7 +796,7 @@ static int __init init_u32(void) { printk("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF - printk(" Perfomance counters on\n"); + printk(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_POLICE printk(" OLD policer on \n"); -- cgit v1.2.3 From c7fa9d189e93877a1fa08ab00f230e0689125e45 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Aug 2006 16:34:13 -0700 Subject: [NET]: Disallow whitespace in network device names. It causes way too much trouble and confusion in userspace. Signed-off-by: David S. Miller --- net/core/dev.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9fe96cde3e19..d4a1ec3bded5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include #include #include +#include /* * The list of packet types we will receive (as opposed to discard) @@ -632,14 +633,22 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas * @name: name string * * Network device names need to be valid file names to - * to allow sysfs to work + * to allow sysfs to work. We also disallow any kind of + * whitespace. */ int dev_valid_name(const char *name) { - return !(*name == '\0' - || !strcmp(name, ".") - || !strcmp(name, "..") - || strchr(name, '/')); + if (*name == '\0') + return 0; + if (!strcmp(name, ".") || !strcmp(name, "..")) + return 0; + + while (*name) { + if (*name == '/' || isspace(*name)) + return 0; + name++; + } + return 1; } /** -- cgit v1.2.3 From acd6e00b8e4db542cb6bc9ddfbb4e18bbe29ce4d Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Thu, 17 Aug 2006 16:27:39 -0700 Subject: [MCAST]: Fix filter leak on device removal. This fixes source filter leakage when a device is removed and a process leaves the group thereafter. This also includes corresponding fixes for IPv6 multicast source filters on device removal. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 32 +++++++++++++++++++------------- net/ipv6/mcast.c | 10 ++++++---- 2 files changed, 25 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e981369ebe13..8e8117c19e4d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1793,29 +1793,35 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct in_device *in_dev; u32 group = imr->imr_multiaddr.s_addr; u32 ifindex; + int ret = -EADDRNOTAVAIL; rtnl_lock(); in_dev = ip_mc_find_dev(imr); - if (!in_dev) { - rtnl_unlock(); - return -ENODEV; - } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { - if (iml->multi.imr_multiaddr.s_addr == group && - iml->multi.imr_ifindex == ifindex) { - (void) ip_mc_leave_src(sk, iml, in_dev); + if (iml->multi.imr_multiaddr.s_addr != group) + continue; + if (ifindex) { + if (iml->multi.imr_ifindex != ifindex) + continue; + } else if (imr->imr_address.s_addr && imr->imr_address.s_addr != + iml->multi.imr_address.s_addr) + continue; - *imlp = iml->next; + (void) ip_mc_leave_src(sk, iml, in_dev); + *imlp = iml->next; + + if (in_dev) ip_mc_dec_group(in_dev, group); - rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); - return 0; - } + rtnl_unlock(); + sock_kfree_s(sk, iml, sizeof(*iml)); + return 0; } + if (!in_dev) + ret = -ENODEV; rtnl_unlock(); - return -EADDRNOTAVAIL; + return ret; } int ip_mc_source(int add, int omode, struct sock *sk, struct diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9d697d4dcffc..639eb20c9f1f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -268,13 +268,14 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk,mc_lst,idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -334,13 +335,14 @@ void ipv6_sock_mc_close(struct sock *sk) if (dev) { struct inet6_dev *idev = in6_dev_get(dev); + (void) ip6_mc_leave_src(sk, mc_lst, idev); if (idev) { - (void) ip6_mc_leave_src(sk, mc_lst, idev); __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } dev_put(dev); - } + } else + (void) ip6_mc_leave_src(sk, mc_lst, NULL); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); -- cgit v1.2.3 From 6e8fcbf64024f9056ba122abbb66554aa76bae5d Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Thu, 17 Aug 2006 16:44:46 -0700 Subject: [IPV4]: severe locking bug in fib_semantics.c Found in 2.4 by Yixin Pan . > When I read fib_semantics.c of Linux-2.4.32, write_lock(&fib_info_lock) = > is used in fib_release_info() instead of write_lock_bh(&fib_info_lock). = > Is the following case possible: a BH interrupts fib_release_info() while = > holding the write lock, and calls ip_check_fib_default() which calls = > read_lock(&fib_info_lock), and spin forever. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9be53a8e72c3..51738000f3dc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -159,7 +159,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -172,7 +172,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -598,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -639,7 +639,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); @@ -820,7 +820,7 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock(&fib_info_lock); + write_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -839,7 +839,7 @@ link_it: head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - write_unlock(&fib_info_lock); + write_unlock_bh(&fib_info_lock); return fi; err_inval: -- cgit v1.2.3 From d205dc40798d97d63ad348bfaf7394f445d152d4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 17 Aug 2006 18:12:38 -0700 Subject: [NETFILTER]: ctnetlink: fix deadlock in table dumping ip_conntrack_put must not be called while holding ip_conntrack_lock since destroy_conntrack takes it again. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_netlink.c | 17 +++++++---------- net/netfilter/nf_conntrack_netlink.c | 17 +++++++---------- 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 33891bb1fde4..0d4cc92391fa 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -415,21 +415,18 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&ip_conntrack_lock); + last = (struct ip_conntrack *)cb->args[1]; for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct ip_conntrack *)cb->args[1]; list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { h = (struct ip_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = tuplehash_to_ctrack(h); - if (last != NULL) { - if (ct == last) { - ip_conntrack_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -440,17 +437,17 @@ restart: goto out; } } - if (last != NULL) { - ip_conntrack_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&ip_conntrack_lock); + if (last) + ip_conntrack_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index af4845971f70..6527d4e048d8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -429,9 +429,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0], *id); read_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - last = (struct nf_conn *)cb->args[1]; list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { h = (struct nf_conntrack_tuple_hash *) i; if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) @@ -442,13 +442,10 @@ restart: * then dump everything. */ if (l3proto && L3PROTO(ct) != l3proto) continue; - if (last != NULL) { - if (ct == last) { - nf_ct_put(last); - cb->args[1] = 0; - last = NULL; - } else + if (cb->args[1]) { + if (ct != last) continue; + cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -459,17 +456,17 @@ restart: goto out; } } - if (last != NULL) { - nf_ct_put(last); + if (cb->args[1]) { cb->args[1] = 0; goto restart; } } out: read_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - return skb->len; } -- cgit v1.2.3 From 8311731afc439f508ab4d759edadedae75afb73e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 17 Aug 2006 18:13:53 -0700 Subject: [NETFILTER]: ip_tables: fix table locking in ipt_do_table table->private might change because of ruleset changes, don't use it without holding the lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f316ff5fd8a6..048514f15f2f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -230,7 +230,7 @@ ipt_do_table(struct sk_buff **pskb, const char *indev, *outdev; void *table_base; struct ipt_entry *e, *back; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* Initialization */ ip = (*pskb)->nh.iph; @@ -247,6 +247,7 @@ ipt_do_table(struct sk_buff **pskb, read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); -- cgit v1.2.3 From 78eb887733ec8ff5d6e6c69e3c32a187a9303622 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Aug 2006 18:22:32 -0700 Subject: [BRIDGE]: Disable SG/GSO if TX checksum is off When the bridge recomputes features, it does not maintain the constraint that SG/GSO must be off if TX checksum is off. This patch adds that constraint. On a completely unrelated note, I've also added TSO6 and TSO_ECN feature bits if GSO is enabled on the underlying device through the new NETIF_F_GSO_SOFTWARE macro. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_if.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f55ef682ef84..b1211d5342f6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,12 +386,17 @@ void br_features_recompute(struct net_bridge *br) checksum = 0; if (feature & NETIF_F_GSO) - feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO_SOFTWARE; feature |= NETIF_F_GSO; features &= feature; } + if (!(checksum & NETIF_F_ALL_CSUM)) + features &= ~NETIF_F_SG; + if (!(features & NETIF_F_SG)) + features &= ~NETIF_F_GSO_MASK; + br->dev->features = features | checksum | NETIF_F_LLTX | NETIF_F_GSO_ROBUST; } -- cgit v1.2.3 From c164a9ba0a8870c5c9d353f63085319931d69f23 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 11:50:39 -0700 Subject: Fix sctp privilege elevation (CVE-2006-3745) sctp_make_abort_user() now takes the msg_len along with the msg so that we don't have to recalculate the bytes in iovec. It also uses memcpy_fromiovec() so that we don't go beyond the length allocated. It is good to have this fix even if verify_iovec() is fixed to return error on overflow. Signed-off-by: Sridhar Samudrala Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 30 +++++++++--------------------- net/sctp/sm_statefuns.c | 20 ++++---------------- net/sctp/socket.c | 10 +++++++++- 3 files changed, 22 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 4f11f5858209..17b509282cf2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -806,38 +806,26 @@ no_mem: /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const struct msghdr *msg) + const struct msghdr *msg, + size_t paylen) { struct sctp_chunk *retval; - void *payload = NULL, *payoff; - size_t paylen = 0; - struct iovec *iov = NULL; - int iovlen = 0; - - if (msg) { - iov = msg->msg_iov; - iovlen = msg->msg_iovlen; - paylen = get_user_iov_size(iov, iovlen); - } + void *payload = NULL; + int err; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); if (!retval) goto err_chunk; if (paylen) { /* Put the msg_iov together into payload. */ - payload = kmalloc(paylen, GFP_ATOMIC); + payload = kmalloc(paylen, GFP_KERNEL); if (!payload) goto err_payload; - payoff = payload; - for (; iovlen > 0; --iovlen) { - if (copy_from_user(payoff, iov->iov_base,iov->iov_len)) - goto err_copy; - payoff += iov->iov_len; - iov++; - } + err = memcpy_fromiovec(payload, msg->msg_iov, paylen); + if (err < 0) + goto err_copy; } sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ead3f1b0ea3d..5b5ae7958322 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4031,18 +4031,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( * from its upper layer, but retransmits data to the far end * if necessary to fill gaps. */ - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer. */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4166,8 +4160,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( void *arg, sctp_cmd_seq_t *commands) { - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; /* Stop T1-init timer */ @@ -4175,12 +4168,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 54722e622e6d..fde3f55bfd4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,8 +1520,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } if (sinfo_flags & SCTP_ABORT) { + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, msg, msg_len); + if (!chunk) { + err = -ENOMEM; + goto out_unlock; + } + SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, msg); + sctp_primitive_ABORT(asoc, chunk); err = 0; goto out_unlock; } -- cgit v1.2.3 From e0b7cde9975e17a61b4511c7822803dfb7210011 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 21 Aug 2006 15:31:08 -0700 Subject: [NETFILTER]: arp_tables: fix table locking in arpt_do_table table->private might change because of ruleset changes, don't use it without holding the lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index df4854cf598b..8d1d7a6e72a5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + @@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, outdev = out ? out->name : nulldevname; read_lock_bh(&table->lock); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); -- cgit v1.2.3 From 316c1592bea94ead75301cb764523661fbbcc1ca Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Aug 2006 00:06:11 -0700 Subject: [TCP]: Limit window scaling if window is clamped. This small change allows for easy per-route workarounds for broken hosts or middleboxes that are not compliant with TCP standards for window scaling. Rather than having to turn off window scaling globally. This patch allows reducing or disabling window scaling if window clamp is present. Example: Mark Lord reported a problem with 2.6.17 kernel being unable to access http://www.everymac.com # ip route add 216.145.246.23/32 via 10.8.0.1 window 65535 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 507adefbc17c..b4f3ffe1b3b4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * See RFC1323 for an explanation of the limit to 14 */ space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; -- cgit v1.2.3 From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:11:48 -0700 Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path Signe-off-by: Trond Myklebust (cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit) --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/rpc_pipe.c | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4a9aa9393b97..beaa7b848246 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -718,7 +718,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->path); + rpc_unlink(gss_auth->dentry); dput(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a3bd2db2e024..9144f2767b66 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -746,22 +746,15 @@ err_dput: } int -rpc_unlink(char *path) +rpc_unlink(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; - int error; + int error = 0; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } d_drop(dentry); if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); @@ -769,9 +762,8 @@ rpc_unlink(char *path) } dput(dentry); inode_dir_notify(dir, DN_DELETE); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } -- cgit v1.2.3 From dff02cc1a34fcb60904a2c57cb351857cc11219e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:17:18 -0700 Subject: NFS: clean up rpc_rmdir Make it take a dentry argument instead of a path Signed-off-by: Trond Myklebust (cherry picked from 648d4116eb2509f010f7f34704a650150309b3e7 commit) --- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/rpc_pipe.c | 18 +++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d6409e757219..d307556872db 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -183,7 +183,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_pathname); + rpc_rmdir(clnt->cl_dentry); dput(clnt->cl_dentry); rpc_put_mount(); } @@ -320,8 +320,8 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_pathname[0]) - rpc_rmdir(clnt->cl_pathname); + if (!IS_ERR(clnt->cl_dentry)) + rpc_rmdir(clnt->cl_dentry); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9144f2767b66..9c355e1ae61a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -684,28 +684,20 @@ err_dput: } int -rpc_rmdir(char *path) +rpc_rmdir(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; int error; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } rpc_depopulate(dentry); error = __rpc_rmdir(dir, dentry); dput(dentry); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } -- cgit v1.2.3 From 68adb0af51ebccb72ffb14d49cb8121b1afc4259 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Aug 2006 17:51:46 -0400 Subject: SUNRPC: rpc_unlink() must check for unhashed dentries A prior call to rpc_depopulate() by rpc_rmdir() on the parent directory may have already called simple_unlink() on this entry. Add the same check to rpc_rmdir(). Also remove a redundant call to rpc_close_pipes() in rpc_rmdir. Signed-off-by: Trond Myklebust (cherry picked from 0bbfb9d20f6437c4031aa3bf9b4d311a053e58e3 commit) --- net/sunrpc/rpc_pipe.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9c355e1ae61a..0b1a1ac8a4bc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -539,6 +539,7 @@ repeat: rpc_close_pipes(dentry->d_inode); simple_unlink(dir, dentry); } + inode_dir_notify(dir, DN_DELETE); dput(dentry); } while (n); goto repeat; @@ -610,8 +611,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - if (dentry->d_inode) - rpc_close_pipes(dentry->d_inode); + if (d_unhashed(dentry)) + return 0; if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -747,13 +748,15 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); + if (!d_unhashed(dentry)) { + d_drop(dentry); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + } + inode_dir_notify(dir, DN_DELETE); } dput(dentry); - inode_dir_notify(dir, DN_DELETE); mutex_unlock(&dir->i_mutex); dput(parent); return error; -- cgit v1.2.3 From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Aug 2006 13:11:15 -0400 Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs rpc_unlink() and rpc_rmdir() will dput the dentry reference for you. Signed-off-by: Trond Myklebust (cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit) --- net/sunrpc/auth_gss/auth_gss.c | 1 - net/sunrpc/clnt.c | 15 ++++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index beaa7b848246..ef1cf5b476c8 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -719,7 +719,6 @@ gss_destroy(struct rpc_auth *auth) gss_auth = container_of(auth, struct gss_auth, rpc_auth); rpc_unlink(gss_auth->dentry); - dput(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d307556872db..d9eac7069101 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -184,7 +184,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); - dput(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; - if (!IS_ERR(new->cl_dentry)) { + if (!IS_ERR(new->cl_dentry)) dget(new->cl_dentry); - rpc_get_mount(); - } rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); @@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt) clnt->cl_auth = NULL; } if (clnt->cl_parent != clnt) { + if (!IS_ERR(clnt->cl_dentry)) + dput(clnt->cl_dentry); rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (!IS_ERR(clnt->cl_dentry)) + if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); + rpc_put_mount(); + } if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; @@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt) out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - if (!IS_ERR(clnt->cl_dentry)) { - dput(clnt->cl_dentry); - rpc_put_mount(); - } kfree(clnt); return 0; } -- cgit v1.2.3 From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Aug 2006 15:44:19 -0400 Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode Check the bounds of length specifiers more thoroughly in the XDR decoding of NFS4 readdir reply data. Currently, if the server returns a bitmap or attr length that causes the current decode point pointer to wrap, this could go undetected (consider a small "negative" length on a 32-bit machine). Also add a check into the main XDR decode handler to make sure that the amount of data is a multiple of four bytes (as specified by RFC-1014). This makes sure that we can do u32* pointer subtraction in the NFS client without risking an undefined result (the result is undefined if the pointers are not correctly aligned with respect to one another). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust (cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit) --- net/sunrpc/clnt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d9eac7069101..3e19d321067a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1181,6 +1181,17 @@ call_verify(struct rpc_task *task) u32 *p = iov->iov_base, n; int error = -EACCES; + if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + printk(KERN_WARNING + "call_verify: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + goto out_eio; + } if ((len -= 3) < 0) goto out_overflow; p += 1; /* skip XID */ -- cgit v1.2.3 From 59eed279c5daa88d95e429782ddb8ef87e52c44b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 25 Aug 2006 15:55:43 -0700 Subject: [IPV6]: Segmentation offload not set correctly on TCP children TCP over IPV6 would incorrectly inherit the GSO settings. This would cause kernel to send Tcp Segmentation Offload packets for IPV6 data to devices that can't handle it. It caused the sky2 driver to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050 and the e1000 would generate bogus packets. I can't blame the hardware for gagging if the upper layers feed it garbage. This was a new bug in 2.6.18 introduced with GSO support. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b843a650be71..802a1a6b1037 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -944,7 +944,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ - sk->sk_gso_type = SKB_GSO_TCPV6; + newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; -- cgit v1.2.3 From f3166c07175c1639687288006aeabed363a921f3 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:01:03 -0700 Subject: [DCCP]: Fix typo This fixes a small typo in net/dccp/libs/packet_history.c Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/lib/packet_history.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index ad98d6a322eb..6739be1681c9 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,5 +1,5 @@ /* - * net/dccp/packet_history.h + * net/dccp/packet_history.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. * -- cgit v1.2.3 From e6bccd357343e98db9e1fd0d487f4f924e1a7921 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:01:30 -0700 Subject: [DCCP]: Update contact details and copyright Just updating copyright and contacts Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/ccids/ccid3.h | 4 ++-- net/dccp/ccids/lib/loss_interval.c | 2 +- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.c | 6 +++--- net/dccp/ccids/lib/packet_history.h | 4 ++-- net/dccp/ccids/lib/tfrc.h | 2 +- net/dccp/ccids/lib/tfrc_equation.c | 2 +- net/dccp/dccp.h | 2 +- net/dccp/options.c | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index c39bff706cfc..0f85970ee6d1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,7 +2,7 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * * An implementation of the DCCP protocol * @@ -1230,7 +1230,7 @@ static __exit void ccid3_module_exit(void) } module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald , " +MODULE_AUTHOR("Ian McDonald , " "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5ade4f668b22..22cb9f80a09d 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -1,13 +1,13 @@ /* * net/dccp/ccids/ccid3.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5d7b7d864385..b93d9fc98cb8 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/loss_interval.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 43bf78269d1d..dcb370a53f57 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/loss_interval.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify it diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6739be1681c9..7b6b03e9271a 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -391,7 +391,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); -MODULE_AUTHOR("Ian McDonald , " +MODULE_AUTHOR("Ian McDonald , " "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC library"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 673c209e4e85..27c43092636c 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 130c4c40cfe3..45f30f59ea2a 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/tfrc.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 4fd2ebebf5a0..44076e0c6591 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/tfrc_equation.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d00a2f4ee5dd..b8931d33bec3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -5,7 +5,7 @@ * * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as diff --git a/net/dccp/options.c b/net/dccp/options.c index daf72bb671f0..07a34696ac97 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -4,7 +4,7 @@ * An implementation of the DCCP protocol * Copyright (c) 2005 Aristeu Sergio Rozanski Filho * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From 837d107cd101fbf734e9ea2bbb5c7336a329e432 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:06:42 -0700 Subject: [DCCP]: Introduces follows48 function This adds a new function to see if two sequence numbers follow each other. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b8931d33bec3..a5c5475724c0 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -81,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/* is seq1 next seqno after seq2 */ +static inline int follows48(const u64 seq1, const u64 seq2) +{ + int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); + + return diff==1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ -- cgit v1.2.3 From 80193aee18bc862e284ba18504f3a3e14706a997 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:07:36 -0700 Subject: [DCCP]: Introduce dccp_rx_hist_find_entry This adds a new function dccp_rx_hist_find_entry. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/lib/packet_history.c | 19 +++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'net') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 7b6b03e9271a..420c60f8604d 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -365,6 +365,25 @@ struct dccp_tx_hist_entry * EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) +{ + struct dccp_rx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } + + if (packet) + *ccval = packet->dccphrx_ccval; + + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); + void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *packet) diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 27c43092636c..aea9c5d70910 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -106,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) -- cgit v1.2.3 From 3a13813e6effcfad5910d47b15b724621b50b878 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 26 Aug 2006 20:28:30 -0700 Subject: [BRIDGE] netfilter: memory corruption fix The bridge-netfilter code will overwrite memory if there is not headroom in the skb to save the header. This first showed up when using Xen with sky2 driver that doesn't allocate the extra space. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6ccd32b30809..864fbbc7b24d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -40,11 +40,15 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) else { #ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ - nf_bridge_maybe_copy_header(skb); + if (nf_bridge_maybe_copy_header(skb)) + kfree_skb(skb); + else #endif - skb_push(skb, ETH_HLEN); + { + skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); + dev_queue_xmit(skb); + } } return 0; -- cgit v1.2.3 From 66a377c5041e1e399633153c8b500d457281e7c1 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 23:40:50 -0700 Subject: [DCCP]: Fix CCID3 This fixes CCID3 to give much closer performance to RFC4342. CCID3 is meant to alter sending rate based on RTT and loss. The performance was verified against: http://wand.net.nz/~perry/max_download.php For example I tested with netem and had the following parameters: Delayed Acks 1, MSS 256 bytes, RTT 105 ms, packet loss 5%. This gives a theoretical speed of 71.9 Kbits/s. I measured across three runs with this patch set and got 70.1 Kbits/s. Without this patchset the average was 232 Kbits/s which means Linux can't be used for CCID3 research properly. I also tested with netem turned off so box just acting as router with 1.2 msec RTT. The performance with this is the same with or without the patch at around 30 Mbit/s. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 149 ++++++++++++++++++++++++++++-------- net/dccp/ccids/ccid3.h | 5 +- net/dccp/ccids/lib/loss_interval.c | 34 ++++---- net/dccp/ccids/lib/loss_interval.h | 7 +- net/dccp/ccids/lib/packet_history.c | 141 +++------------------------------- net/dccp/ccids/lib/packet_history.h | 11 +-- 6 files changed, 154 insertions(+), 193 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 0f85970ee6d1..090bc39e8199 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } out: return rc; @@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: if (len > 0) { - hctx->ccid3hctx_t_nom = now; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); timeval_add_usecs(&hctx->ccid3hctx_t_nom, @@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } hcrx->ccid3hcrx_tstamp_last_feedback = now; - hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; - hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; + hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ @@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) return 0; @@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: + if (!tail) { + LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", + __FUNCTION__); + return ~0; + } rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -864,9 +871,20 @@ found: delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) + x_recv = hcrx->ccid3hcrx_x_recv; + tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); tmp2 = (u32)tmp1; + + if (!tmp2) { + LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " + "%s: x_recv = %u, rtt =%u\n", + __FUNCTION__, x_recv, rtt); + return ~0; + } + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = tfrc_calc_x_reverse_lookup(fval); @@ -882,31 +900,101 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct dccp_li_hist_entry *next, *head; + u64 seq_temp; - if (seq_loss != DCCP_MAX_SEQNO + 1 && - list_empty(&hcrx->ccid3hcrx_li_hist)) { - struct dccp_li_hist_entry *li_tail; + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { + if (!dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) + return; - li_tail = dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, - seq_loss, win_loss); - if (li_tail == NULL) + next = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + + if (entry == NULL) { + printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); + dump_stack(); return; - li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else - LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " - "interval\n", __FUNCTION__); + } + + list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); + + tail = hcrx->ccid3hcrx_li_hist.prev; + list_del(tail); + kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } } -static void ccid3_hc_rx_detect_loss(struct sock *sk) +static int ccid3_hc_rx_detect_loss(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u8 win_loss; - const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - &win_loss); + struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + u64 seqno = packet->dccphrx_seqno; + u64 tmp_seqno; + int loss = 0; + u8 ccval; + + + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + + if (!rx_hist || + follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + goto detect_out; + } + - ccid3_hc_rx_update_li(sk, seq_loss, win_loss); + while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) + > TFRC_RECV_NUM_LATE_LOSS) { + loss = 1; + ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + dccp_inc_seqno(&tmp_seqno); + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + dccp_inc_seqno(&tmp_seqno); + while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, + tmp_seqno, &ccval)) { + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_ccval_nonloss = ccval; + dccp_inc_seqno(&tmp_seqno); + } + } + + /* FIXME - this code could be simplified with above while */ + /* but works at moment */ + if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + } + +detect_out: + dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet, + hcrx->ccid3hcrx_seqno_nonloss); + return loss; } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev, r_sample, t_elapsed; - int ins; + u32 p_prev, rtt_prev, r_sample, t_elapsed; + int loss; BUG_ON(hcrx == NULL || !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || @@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; - p_prev = hcrx->ccid3hcrx_rtt; + rtt_prev = hcrx->ccid3hcrx_rtt; dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); @@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + r_sample / 10; - if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + if (rtt_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; @@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, packet); + loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; @@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; - if (ins != 0) + if (loss) break; dccp_timestamp(sk, &now); @@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); - ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ @@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; + } else { + printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); + dump_stack(); } if (hcrx->ccid3hcrx_p > p_prev) { diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 22cb9f80a09d..0a2cb7536d26 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_last_counter:48, + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, ccid3hcrx_state:8, - ccid3hcrx_last_counter:4; + ccid3hcrx_ccval_last_counter:4; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index b93d9fc98cb8..906c81ab9d4f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -12,6 +12,7 @@ */ #include +#include #include "loss_interval.h" @@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) u32 w_tot = 0; list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + if (li_entry->dccplih_interval != ~0) { i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; w_tot += dccp_li_hist_w[i]; + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; } - if (i != 0) - i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) break; @@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); - /* FIXME: Why do we do this? -Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; + if (!w_tot) { + LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + return 1; + } - return i_tot * 4 / w_tot; + return i_tot / w_tot; } EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss) +int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss) { - struct dccp_li_hist_entry *tail = NULL, *entry; + struct dccp_li_hist_entry *entry; int i; - for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - return NULL; + dump_stack(); + return 0; } - if (tail == NULL) - tail = entry; + entry->dccplih_interval = ~0; list_add(&entry->dccplih_node, list); } entry->dccplih_seqno = seq_loss; entry->dccplih_win_count = win_loss; - return tail; + return 1; } EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index dcb370a53f57..0ae85f0340b2 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern struct dccp_li_hist_entry * - dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss); +extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 420c60f8604d..b876c9c81c65 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -112,64 +112,27 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); -int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet) + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno) { - struct dccp_rx_hist_entry *entry, *next, *iter; + struct dccp_rx_hist_entry *entry, *next; u8 num_later = 0; - iter = dccp_rx_hist_head(rx_list); - if (iter == NULL) - dccp_rx_hist_add_entry(rx_list, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(rx_list, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, rx_list, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; - - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(hist, packet); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(rx_list, packet); - /* - * FIXME: else what? should we destroy the packet - * like above? - */ - } - } + list_add(&packet->dccphrx_node, rx_list); -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(li_list)) { list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(hist, entry); + if (after48(nonloss_seqno, + entry->dccphrx_seqno)) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -217,94 +180,10 @@ trim_history: --num_later; } } - - return 0; } EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, - struct list_head *li_list, u8 *win_loss) -{ - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - - list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out; - - num_later = 1; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(li_list)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - __FUNCTION__); - goto out; - } else { - LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", - __FUNCTION__); - goto out; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. - */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } -out: - if (seq_loss != DCCP_MAX_SEQNO + 1) - *win_loss = a_loss->dccphrx_ccval; - else - *win_loss = 0; /* Paranoia */ - - return seq_loss; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); - struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index aea9c5d70910..067cf1c85a37 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list); -static inline void dccp_rx_hist_add_entry(struct list_head *list, - struct dccp_rx_hist_entry *entry) -{ - list_add(&entry->dccphrx_node, list); -} - static inline struct dccp_rx_hist_entry * dccp_rx_hist_head(struct list_head *list) { @@ -190,10 +184,11 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet); + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); -- cgit v1.2.3