From 540c8cb6a576f34a9a0b04467f46bb6e67a1f852 Mon Sep 17 00:00:00 2001 From: Kevin Coffman Date: Wed, 2 Mar 2011 19:51:41 -0500 Subject: gss:krb5 only include enctype numbers in gm_upcall_enctypes Make the value in gm_upcall_enctypes just the enctype values. This allows the values to be used more easily elsewhere. Signed-off-by: Kevin Coffman Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 45dbf1521b9a..f3914d0c5079 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { - len = sprintf(p, mech->gm_upcall_enctypes); + len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes); p += len; gss_msg->msg.len += len; } diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index f375decc024b..9022f0a6503e 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -750,7 +750,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ", + .gm_upcall_enctypes = "18,17,16,23,3,1,2", }; static int __init init_kerberos_module(void) -- cgit v1.2.3 From 8b3e07ac908d005bb791410f594cce8744f6806a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 7 Mar 2011 22:50:47 -0500 Subject: svcrpc: fix rare race on unix_domain creation Note that "new" here is not yet fully initialized; auth_domain_put should be called only on auth_domains that have actually been added to the hash. Before this fix, two attempts to add the same domain at once could cause the hlist_del in auth_domain_put to fail. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 30916b06c12b..d100bf2b4e81 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -38,6 +38,14 @@ struct unix_domain { extern struct auth_ops svcauth_unix; +static void svcauth_unix_domain_release(struct auth_domain *dom) +{ + struct unix_domain *ud = container_of(dom, struct unix_domain, h); + + kfree(dom->name); + kfree(ud); +} + struct auth_domain *unix_domain_find(char *name) { struct auth_domain *rv; @@ -47,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name) while(1) { if (rv) { if (new && rv != &new->h) - auth_domain_put(&new->h); + svcauth_unix_domain_release(new); if (rv->flavour != &svcauth_unix) { auth_domain_put(rv); @@ -74,14 +82,6 @@ struct auth_domain *unix_domain_find(char *name) } EXPORT_SYMBOL_GPL(unix_domain_find); -static void svcauth_unix_domain_release(struct auth_domain *dom) -{ - struct unix_domain *ud = container_of(dom, struct unix_domain, h); - - kfree(dom->name); - kfree(ud); -} - /************************************************** * cache for IP address to unix_domain -- cgit v1.2.3 From 352b5d13c0684ba8cd103aa20cb74f105334562a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 Mar 2011 22:40:30 -0500 Subject: svcrpc: fix bad argument in unix_domain_find "After merging the nfsd tree, today's linux-next build (powerpc ppc64_defconfig) produced this warning: net/sunrpc/svcauth_unix.c: In function 'unix_domain_find': net/sunrpc/svcauth_unix.c:58: warning: passing argument 1 of +'svcauth_unix_domain_release' from incompatible pointer type net/sunrpc/svcauth_unix.c:41: note: expected 'struct auth_domain *' but argument +is of type 'struct unix_domain *' Introduced by commit 8b3e07ac908d ("svcrpc: fix rare race on unix_domain creation")." Reported-by: Stephen Rothwell Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d100bf2b4e81..c8e10216c113 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -55,7 +55,7 @@ struct auth_domain *unix_domain_find(char *name) while(1) { if (rv) { if (new && rv != &new->h) - svcauth_unix_domain_release(new); + svcauth_unix_domain_release(&new->h); if (rv->flavour != &svcauth_unix) { auth_domain_put(rv); -- cgit v1.2.3 From b09734b1f4abd86e046777f0f268215b4ef1b523 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Wed, 2 Feb 2011 11:39:32 -0800 Subject: libceph: Fix base64-decoding when input ends in newline. It used to return -EINVAL because it thought the end was not aligned to 4 bytes. Clean up superfluous src < end test in if, the while itself guarantees that. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil --- net/ceph/armor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/armor.c b/net/ceph/armor.c index eb2a666b0be7..1fc1ee11dfa2 100644 --- a/net/ceph/armor.c +++ b/net/ceph/armor.c @@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end) while (src < end) { int a, b, c, d; - if (src < end && src[0] == '\n') + if (src[0] == '\n') { src++; + continue; + } if (src + 4 > end) return -EINVAL; a = decode_bits(src[0]); -- cgit v1.2.3 From 4be34b9d69c97211ff4eb00d79078f3c1593804d Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 22 Jan 2011 22:40:20 +0100 Subject: SUNRPC: Remove resource leak in svc_rdma_send_error() We leak the memory allocated to 'ctxt' when we return after 'ib_dma_mapping_error()' returns !=0. Signed-off-by: Jesper Juhl Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9df1eadc912a..1a10dcd999ea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, p, 0, length, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) { put_page(p); + svc_rdma_put_context(ctxt, 1); return; } atomic_inc(&xprt->sc_dma_used); -- cgit v1.2.3 From 67c5c6cb8129c595f21e88254a3fc6b3b841ae8e Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Thu, 17 Mar 2011 01:40:10 +0000 Subject: econet: 4 byte infoleak to the network struct aunhdr has 4 padding bytes between 'pad' and 'handle' fields on x86_64. These bytes are not initialized in the variable 'ah' before sending 'ah' to the network. This leads to 4 bytes kernel stack infoleak. This bug was introduced before the git epoch. Signed-off-by: Vasiliy Kulikov Acked-by: Phil Blundell Signed-off-by: David S. Miller --- net/econet/af_econet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0c2826337919..116d3fd3d669 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpdest.sin_addr.s_addr = htonl(network | addr.station); } + memset(&ah, 0, sizeof(ah)); ah.port = port; ah.cb = cb & 0x7f; ah.code = 2; /* magic */ - ah.pad = 0; /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); -- cgit v1.2.3 From 5e5069b41d5b82bcadc1dbf73f48476b428c102f Mon Sep 17 00:00:00 2001 From: Roger Luethi Date: Thu, 17 Mar 2011 06:37:21 +0000 Subject: ethtool: __ethtool_set_sg: check for function pointer before using it __ethtool_set_sg does not check if dev->ethtool_ops->set_sg is defined which can result in a NULL pointer dereference when ethtool is used to change SG settings for drivers without SG support. Signed-off-by: Roger Luethi Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c1a71bb738da..a1086fb0c0c7 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1457,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (!dev->ethtool_ops->set_sg) + return -EOPNOTSUPP; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) return -EINVAL; -- cgit v1.2.3 From 3a7da39d165e0c363c294feec119db1427032afd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 17 Mar 2011 07:34:32 +0000 Subject: ethtool: Compat handling for struct ethtool_rxnfc This structure was accidentally defined such that its layout can differ between 32-bit and 64-bit processes. Add compat structure definitions and an ioctl wrapper function. Signed-off-by: Ben Hutchings Acked-by: Alexander Duyck Cc: stable@kernel.org [2.6.30+] Signed-off-by: David S. Miller --- net/socket.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 937d0fcf74bc..5212447c86e7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2588,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; + + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - ifr = compat_alloc_user_space(sizeof(*ifr)); + compat_rxnfc = compat_ptr(data); - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. + */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_u and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + + sizeof(compat_rxnfc->fs.m_u) != + offsetof(struct ethtool_rxnfc, fs.m_u) + + sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void *)(&rxnfc->fs.m_u + 1) - + (void *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void *)(&rxnfc->fs.location + 1) - + (void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void *)(&rxnfc->fs.location + 1) - + (const void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) -- cgit v1.2.3 From d870bfb9d366c5d466c0f5419a4ec95a3f71ea8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Mar 2011 00:27:27 +0000 Subject: vlan: should take into account needed_headroom Commit c95b819ad7 (gre: Use needed_headroom) made gre use needed_headroom instead of hard_header_len This uncover a bug in vlan code. We should make sure vlan devices take into account their real_dev->needed_headroom or we risk a crash in ipgre_header(), because we dont have enough room to push IP header in skb. Reported-by: Diddi Oscarsson Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ae610f046de5..e34ea9e5e28b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -720,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev) dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; #endif + dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; -- cgit v1.2.3 From 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Mar 2011 05:27:28 +0000 Subject: bridge: Reset IPCB when entering IP stack on NF_FORWARD Whenever we enter the IP stack proper from bridge netfilter we need to ensure that the skb is in a form the IP stack expects it to be in. The entry point on NF_FORWARD did not meet the requirements of the IP stack, therefore leading to potential crashes/panics. This patch fixes the problem. Signed-off-by: Herbert Xu Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f97af5590ba1..008ff6c4eecf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -739,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } + if (br_parse_ip_options(skb)) + return NF_DROP; + /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; -- cgit v1.2.3 From 5e0c1eb7e6b61998c7ecd39b7f69a15773d894d4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 20 Mar 2011 15:33:26 +0100 Subject: netfilter: ipset: fix address ranges at hash:*port* types The hash:*port* types with IPv4 silently ignored when address ranges with non TCP/UDP were added/deleted from the set and used the first address from the range only. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_hash_ipport.c | 34 +++++++++-------------------- net/netfilter/ipset/ip_set_hash_ipportip.c | 34 +++++++++-------------------- net/netfilter/ipset/ip_set_hash_ipportnet.c | 34 +++++++++-------------------- net/netfilter/ipset/ip_set_hash_netport.c | 30 +++++++------------------ 4 files changed, 38 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index adbe787ea5dc..b9214145d357 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 22e23abb86c6..4642872df6e1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6033e8b54bbd..2cb84a54b7ad 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34a165626ee9..8598676f2a05 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport4_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } -- cgit v1.2.3 From 5c1aba467828bf0574ec5754c84884d573f590af Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 20 Mar 2011 15:35:01 +0100 Subject: netfilter: ipset: fix checking the type revision at create command The revision of the set type was not checked at the create command: if the userspace sent a valid set type but with not supported revision number, it'd create a loop. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_core.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 618a615acc9d..d6b48230a540 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -94,16 +94,28 @@ static int find_set_type_get(const char *name, u8 family, u8 revision, struct ip_set_type **found) { + struct ip_set_type *type; + int err; + rcu_read_lock(); *found = find_set_type(name, family, revision); if (*found) { - int err = !try_module_get((*found)->me); - rcu_read_unlock(); - return err ? -EFAULT : 0; + err = !try_module_get((*found)->me) ? -EFAULT : 0; + goto unlock; } + /* Make sure the type is loaded but we don't support the revision */ + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name)) { + err = -IPSET_ERR_FIND_TYPE; + goto unlock; + } rcu_read_unlock(); return try_to_load_type(name); + +unlock: + rcu_read_unlock(); + return err; } /* Find a given set type by name and family. @@ -116,7 +128,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) struct ip_set_type *type; bool found = false; - *min = *max = 0; + *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && @@ -124,7 +136,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) found = true; if (type->revision < *min) *min = type->revision; - else if (type->revision > *max) + if (type->revision > *max) *max = type->revision; } rcu_read_unlock(); -- cgit v1.2.3 From db856674ac69e31946e56085239757cca3f7655f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Mar 2011 15:40:06 +0100 Subject: netfilter: xtables: fix reentrancy commit f3c5c1bfd4308 (make ip_tables reentrant) introduced a race in handling the stackptr restore, at the end of ipt_do_table() We should do it before the call to xt_info_rdunlock_bh(), or we allow cpu preemption and another cpu overwrites stackptr of original one. A second fix is to change the underflow test to check the origptr value instead of 0 to detect underflow, or else we allow a jump from different hooks. Signed-off-by: Eric Dumazet Cc: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b09ed0d080f9..ffcea0d1678e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) { + if (*stackptr <= origptr) { e = get_entry(table_base, private->underflow[hook]); pr_debug("Underflow (this is normal) " @@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c9598a9067d7..0b2af9b85cec 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) + if (*stackptr <= origptr) e = get_entry(table_base, private->underflow[hook]); else @@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; -- cgit v1.2.3 From 961ed183a9fd080cf306c659b8736007e44065a5 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 20 Mar 2011 15:42:52 +0100 Subject: netfilter: ipt_CLUSTERIP: fix buffer overflow 'buffer' string is copied from userspace. It is not checked whether it is zero terminated. This may lead to overflow inside of simple_strtoul(). Changli Gao suggested to copy not more than user supplied 'size' bytes. It was introduced before the git epoch. Files "ipt_CLUSTERIP/*" are root writable only by default, however, on some setups permissions might be relaxed to e.g. network admin user. Signed-off-by: Vasiliy Kulikov Acked-by: Changli Gao Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 403ca57f6011..d609ac3cb9a4 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, char buffer[PROC_WRITELEN+1]; unsigned long nodenum; - if (copy_from_user(buffer, input, PROC_WRITELEN)) + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) return -EFAULT; + buffer[size] = 0; if (*buffer == '+') { nodenum = simple_strtoul(buffer+1, NULL, 10); -- cgit v1.2.3 From 8bc8aecdc5e26cfda12dbd6867af4aa67836da6a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 21 Mar 2011 20:01:00 +0100 Subject: mac80211: initialize sta->last_rx in sta_info_alloc This field is used to determine the inactivity time. When in AP mode, hostapd uses it for kicking out inactive clients after a while. Without this patch, hostapd immediately deauthenticates a new client if it checks the inactivity time before the client sends its first data frame. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5a11078827ab..d0311a322ddd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -243,6 +243,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; + sta->last_rx = jiffies; ewma_init(&sta->avg_signal, 1024, 8); -- cgit v1.2.3 From 6f6c7006755b667f9f6c1f3b6f08cd65f75cc471 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 17 Jan 2011 20:34:08 -0800 Subject: libceph: fix osd request queuing on osdmap updates If we send a request to osd A, and the request's pg remaps to osd B and then back to A in quick succession, we need to resend the request to A. The old code was only calling kick_requests after processing all incremental maps in a message, so it was very possible to not resend a request that needed to be resent. This would make the osd eventually time out (at least with the current default of osd timeouts enabled). The correct approach is to scan requests on every map incremental. This patch refactors the kick code in a few ways: - all requests are either on req_lru (in flight), req_unsent (ready to send), or req_notarget (currently map to no up osd) - mapping always done by map_request (previous map_osds) - if the mapping changes, we requeue. requests are resent only after all map incrementals are processed. - some osd reset code is moved out of kick_requests into a separate function - the "kick this osd" functionality is moved to kick_osd_requests, as it is unrelated to scanning for request->pg->osd mapping changes Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 255 ++++++++++++++++++++++++-------------------------- 1 file changed, 122 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3e20a122ffa2..b85ed5a5503d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -22,10 +22,9 @@ #define OSD_OPREPLY_FRONT_LEN 512 static const struct ceph_connection_operations osd_con_ops; -static int __kick_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd); -static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); +static void send_queued(struct ceph_osd_client *osdc); +static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static int op_needs_trail(int op) { @@ -529,6 +528,35 @@ __lookup_request_ge(struct ceph_osd_client *osdc, return NULL; } +/* + * Resubmit requests pending on the given osd. + */ +static void __kick_osd_requests(struct ceph_osd_client *osdc, + struct ceph_osd *osd) +{ + struct ceph_osd_request *req; + int err; + + dout("__kick_osd_requests osd%d\n", osd->o_osd); + err = __reset_osd(osdc, osd); + if (err == -EAGAIN) + return; + + list_for_each_entry(req, &osd->o_requests, r_osd_item) { + list_move(&req->r_req_lru_item, &osdc->req_unsent); + dout("requeued %p tid %llu osd%d\n", req, req->r_tid, + osd->o_osd); + req->r_flags |= CEPH_OSD_FLAG_RETRY; + } +} + +static void kick_osd_requests(struct ceph_osd_client *osdc, + struct ceph_osd *kickosd) +{ + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, kickosd); + mutex_unlock(&osdc->request_mutex); +} /* * If the osd connection drops, we need to resubmit all requests. @@ -543,7 +571,8 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_requests(osdc, osd); + kick_osd_requests(osdc, osd); + send_queued(osdc); up_read(&osdc->map_sem); } @@ -781,20 +810,20 @@ static void __cancel_request(struct ceph_osd_request *req) ceph_con_revoke(&req->r_osd->o_con, req->r_request); req->r_sent = 0; } - list_del_init(&req->r_req_lru_item); } /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is - * no up osd, set r_osd to NULL. + * no up osd, set r_osd to NULL. Move the request to the appropiate list + * (unsent, homeless) or leave on in-flight lru. * * Return 0 if unchanged, 1 if changed, or negative on error. * * Caller should hold map_sem for read and request_mutex. */ -static int __map_osds(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) +static int __map_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; @@ -802,11 +831,13 @@ static int __map_osds(struct ceph_osd_client *osdc, int o = -1, num = 0; int err; - dout("map_osds %p tid %lld\n", req, req->r_tid); + dout("map_request %p tid %lld\n", req, req->r_tid); err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, &req->r_file_layout, osdc->osdmap); - if (err) + if (err) { + list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; + } pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; @@ -823,7 +854,7 @@ static int __map_osds(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n", + dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, req->r_osd ? req->r_osd->o_osd : -1); @@ -841,10 +872,12 @@ static int __map_osds(struct ceph_osd_client *osdc, if (!req->r_osd && o >= 0) { err = -ENOMEM; req->r_osd = create_osd(osdc); - if (!req->r_osd) + if (!req->r_osd) { + list_move(&req->r_req_lru_item, &osdc->req_notarget); goto out; + } - dout("map_osds osd %p is osd%d\n", req->r_osd, o); + dout("map_request osd %p is osd%d\n", req->r_osd, o); req->r_osd->o_osd = o; req->r_osd->o_con.peer_name.num = cpu_to_le64(o); __insert_osd(osdc, req->r_osd); @@ -855,6 +888,9 @@ static int __map_osds(struct ceph_osd_client *osdc, if (req->r_osd) { __remove_osd_from_lru(req->r_osd); list_add(&req->r_osd_item, &req->r_osd->o_requests); + list_move(&req->r_req_lru_item, &osdc->req_unsent); + } else { + list_move(&req->r_req_lru_item, &osdc->req_notarget); } err = 1; /* osd or pg changed */ @@ -869,16 +905,6 @@ static int __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { struct ceph_osd_request_head *reqhead; - int err; - - err = __map_osds(osdc, req); - if (err < 0) - return err; - if (req->r_osd == NULL) { - dout("send_request %p no up osds in pg\n", req); - ceph_monc_request_next_osdmap(&osdc->client->monc); - return 0; - } dout("send_request %p tid %llu to osd%d flags %d\n", req, req->r_tid, req->r_osd->o_osd, req->r_flags); @@ -897,6 +923,21 @@ static int __send_request(struct ceph_osd_client *osdc, return 0; } +/* + * Send any requests in the queue (req_unsent). + */ +static void send_queued(struct ceph_osd_client *osdc) +{ + struct ceph_osd_request *req, *tmp; + + dout("send_queued\n"); + mutex_lock(&osdc->request_mutex); + list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { + __send_request(osdc, req); + } + mutex_unlock(&osdc->request_mutex); +} + /* * Timeout callback, called every N seconds when 1 or more osd * requests has been active for more than N seconds. When this @@ -916,7 +957,6 @@ static void handle_timeout(struct work_struct *work) unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; unsigned long last_stamp = 0; - struct rb_node *p; struct list_head slow_osds; dout("timeout\n"); @@ -925,21 +965,6 @@ static void handle_timeout(struct work_struct *work) ceph_monc_request_next_osdmap(&osdc->client->monc); mutex_lock(&osdc->request_mutex); - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { - req = rb_entry(p, struct ceph_osd_request, r_node); - - if (req->r_resend) { - int err; - - dout("osdc resending prev failed %lld\n", req->r_tid); - err = __send_request(osdc, req); - if (err) - dout("osdc failed again on %lld\n", req->r_tid); - else - req->r_resend = false; - continue; - } - } /* * reset osds that appear to be _really_ unresponsive. this @@ -963,7 +988,7 @@ static void handle_timeout(struct work_struct *work) BUG_ON(!osd); pr_warning(" tid %llu timed out on osd%d, will reset osd\n", req->r_tid, osd->o_osd); - __kick_requests(osdc, osd); + __kick_osd_requests(osdc, osd); } /* @@ -991,7 +1016,7 @@ static void handle_timeout(struct work_struct *work) __schedule_osd_timeout(osdc); mutex_unlock(&osdc->request_mutex); - + send_queued(osdc); up_read(&osdc->map_sem); } @@ -1109,108 +1134,61 @@ bad: ceph_msg_dump(msg); } - -static int __kick_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) +static void reset_changed_osds(struct ceph_osd_client *osdc) { - struct ceph_osd_request *req; struct rb_node *p, *n; - int needmap = 0; - int err; - - dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1); - if (kickosd) { - err = __reset_osd(osdc, kickosd); - if (err == -EAGAIN) - return 1; - } else { - for (p = rb_first(&osdc->osds); p; p = n) { - struct ceph_osd *osd = - rb_entry(p, struct ceph_osd, o_node); - - n = rb_next(p); - if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) || - memcmp(&osd->o_con.peer_addr, - ceph_osd_addr(osdc->osdmap, - osd->o_osd), - sizeof(struct ceph_entity_addr)) != 0) - __reset_osd(osdc, osd); - } - } - - for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { - req = rb_entry(p, struct ceph_osd_request, r_node); - - if (req->r_resend) { - dout(" r_resend set on tid %llu\n", req->r_tid); - __cancel_request(req); - goto kick; - } - if (req->r_osd && kickosd == req->r_osd) { - __cancel_request(req); - goto kick; - } - err = __map_osds(osdc, req); - if (err == 0) - continue; /* no change */ - if (err < 0) { - /* - * FIXME: really, we should set the request - * error and fail if this isn't a 'nofail' - * request, but that's a fair bit more - * complicated to do. So retry! - */ - dout(" setting r_resend on %llu\n", req->r_tid); - req->r_resend = true; - continue; - } - if (req->r_osd == NULL) { - dout("tid %llu maps to no valid osd\n", req->r_tid); - needmap++; /* request a newer map */ - continue; - } + for (p = rb_first(&osdc->osds); p; p = n) { + struct ceph_osd *osd = rb_entry(p, struct ceph_osd, o_node); -kick: - dout("kicking %p tid %llu osd%d\n", req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - req->r_flags |= CEPH_OSD_FLAG_RETRY; - err = __send_request(osdc, req); - if (err) { - dout(" setting r_resend on %llu\n", req->r_tid); - req->r_resend = true; - } + n = rb_next(p); + if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) || + memcmp(&osd->o_con.peer_addr, + ceph_osd_addr(osdc->osdmap, + osd->o_osd), + sizeof(struct ceph_entity_addr)) != 0) + __reset_osd(osdc, osd); } - - return needmap; } /* - * Resubmit osd requests whose osd or osd address has changed. Request - * a new osd map if osds are down, or we are otherwise unable to determine - * how to direct a request. - * - * Close connections to down osds. - * - * If @who is specified, resubmit requests for that specific osd. + * Requeue requests whose mapping to an OSD has changed. If requests map to + * no osd, request a new map. * * Caller should hold map_sem for read and request_mutex. */ -static void kick_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) +static void kick_requests(struct ceph_osd_client *osdc) { - int needmap; + struct ceph_osd_request *req; + struct rb_node *p; + int needmap = 0; + int err; + dout("kick_requests\n"); mutex_lock(&osdc->request_mutex); - needmap = __kick_requests(osdc, kickosd); + for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { + req = rb_entry(p, struct ceph_osd_request, r_node); + err = __map_request(osdc, req); + if (err < 0) + continue; /* error */ + if (req->r_osd == NULL) { + dout("%p tid %llu maps to no osd\n", req, req->r_tid); + needmap++; /* request a newer map */ + } else if (err > 0) { + dout("%p tid %llu requeued on osd%d\n", req, req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); + req->r_flags |= CEPH_OSD_FLAG_RETRY; + } + } mutex_unlock(&osdc->request_mutex); if (needmap) { dout("%d requests for down osds, need new map\n", needmap); ceph_monc_request_next_osdmap(&osdc->client->monc); } - } + + /* * Process updated osd map. * @@ -1263,6 +1241,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) ceph_osdmap_destroy(osdc->osdmap); osdc->osdmap = newmap; } + kick_requests(osdc); + reset_changed_osds(osdc); } else { dout("ignoring incremental map %u len %d\n", epoch, maplen); @@ -1300,6 +1280,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) osdc->osdmap = newmap; if (oldmap) ceph_osdmap_destroy(oldmap); + kick_requests(osdc); } p += maplen; nr_maps--; @@ -1308,8 +1289,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) done: downgrade_write(&osdc->map_sem); ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); - if (newmap) - kick_requests(osdc, NULL); + send_queued(osdc); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); return; @@ -1347,15 +1327,22 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, * the request still han't been touched yet. */ if (req->r_sent == 0) { - rc = __send_request(osdc, req); - if (rc) { - if (nofail) { - dout("osdc_start_request failed send, " - " marking %lld\n", req->r_tid); - req->r_resend = true; - rc = 0; - } else { - __unregister_request(osdc, req); + rc = __map_request(osdc, req); + if (rc < 0) + return rc; + if (req->r_osd == NULL) { + dout("send_request %p no up osds in pg\n", req); + ceph_monc_request_next_osdmap(&osdc->client->monc); + } else { + rc = __send_request(osdc, req); + if (rc) { + if (nofail) { + dout("osdc_start_request failed send, " + " will retry %lld\n", req->r_tid); + rc = 0; + } else { + __unregister_request(osdc, req); + } } } } @@ -1441,6 +1428,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) INIT_LIST_HEAD(&osdc->osd_lru); osdc->requests = RB_ROOT; INIT_LIST_HEAD(&osdc->req_lru); + INIT_LIST_HEAD(&osdc->req_unsent); + INIT_LIST_HEAD(&osdc->req_notarget); osdc->num_requests = 0; INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); -- cgit v1.2.3 From a454f0ccefbfdbfc0e1aa8a5f8010af5e48b8845 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 21 Mar 2011 18:08:28 -0700 Subject: xfrm: Fix initialize repl field of struct xfrm_state Commit 'xfrm: Move IPsec replay detection functions to a separate file' (9fdc4883d92d20842c5acea77a4a21bb1574b495) introduce repl field to struct xfrm_state, and only initialize it under SA's netlink create path, the other path, such as pf_key, ipcomp/ipcomp6 etc, the repl field remaining uninitialize. So if the SA is created by pf_key, any input packet with SA's encryption algorithm will cause panic. int xfrm_input() { ... x->repl->advance(x, seq); ... } This patch fixed it by introduce new function __xfrm_init_state(). Pid: 0, comm: swapper Not tainted 2.6.38-next+ #14 Bochs Bochs EIP: 0060:[] EFLAGS: 00010206 CPU: 0 EIP is at xfrm_input+0x31c/0x4cc EAX: dd839c00 EBX: 00000084 ECX: 00000000 EDX: 01000000 ESI: dd839c00 EDI: de3a0780 EBP: dec1de88 ESP: dec1de64 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process swapper (pid: 0, ti=dec1c000 task=c09c0f20 task.ti=c0992000) Stack: 00000000 00000000 00000002 c0ba27c0 00100000 01000000 de3a0798 c0ba27c0 00000033 dec1de98 c0786848 00000000 de3a0780 dec1dea4 c0786868 00000000 dec1debc c074ee56 e1da6b8c de3a0780 c074ed44 de3a07a8 dec1decc c074ef32 Call Trace: [] xfrm4_rcv_encap+0x22/0x27 [] xfrm4_rcv+0x1b/0x1d [] ip_local_deliver_finish+0x112/0x1b1 [] ? ip_local_deliver_finish+0x0/0x1b1 [] NF_HOOK.clone.1+0x3d/0x44 [] ip_local_deliver+0x3e/0x44 [] ? ip_local_deliver_finish+0x0/0x1b1 [] ip_rcv_finish+0x30a/0x332 [] ? ip_rcv_finish+0x0/0x332 [] NF_HOOK.clone.1+0x3d/0x44 [] ip_rcv+0x20b/0x247 [] ? ip_rcv_finish+0x0/0x332 [] __netif_receive_skb+0x373/0x399 [] netif_receive_skb+0x4b/0x51 [] cp_rx_poll+0x210/0x2c4 [8139cp] [] net_rx_action+0x9a/0x17d [] __do_softirq+0xa1/0x149 [] ? __do_softirq+0x0/0x149 Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 15 ++++++++++++++- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d575f0534868..f83a3d1da81b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1907,7 +1907,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) return res; } -int xfrm_init_state(struct xfrm_state *x) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay) { struct xfrm_state_afinfo *afinfo; struct xfrm_mode *inner_mode; @@ -1980,12 +1980,25 @@ int xfrm_init_state(struct xfrm_state *x) if (x->outer_mode == NULL) goto error; + if (init_replay) { + err = xfrm_init_replay(x); + if (err) + goto error; + } + x->km.state = XFRM_STATE_VALID; error: return err; } +EXPORT_SYMBOL(__xfrm_init_state); + +int xfrm_init_state(struct xfrm_state *x) +{ + return __xfrm_init_state(x, true); +} + EXPORT_SYMBOL(xfrm_init_state); int __net_init xfrm_state_init(struct net *net) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 706385ae3e4b..fc152d28753c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -511,7 +511,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_mark_get(attrs, &x->mark); - err = xfrm_init_state(x); + err = __xfrm_init_state(x, false); if (err) goto error; -- cgit v1.2.3 From 8aa525a9340da4227797a06221ca08399006635f Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 21 Mar 2011 18:10:25 -0700 Subject: l2tp: fix possible oops on l2tp_eth module unload A struct used in the l2tp_eth driver for registering network namespace ops was incorrectly marked as __net_initdata, leading to oops when module unloaded. BUG: unable to handle kernel paging request at ffffffffa00ec098 IP: [] ops_exit_list+0x7/0x4b PGD 142d067 PUD 1431063 PMD 195da8067 PTE 0 Oops: 0000 [#1] SMP last sysfs file: /sys/module/l2tp_eth/refcnt Call Trace: [] ? unregister_pernet_operations+0x32/0x93 [] ? unregister_pernet_device+0x2b/0x38 [] ? sys_delete_module+0x1b8/0x222 [] ? do_munmap+0x254/0x318 [] ? page_fault+0x25/0x30 [] ? system_call_fastpath+0x16/0x1b Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8d9ce0accc98..a8193f52c13c 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -283,7 +283,7 @@ static __net_init int l2tp_eth_init_net(struct net *net) return 0; } -static __net_initdata struct pernet_operations l2tp_eth_net_ops = { +static struct pernet_operations l2tp_eth_net_ops = { .init = l2tp_eth_init_net, .id = &l2tp_eth_net_id, .size = sizeof(struct l2tp_eth_net), -- cgit v1.2.3 From 674f2115995b7b588cbf3540c9f9b2448a8c7ea8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Mar 2011 18:16:39 -0700 Subject: ipx: fix ipx_release() Commit b0d0d915d1d1a0 (remove the BKL) added a regression, because sock_put() can free memory while we are going to use it later. Fix is to delay sock_put() _after_ release_sock(). Reported-by: Ingo Molnar Tested-by: Ingo Molnar Signed-off-by: Eric Dumazet Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 2731b51923d1..9680226640ef 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -148,7 +148,6 @@ static void ipx_destroy_socket(struct sock *sk) ipx_remove_socket(sk); skb_queue_purge(&sk->sk_receive_queue); sk_refcnt_debug_dec(sk); - sock_put(sk); } /* @@ -1404,6 +1403,7 @@ static int ipx_release(struct socket *sock) sk_refcnt_debug_release(sk); ipx_destroy_socket(sk); release_sock(sk); + sock_put(sk); out: return 0; } -- cgit v1.2.3 From b20e7bbfc7a15a4182730f0936433145992b4b06 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 21 Mar 2011 18:18:00 -0700 Subject: net/appletalk: fix atalk_release use after free The BKL removal in appletalk introduced a use-after-free problem, where atalk_destroy_socket frees a sock, but we still release the socket lock on it. An easy fix is to take an extra reference on the sock and sock_put it when returning from atalk_release. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3d4f4b043406..206e771e82d1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1051,6 +1051,7 @@ static int atalk_release(struct socket *sock) { struct sock *sk = sock->sk; + sock_hold(sk); lock_sock(sk); if (sk) { sock_orphan(sk); @@ -1058,6 +1059,8 @@ static int atalk_release(struct socket *sock) atalk_destroy_socket(sk); } release_sock(sk); + sock_put(sk); + return 0; } -- cgit v1.2.3 From ac0a121d7906b049dfee3649f886c969fbb3c1b7 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 21 Mar 2011 18:20:26 -0700 Subject: net: fix incorrect spelling in drop monitor protocol It was pointed out to me recently that my spelling could be better :) Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 36e603c78ce9..706502ff64aa 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -350,7 +350,7 @@ static int __init init_net_drop_monitor(void) struct per_cpu_dm_data *data; int cpu, rc; - printk(KERN_INFO "Initalizing network drop monitor service\n"); + printk(KERN_INFO "Initializing network drop monitor service\n"); if (sizeof(void *) > 8) { printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); -- cgit v1.2.3 From 9d2a8fa96a44ba242de3a6f56acaef7a40a97b97 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 21 Mar 2011 18:23:34 -0700 Subject: net ipv6: Fix duplicate /proc/sys/net/ipv6/neigh directory entries. When I was fixing issues with unregisgtering tables under /proc/sys/net/ipv6/neigh by adding a mount point it appears I missed a critical ordering issue, in the ipv6 initialization. I had not realized that ipv6_sysctl_register is called at the very end of the ipv6 initialization and in particular after we call neigh_sysctl_register from ndisc_init. "neigh" needs to be initialized in ipv6_static_sysctl_register which is the first ipv6 table to initialized, and definitely before ndisc_init. This removes the weirdness of duplicate tables while still providing a "neigh" mount point which prevents races in sysctl unregistering. This was initially reported at https://bugzilla.kernel.org/show_bug.cgi?id=31232 Reported-by: sunkan@zappa.cx Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv6/sysctl_net_ipv6.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7cb65ef79f9c..6dcf5e7d661b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,6 +17,16 @@ static struct ctl_table empty[1]; +static ctl_table ipv6_static_skeleton[] = { + { + .procname = "neigh", + .maxlen = 0, + .mode = 0555, + .child = empty, + }, + { } +}; + static ctl_table ipv6_table_template[] = { { .procname = "route", @@ -37,12 +47,6 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "neigh", - .maxlen = 0, - .mode = 0555, - .child = empty, - }, { } }; @@ -160,7 +164,7 @@ static struct ctl_table_header *ip6_base; int ipv6_static_sysctl_register(void) { - ip6_base = register_sysctl_paths(net_ipv6_ctl_path, empty); + ip6_base = register_sysctl_paths(net_ipv6_ctl_path, ipv6_static_skeleton); if (ip6_base == NULL) return -ENOMEM; return 0; -- cgit v1.2.3 From f40f94fc6c3b5a5542d5ed976e9ff69a3b463802 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Mar 2011 10:15:40 +0000 Subject: ipvs: fix a typo in __ip_vs_control_init() Reported-by: Ingo Molnar Signed-off-by: Eric Dumazet Cc: Simon Horman Cc: Julian Anastasov Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b799cea31f95..33733c8872e7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3605,7 +3605,7 @@ int __net_init __ip_vs_control_init(struct net *net) /* procfs stats */ ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (ipvs->tot_stats.cpustats) { + if (!ipvs->tot_stats.cpustats) { pr_err("%s(): alloc_percpu.\n", __func__); return -ENOMEM; } -- cgit v1.2.3 From 736561a01f11114146b1b7f82d486fa9c95828ef Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 21 Mar 2011 15:18:01 +0000 Subject: IPVS: Use global mutex in ip_vs_app.c As part of the work to make IPVS network namespace aware __ip_vs_app_mutex was replaced by a per-namespace lock, ipvs->app_mutex. ipvs->app_key is also supplied for debugging purposes. Unfortunately this implementation results in ipvs->app_key residing in non-static storage which at the very least causes a lockdep warning. This patch takes the rather heavy-handed approach of reinstating __ip_vs_app_mutex which will cover access to the ipvs->list_head of all network namespaces. [ 12.610000] IPVS: Creating netns size=2456 id=0 [ 12.630000] IPVS: Registered protocols (TCP, UDP, SCTP, AH, ESP) [ 12.640000] BUG: key ffff880003bbf1a0 not in .data! [ 12.640000] ------------[ cut here ]------------ [ 12.640000] WARNING: at kernel/lockdep.c:2701 lockdep_init_map+0x37b/0x570() [ 12.640000] Hardware name: Bochs [ 12.640000] Pid: 1, comm: swapper Tainted: G W 2.6.38-kexec-06330-g69b7efe-dirty #122 [ 12.650000] Call Trace: [ 12.650000] [] warn_slowpath_common+0x75/0xb0 [ 12.650000] [] warn_slowpath_null+0x15/0x20 [ 12.650000] [] lockdep_init_map+0x37b/0x570 [ 12.650000] [] ? trace_hardirqs_on+0xd/0x10 [ 12.650000] [] debug_mutex_init+0x38/0x50 [ 12.650000] [] __mutex_init+0x5c/0x70 [ 12.650000] [] __ip_vs_app_init+0x64/0x86 [ 12.660000] [] ? ip_vs_init+0x0/0xff [ 12.660000] [] T.620+0x43/0x170 [ 12.660000] [] ? register_pernet_subsys+0x1a/0x40 [ 12.660000] [] ? ip_vs_init+0x0/0xff [ 12.660000] [] ? ip_vs_init+0x0/0xff [ 12.660000] [] register_pernet_operations+0x57/0xb0 [ 12.660000] [] ? ip_vs_init+0x0/0xff [ 12.670000] [] register_pernet_subsys+0x29/0x40 [ 12.670000] [] ip_vs_app_init+0x10/0x12 [ 12.670000] [] ip_vs_init+0x4c/0xff [ 12.670000] [] do_one_initcall+0x7a/0x12e [ 12.670000] [] kernel_init+0x13e/0x1c2 [ 12.670000] [] kernel_thread_helper+0x4/0x10 [ 12.670000] [] ? restore_args+0x0/0x30 [ 12.680000] [] ? kernel_init+0x0/0x1c2 [ 12.680000] [] ? kernel_thread_helper+0x0/0x1global0 Signed-off-by: Simon Horman Cc: Ingo Molnar Cc: Eric Dumazet Cc: Julian Anastasov Cc: Hans Schillstrom Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_app.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 5c48ffb60c28..2dc6de13ac18 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,6 +43,8 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); +static DEFINE_MUTEX(__ip_vs_app_mutex); + /* * Get an ip_vs_app object */ @@ -167,14 +169,13 @@ int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, __u16 port) { - struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); return result; } @@ -189,11 +190,11 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); list_add(&app->a_list, &ipvs->app_list); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); return 0; } @@ -205,10 +206,9 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app) */ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { ip_vs_app_inc_release(net, inc); @@ -216,7 +216,7 @@ void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) list_del(&app->a_list); - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -501,7 +501,7 @@ static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) struct net *net = seq_file_net(seq); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&ipvs->app_mutex); + mutex_lock(&__ip_vs_app_mutex); return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } @@ -535,9 +535,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); - - mutex_unlock(&ipvs->app_mutex); + mutex_unlock(&__ip_vs_app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -583,7 +581,6 @@ static int __net_init __ip_vs_app_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); INIT_LIST_HEAD(&ipvs->app_list); - __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } -- cgit v1.2.3 From 27660515a21bf913e3208ded3f27abd0529fae0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 18 Mar 2011 16:56:34 +0000 Subject: net: implement dev_disable_lro() hw_features compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement compatibility with new hw_features for dev_disable_lro(). This is a transition path - dev_disable_lro() should be later integrated into netdev_fix_features() after all drivers are converted. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 19 +++++++++++-------- net/core/ethtool.c | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0b88eba97dab..f453370131a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1353,14 +1353,17 @@ EXPORT_SYMBOL(dev_close); */ void dev_disable_lro(struct net_device *dev) { - if (dev->ethtool_ops && dev->ethtool_ops->get_flags && - dev->ethtool_ops->set_flags) { - u32 flags = dev->ethtool_ops->get_flags(dev); - if (flags & ETH_FLAG_LRO) { - flags &= ~ETH_FLAG_LRO; - dev->ethtool_ops->set_flags(dev, flags); - } - } + u32 flags; + + if (dev->ethtool_ops && dev->ethtool_ops->get_flags) + flags = dev->ethtool_ops->get_flags(dev); + else + flags = ethtool_op_get_flags(dev); + + if (!(flags & ETH_FLAG_LRO)) + return; + + __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); WARN_ON(dev->features & NETIF_F_LRO); } EXPORT_SYMBOL(dev_disable_lro); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a1086fb0c0c7..24bd57493c0d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -513,7 +513,7 @@ static int ethtool_set_one_feature(struct net_device *dev, } } -static int __ethtool_set_flags(struct net_device *dev, u32 data) +int __ethtool_set_flags(struct net_device *dev, u32 data) { u32 changed; -- cgit v1.2.3 From 74cb3c108bc0f599a4eb40980db8580cfba725c9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:46 +0000 Subject: ipv4: match prefsrc when deleting routes fib_table_delete forgets to match the routes by prefsrc. Callers can specify known IP in fc_prefsrc and we should remove the exact route. This is needed for cases when same local or broadcast addresses are used in different subnets and the routes differ only in prefsrc. All callers that do not provide fc_prefsrc will ignore the route prefsrc as before and will delete the first occurence. That is how the ip route del default magic works. Current callers are: - ip_rt_ioctl where rtentry_to_fib_config provides fc_prefsrc only when the provided device name matches IP label with colon. - inet_rtm_delroute where RTA_PREFSRC is optional too - fib_magic which deals with routes when deleting addresses and where the fc_prefsrc is always set with the primary IP for the concerned IFA. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3d28a35c2e1a..ac87a49ad50b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1665,6 +1665,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_prefsrc || + fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && fib_nh_match(cfg, fi) == 0) { -- cgit v1.2.3 From e6abbaa2725a43cf5d26c4c2a5dc6c0f6029ea19 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:49 +0000 Subject: ipv4: fix route deletion for IPs on many subnets Alex Sidorenko reported for problems with local routes left after IP addresses are deleted. It happens when same IPs are used in more than one subnet for the device. Fix fib_del_ifaddr to restrict the checks for duplicate local and broadcast addresses only to the IFAs that use our primary IFA or another primary IFA with same address. And we expect the prefsrc to be matched when the routes are deleted because it is possible they to differ only by prefsrc. This patch prevents local and broadcast routes to be leaked until their primary IP is deleted finally from the box. As the secondary address promotion needs to delete the routes for all secondaries that used the old primary IFA, add option to ignore these secondaries from the checks and to assume they are already deleted, so that we can safely delete the route while these IFAs are still on the device list. Reported-by: Alex Sidorenko Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 101 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a373a259253c..02c3ba61884a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) } } -static void fib_del_ifaddr(struct in_ifaddr *ifa) +/* Delete primary or secondary address. + * Optionally, on secondary address promotion consider the addresses + * from subnet iprim as deleted, even if they are in device list. + * In this case the secondary ifa can be in device list. + */ +void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; - struct in_ifaddr *prim = ifa; + struct in_ifaddr *prim = ifa, *prim1 = NULL; __be32 brd = ifa->ifa_address | ~ifa->ifa_mask; __be32 any = ifa->ifa_address & ifa->ifa_mask; #define LOCAL_OK 1 @@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) #define BRD0_OK 4 #define BRD1_OK 8 unsigned ok = 0; + int subnet = 0; /* Primary network */ + int gone = 1; /* Address is missing */ + int same_prefsrc = 0; /* Another primary with same IP */ - if (!(ifa->ifa_flags & IFA_F_SECONDARY)) - fib_magic(RTM_DELROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - any, ifa->ifa_prefixlen, prim); - else { + if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (prim == NULL) { printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); return; } + if (iprim && iprim != prim) { + printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n"); + return; + } + } else if (!ipv4_is_zeronet(any) && + (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { + fib_magic(RTM_DELROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + any, ifa->ifa_prefixlen, prim); + subnet = 1; } /* Deletion is more complicated than add. @@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) */ for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + if (ifa1 == ifa) { + /* promotion, keep the IP */ + gone = 0; + continue; + } + /* Ignore IFAs from our subnet */ + if (iprim && ifa1->ifa_mask == iprim->ifa_mask && + inet_ifa_match(ifa1->ifa_address, iprim)) + continue; + + /* Ignore ifa1 if it uses different primary IP (prefsrc) */ + if (ifa1->ifa_flags & IFA_F_SECONDARY) { + /* Another address from our subnet? */ + if (ifa1->ifa_mask == prim->ifa_mask && + inet_ifa_match(ifa1->ifa_address, prim)) + prim1 = prim; + else { + /* We reached the secondaries, so + * same_prefsrc should be determined. + */ + if (!same_prefsrc) + continue; + /* Search new prim1 if ifa1 is not + * using the current prim1 + */ + if (!prim1 || + ifa1->ifa_mask != prim1->ifa_mask || + !inet_ifa_match(ifa1->ifa_address, prim1)) + prim1 = inet_ifa_byprefix(in_dev, + ifa1->ifa_address, + ifa1->ifa_mask); + if (!prim1) + continue; + if (prim1->ifa_local != prim->ifa_local) + continue; + } + } else { + if (prim->ifa_local != ifa1->ifa_local) + continue; + prim1 = ifa1; + if (prim != prim1) + same_prefsrc = 1; + } if (ifa->ifa_local == ifa1->ifa_local) ok |= LOCAL_OK; if (ifa->ifa_broadcast == ifa1->ifa_broadcast) @@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) ok |= BRD1_OK; if (any == ifa1->ifa_broadcast) ok |= BRD0_OK; + /* primary has network specific broadcasts */ + if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) { + __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask; + __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask; + + if (!ipv4_is_zeronet(any1)) { + if (ifa->ifa_broadcast == brd1 || + ifa->ifa_broadcast == any1) + ok |= BRD_OK; + if (brd == brd1 || brd == any1) + ok |= BRD1_OK; + if (any == brd1 || any == any1) + ok |= BRD0_OK; + } + } } if (!(ok & BRD_OK)) fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); - if (!(ok & BRD1_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); - if (!(ok & BRD0_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + if (subnet && ifa->ifa_prefixlen < 31) { + if (!(ok & BRD1_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + if (!(ok & BRD0_OK)) + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + } if (!(ok & LOCAL_OK)) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { + if (gone && + inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { /* And the last, but not the least thing. * We must flush stray FIB entries. * @@ -896,7 +971,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: - fib_del_ifaddr(ifa); + fib_del_ifaddr(ifa, NULL); fib_update_nh_saddrs(dev); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. -- cgit v1.2.3 From 2d230e2b2c3111cf4a11619f60dcd158ae84e3ab Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:52 +0000 Subject: ipv4: remove the routes on secondary promotion The secondary address promotion relies on fib_sync_down_addr to remove all routes created for the secondary addresses when the old primary address is deleted. It does not happen for cases when the primary address is also in another subnet. Fix that by deleting local and broadcast routes for all secondaries while they are on device list and by faking that all addresses from this subnet are to be deleted. It relies on fib_del_ifaddr being able to ignore the IPs from the concerned subnet while checking for duplication. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6d85800daeb7..2523001f4c9a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -345,6 +345,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } + /* On promotion all secondaries from subnet are changing + * the primary IP, we must remove all their routes silently + * and later to add them back with new prefsrc. Do this + * while all addresses are on the device list. + */ + for (ifa = promote; ifa; ifa = ifa->ifa_next) { + if (ifa1->ifa_mask == ifa->ifa_mask && + inet_ifa_match(ifa1->ifa_address, ifa)) + fib_del_ifaddr(ifa, ifa1); + } + /* 2. Unlink it */ *ifap = ifa1->ifa_next; -- cgit v1.2.3 From 04024b937a6e9b7d4320b5853557cea3930d528c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 19 Mar 2011 12:13:54 +0000 Subject: ipv4: optimize route adding on secondary promotion Optimize the calling of fib_add_ifaddr for all secondary addresses after the promoted one to start from their place, not from the new place of the promoted secondary. It will save some CPU cycles because we are sure the promoted secondary was first for the subnet and all next secondaries do not change their place. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2523001f4c9a..d5a4553bebc3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -375,6 +375,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { + struct in_ifaddr *next_sec = promote->ifa_next; if (prev_prom) { prev_prom->ifa_next = promote->ifa_next; @@ -386,7 +387,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); - for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { + for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { if (ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) continue; -- cgit v1.2.3 From a40c4f10e3fb96030358e49abd010c1f08446fa3 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Mon, 21 Mar 2011 15:07:16 -0700 Subject: libceph: add lingering request and watch/notify event framework Lingering requests are requests that are sent to the OSD normally but tracked also after we get a successful request. This keeps the OSD connection open and resends the original request if the object moves to another OSD. The OSD can then send notification messages back to us if another client initiates a notify. This framework will be used by RBD so that the client gets notification when a snapshot is created by another node or tool. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 1 + net/ceph/osd_client.c | 385 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 374 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index f3e4a13fea0c..95f96ab94bba 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -62,6 +62,7 @@ const char *ceph_msg_type_name(int type) case CEPH_MSG_OSD_MAP: return "osd_map"; case CEPH_MSG_OSD_OP: return "osd_op"; case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; + case CEPH_MSG_WATCH_NOTIFY: return "watch_notify"; default: return "unknown"; } } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b85ed5a5503d..02212ed50852 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -25,6 +25,12 @@ static const struct ceph_connection_operations osd_con_ops; static void send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); +static void __register_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); +static void __unregister_linger_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); +static int __send_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); static int op_needs_trail(int op) { @@ -33,6 +39,7 @@ static int op_needs_trail(int op) case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY: return 1; default: return 0; @@ -208,6 +215,8 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, init_completion(&req->r_completion); init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); + INIT_LIST_HEAD(&req->r_linger_item); + INIT_LIST_HEAD(&req->r_linger_osd); req->r_flags = flags; WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); @@ -314,6 +323,24 @@ static void osd_req_encode_op(struct ceph_osd_request *req, break; case CEPH_OSD_OP_STARTSYNC: break; + case CEPH_OSD_OP_NOTIFY: + { + __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); + __le32 timeout = cpu_to_le32(src->watch.timeout); + + BUG_ON(!req->r_trail); + + ceph_pagelist_append(req->r_trail, + &prot_ver, sizeof(prot_ver)); + ceph_pagelist_append(req->r_trail, + &timeout, sizeof(timeout)); + } + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + dst->watch.cookie = cpu_to_le64(src->watch.cookie); + dst->watch.ver = cpu_to_le64(src->watch.ver); + dst->watch.flag = src->watch.flag; + break; default: pr_err("unrecognized osd opcode %d\n", dst->op); WARN_ON(1); @@ -534,7 +561,7 @@ __lookup_request_ge(struct ceph_osd_client *osdc, static void __kick_osd_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; + struct ceph_osd_request *req, *nreq; int err; dout("__kick_osd_requests osd%d\n", osd->o_osd); @@ -546,7 +573,17 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, list_move(&req->r_req_lru_item, &osdc->req_unsent); dout("requeued %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); - req->r_flags |= CEPH_OSD_FLAG_RETRY; + if (!req->r_linger) + req->r_flags |= CEPH_OSD_FLAG_RETRY; + } + + list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, + r_linger_osd) { + __unregister_linger_request(osdc, req); + __register_request(osdc, req); + list_move(&req->r_req_lru_item, &osdc->req_unsent); + dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, + osd->o_osd); } } @@ -590,6 +627,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; INIT_LIST_HEAD(&osd->o_requests); + INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); osd->o_incarnation = 1; @@ -679,7 +717,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) int ret = 0; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); - if (list_empty(&osd->o_requests)) { + if (list_empty(&osd->o_requests) && + list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], &osd->o_con.peer_addr, @@ -752,10 +791,9 @@ static void __cancel_osd_timeout(struct ceph_osd_client *osdc) * Register request, assign tid. If this is the first request, set up * the timeout event. */ -static void register_request(struct ceph_osd_client *osdc, - struct ceph_osd_request *req) +static void __register_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) { - mutex_lock(&osdc->request_mutex); req->r_tid = ++osdc->last_tid; req->r_request->hdr.tid = cpu_to_le64(req->r_tid); INIT_LIST_HEAD(&req->r_req_lru_item); @@ -769,6 +807,13 @@ static void register_request(struct ceph_osd_client *osdc, dout(" first request, scheduling timeout\n"); __schedule_osd_timeout(osdc); } +} + +static void register_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + mutex_lock(&osdc->request_mutex); + __register_request(osdc, req); mutex_unlock(&osdc->request_mutex); } @@ -787,9 +832,14 @@ static void __unregister_request(struct ceph_osd_client *osdc, ceph_con_revoke(&req->r_osd->o_con, req->r_request); list_del_init(&req->r_osd_item); - if (list_empty(&req->r_osd->o_requests)) + if (list_empty(&req->r_osd->o_requests) && + list_empty(&req->r_osd->o_linger_requests)) { + dout("moving osd to %p lru\n", req->r_osd); __move_osd_to_lru(osdc, req->r_osd); - req->r_osd = NULL; + } + if (list_empty(&req->r_osd_item) && + list_empty(&req->r_linger_item)) + req->r_osd = NULL; } ceph_osdc_put_request(req); @@ -812,6 +862,58 @@ static void __cancel_request(struct ceph_osd_request *req) } } +static void __register_linger_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + dout("__register_linger_request %p\n", req); + list_add_tail(&req->r_linger_item, &osdc->req_linger); + list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests); +} + +static void __unregister_linger_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + dout("__unregister_linger_request %p\n", req); + if (req->r_osd) { + list_del_init(&req->r_linger_item); + list_del_init(&req->r_linger_osd); + + if (list_empty(&req->r_osd->o_requests) && + list_empty(&req->r_osd->o_linger_requests)) { + dout("moving osd to %p lru\n", req->r_osd); + __move_osd_to_lru(osdc, req->r_osd); + } + req->r_osd = NULL; + } +} + +void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + mutex_lock(&osdc->request_mutex); + if (req->r_linger) { + __unregister_linger_request(osdc, req); + ceph_osdc_put_request(req); + } + mutex_unlock(&osdc->request_mutex); +} +EXPORT_SYMBOL(ceph_osdc_unregister_linger_request); + +void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, + struct ceph_osd_request *req) +{ + if (!req->r_linger) { + dout("set_request_linger %p\n", req); + req->r_linger = 1; + /* + * caller is now responsible for calling + * unregister_linger_request + */ + ceph_osdc_get_request(req); + } +} +EXPORT_SYMBOL(ceph_osdc_set_request_linger); + /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is @@ -958,7 +1060,6 @@ static void handle_timeout(struct work_struct *work) osdc->client->options->osd_keepalive_timeout * HZ; unsigned long last_stamp = 0; struct list_head slow_osds; - dout("timeout\n"); down_read(&osdc->map_sem); @@ -1060,7 +1161,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, numops * sizeof(struct ceph_osd_op)) goto bad; dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); - /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); @@ -1104,6 +1204,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("handle_reply tid %llu flags %d\n", tid, flags); + if (req->r_linger && (flags & CEPH_OSD_FLAG_ONDISK)) + __register_linger_request(osdc, req); + /* either this is a read, or we got the safe response */ if (result < 0 || (flags & CEPH_OSD_FLAG_ONDISK) || @@ -1124,6 +1227,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, } done: + dout("req=%p req->r_linger=%d\n", req, req->r_linger); ceph_osdc_put_request(req); return; @@ -1159,7 +1263,7 @@ static void reset_changed_osds(struct ceph_osd_client *osdc) */ static void kick_requests(struct ceph_osd_client *osdc) { - struct ceph_osd_request *req; + struct ceph_osd_request *req, *nreq; struct rb_node *p; int needmap = 0; int err; @@ -1177,8 +1281,30 @@ static void kick_requests(struct ceph_osd_client *osdc) } else if (err > 0) { dout("%p tid %llu requeued on osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); - req->r_flags |= CEPH_OSD_FLAG_RETRY; + if (!req->r_linger) + req->r_flags |= CEPH_OSD_FLAG_RETRY; + } + } + + list_for_each_entry_safe(req, nreq, &osdc->req_linger, + r_linger_item) { + dout("linger req=%p req->r_osd=%p\n", req, req->r_osd); + + err = __map_request(osdc, req); + if (err == 0) + continue; /* no change and no osd was specified */ + if (err < 0) + continue; /* hrm! */ + if (req->r_osd == NULL) { + dout("tid %llu maps to no valid osd\n", req->r_tid); + needmap++; /* request a newer map */ + continue; } + + dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, + req->r_osd ? req->r_osd->o_osd : -1); + __unregister_linger_request(osdc, req); + __register_request(osdc, req); } mutex_unlock(&osdc->request_mutex); @@ -1301,6 +1427,223 @@ bad: return; } +/* + * watch/notify callback event infrastructure + * + * These callbacks are used both for watch and notify operations. + */ +static void __release_event(struct kref *kref) +{ + struct ceph_osd_event *event = + container_of(kref, struct ceph_osd_event, kref); + + dout("__release_event %p\n", event); + kfree(event); +} + +static void get_event(struct ceph_osd_event *event) +{ + kref_get(&event->kref); +} + +void ceph_osdc_put_event(struct ceph_osd_event *event) +{ + kref_put(&event->kref, __release_event); +} +EXPORT_SYMBOL(ceph_osdc_put_event); + +static void __insert_event(struct ceph_osd_client *osdc, + struct ceph_osd_event *new) +{ + struct rb_node **p = &osdc->event_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_osd_event *event = NULL; + + while (*p) { + parent = *p; + event = rb_entry(parent, struct ceph_osd_event, node); + if (new->cookie < event->cookie) + p = &(*p)->rb_left; + else if (new->cookie > event->cookie) + p = &(*p)->rb_right; + else + BUG(); + } + + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &osdc->event_tree); +} + +static struct ceph_osd_event *__find_event(struct ceph_osd_client *osdc, + u64 cookie) +{ + struct rb_node **p = &osdc->event_tree.rb_node; + struct rb_node *parent = NULL; + struct ceph_osd_event *event = NULL; + + while (*p) { + parent = *p; + event = rb_entry(parent, struct ceph_osd_event, node); + if (cookie < event->cookie) + p = &(*p)->rb_left; + else if (cookie > event->cookie) + p = &(*p)->rb_right; + else + return event; + } + return NULL; +} + +static void __remove_event(struct ceph_osd_event *event) +{ + struct ceph_osd_client *osdc = event->osdc; + + if (!RB_EMPTY_NODE(&event->node)) { + dout("__remove_event removed %p\n", event); + rb_erase(&event->node, &osdc->event_tree); + ceph_osdc_put_event(event); + } else { + dout("__remove_event didn't remove %p\n", event); + } +} + +int ceph_osdc_create_event(struct ceph_osd_client *osdc, + void (*event_cb)(u64, u64, u8, void *), + int one_shot, void *data, + struct ceph_osd_event **pevent) +{ + struct ceph_osd_event *event; + + event = kmalloc(sizeof(*event), GFP_NOIO); + if (!event) + return -ENOMEM; + + dout("create_event %p\n", event); + event->cb = event_cb; + event->one_shot = one_shot; + event->data = data; + event->osdc = osdc; + INIT_LIST_HEAD(&event->osd_node); + kref_init(&event->kref); /* one ref for us */ + kref_get(&event->kref); /* one ref for the caller */ + init_completion(&event->completion); + + spin_lock(&osdc->event_lock); + event->cookie = ++osdc->event_count; + __insert_event(osdc, event); + spin_unlock(&osdc->event_lock); + + *pevent = event; + return 0; +} +EXPORT_SYMBOL(ceph_osdc_create_event); + +void ceph_osdc_cancel_event(struct ceph_osd_event *event) +{ + struct ceph_osd_client *osdc = event->osdc; + + dout("cancel_event %p\n", event); + spin_lock(&osdc->event_lock); + __remove_event(event); + spin_unlock(&osdc->event_lock); + ceph_osdc_put_event(event); /* caller's */ +} +EXPORT_SYMBOL(ceph_osdc_cancel_event); + + +static void do_event_work(struct work_struct *work) +{ + struct ceph_osd_event_work *event_work = + container_of(work, struct ceph_osd_event_work, work); + struct ceph_osd_event *event = event_work->event; + u64 ver = event_work->ver; + u64 notify_id = event_work->notify_id; + u8 opcode = event_work->opcode; + + dout("do_event_work completing %p\n", event); + event->cb(ver, notify_id, opcode, event->data); + complete(&event->completion); + dout("do_event_work completed %p\n", event); + ceph_osdc_put_event(event); + kfree(event_work); +} + + +/* + * Process osd watch notifications + */ +void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +{ + void *p, *end; + u8 proto_ver; + u64 cookie, ver, notify_id; + u8 opcode; + struct ceph_osd_event *event; + struct ceph_osd_event_work *event_work; + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_8_safe(&p, end, proto_ver, bad); + ceph_decode_8_safe(&p, end, opcode, bad); + ceph_decode_64_safe(&p, end, cookie, bad); + ceph_decode_64_safe(&p, end, ver, bad); + ceph_decode_64_safe(&p, end, notify_id, bad); + + spin_lock(&osdc->event_lock); + event = __find_event(osdc, cookie); + if (event) { + get_event(event); + if (event->one_shot) + __remove_event(event); + } + spin_unlock(&osdc->event_lock); + dout("handle_watch_notify cookie %lld ver %lld event %p\n", + cookie, ver, event); + if (event) { + event_work = kmalloc(sizeof(*event_work), GFP_NOIO); + INIT_WORK(&event_work->work, do_event_work); + if (!event_work) { + dout("ERROR: could not allocate event_work\n"); + goto done_err; + } + event_work->event = event; + event_work->ver = ver; + event_work->notify_id = notify_id; + event_work->opcode = opcode; + if (!queue_work(osdc->notify_wq, &event_work->work)) { + dout("WARNING: failed to queue notify event work\n"); + goto done_err; + } + } + + return; + +done_err: + complete(&event->completion); + ceph_osdc_put_event(event); + return; + +bad: + pr_err("osdc handle_watch_notify corrupt msg\n"); + return; +} + +int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) +{ + int err; + + dout("wait_event %p\n", event); + err = wait_for_completion_interruptible_timeout(&event->completion, + timeout * HZ); + ceph_osdc_put_event(event); + if (err > 0) + err = 0; + dout("wait_event %p returns %d\n", event, err); + return err; +} +EXPORT_SYMBOL(ceph_osdc_wait_event); + /* * Register request, send initial attempt. */ @@ -1430,9 +1773,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) INIT_LIST_HEAD(&osdc->req_lru); INIT_LIST_HEAD(&osdc->req_unsent); INIT_LIST_HEAD(&osdc->req_notarget); + INIT_LIST_HEAD(&osdc->req_linger); osdc->num_requests = 0; INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); + spin_lock_init(&osdc->event_lock); + osdc->event_tree = RB_ROOT; + osdc->event_count = 0; schedule_delayed_work(&osdc->osds_timeout_work, round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ)); @@ -1452,6 +1799,13 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) "osd_op_reply"); if (err < 0) goto out_msgpool; + + osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); + if (IS_ERR(osdc->notify_wq)) { + err = PTR_ERR(osdc->notify_wq); + osdc->notify_wq = NULL; + goto out_msgpool; + } return 0; out_msgpool: @@ -1465,6 +1819,8 @@ EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { + flush_workqueue(osdc->notify_wq); + destroy_workqueue(osdc->notify_wq); cancel_delayed_work_sync(&osdc->timeout_work); cancel_delayed_work_sync(&osdc->osds_timeout_work); if (osdc->osdmap) { @@ -1472,6 +1828,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) osdc->osdmap = NULL; } remove_old_osds(osdc, 1); + WARN_ON(!RB_EMPTY_ROOT(&osdc->osds)); mempool_destroy(osdc->req_mempool); ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); @@ -1580,6 +1937,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) case CEPH_MSG_OSD_OPREPLY: handle_reply(osdc, msg, con); break; + case CEPH_MSG_WATCH_NOTIFY: + handle_watch_notify(osdc, msg); + break; default: pr_err("received unknown message type %d %s\n", type, @@ -1673,6 +2033,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, switch (type) { case CEPH_MSG_OSD_MAP: + case CEPH_MSG_WATCH_NOTIFY: return ceph_msg_new(type, front, GFP_NOFS); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); -- cgit v1.2.3 From eeff66ef6e33925f615d49e6c846263e342ab60e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 8 Mar 2011 16:39:47 +0530 Subject: net/9p: Convert the in the 9p rpc call path to GFP_NOFS Without this we can cause reclaim allocation in writepage. [ 3433.448430] ================================= [ 3433.449117] [ INFO: inconsistent lock state ] [ 3433.449117] 2.6.38-rc5+ #84 [ 3433.449117] --------------------------------- [ 3433.449117] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage. [ 3433.449117] kswapd0/505 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 3433.449117] (iprune_sem){+++++-}, at: [] shrink_icache_memory+0x45/0x2b1 [ 3433.449117] {RECLAIM_FS-ON-W} state was registered at: [ 3433.449117] [] mark_held_locks+0x52/0x70 [ 3433.449117] [] lockdep_trace_alloc+0x85/0x9f [ 3433.449117] [] slab_pre_alloc_hook+0x18/0x3c [ 3433.449117] [] kmem_cache_alloc+0x23/0xa2 [ 3433.449117] [] idr_pre_get+0x2d/0x6f [ 3433.449117] [] p9_idpool_get+0x30/0xae [ 3433.449117] [] p9_client_rpc+0xd7/0x9b0 [ 3433.449117] [] p9_client_clunk+0x88/0xdb [ 3433.449117] [] v9fs_evict_inode+0x3c/0x48 [ 3433.449117] [] evict+0x1f/0x87 [ 3433.449117] [] dispose_list+0x47/0xe3 [ 3433.449117] [] evict_inodes+0x138/0x14f [ 3433.449117] [] generic_shutdown_super+0x57/0xe8 [ 3433.449117] [] kill_anon_super+0x11/0x50 [ 3433.449117] [] v9fs_kill_super+0x49/0xab [ 3433.449117] [] deactivate_locked_super+0x21/0x46 [ 3433.449117] [] deactivate_super+0x40/0x44 [ 3433.449117] [] mntput_no_expire+0x100/0x109 [ 3433.449117] [] sys_umount+0x2f1/0x31c [ 3433.449117] [] system_call_fastpath+0x16/0x1b [ 3433.449117] irq event stamp: 192941 [ 3433.449117] hardirqs last enabled at (192941): [] _raw_spin_unlock_irq+0x2b/0x30 [ 3433.449117] hardirqs last disabled at (192940): [] shrink_inactive_list+0x290/0x2f5 [ 3433.449117] softirqs last enabled at (188470): [] __do_softirq+0x133/0x152 [ 3433.449117] softirqs last disabled at (188455): [] call_softirq+0x1c/0x28 [ 3433.449117] [ 3433.449117] other info that might help us debug this: [ 3433.449117] 1 lock held by kswapd0/505: [ 3433.449117] #0: (shrinker_rwsem){++++..}, at: [] shrink_slab+0x38/0x15f [ 3433.449117] [ 3433.449117] stack backtrace: [ 3433.449117] Pid: 505, comm: kswapd0 Not tainted 2.6.38-rc5+ #84 [ 3433.449117] Call Trace: [ 3433.449117] [] ? valid_state+0x17e/0x191 [ 3433.449117] [] ? save_stack_trace+0x28/0x45 [ 3433.449117] [] ? check_usage_forwards+0x0/0x87 [ 3433.449117] [] ? mark_lock+0x113/0x22c [ 3433.449117] [] ? __lock_acquire+0x37a/0xcf7 [ 3433.449117] [] ? mark_lock+0x2d/0x22c [ 3433.449117] [] ? __lock_acquire+0x392/0xcf7 [ 3433.449117] [] ? determine_dirtyable_memory+0x15/0x28 [ 3433.449117] [] ? lock_acquire+0x57/0x6d [ 3433.449117] [] ? shrink_icache_memory+0x45/0x2b1 [ 3433.449117] [] ? down_read+0x47/0x5c [ 3433.449117] [] ? shrink_icache_memory+0x45/0x2b1 [ 3433.449117] [] ? shrink_icache_memory+0x45/0x2b1 [ 3433.449117] [] ? shrink_slab+0xdb/0x15f [ 3433.449117] [] ? kswapd+0x574/0x96a [ 3433.449117] [] ? kswapd+0x0/0x96a [ 3433.449117] [] ? kthread+0x7d/0x85 [ 3433.449117] [] ? kernel_thread_helper+0x4/0x10 [ 3433.449117] [] ? restore_args+0x0/0x30 [ 3433.449117] [] ? kthread+0x0/0x85 [ 3433.449117] [] ? kernel_thread_helper+0x0/0x10 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 10 +++++----- net/9p/protocol.c | 6 +++--- net/9p/trans_fd.c | 2 +- net/9p/trans_rdma.c | 6 +++--- net/9p/util.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 347ec0cd2718..2ccbf04d37df 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -223,7 +223,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) req = &c->reqs[row][col]; if (!req->tc) { - req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); if (!req->wq) { printk(KERN_ERR "Couldn't grow tag array\n"); return ERR_PTR(-ENOMEM); @@ -233,17 +233,17 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) P9_TRANS_PREF_PAYLOAD_SEP) { int alloc_msize = min(c->msize, 4096); req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_KERNEL); + GFP_NOFS); req->tc->capacity = alloc_msize; req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_KERNEL); + GFP_NOFS); req->rc->capacity = alloc_msize; } else { req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + GFP_NOFS); req->tc->capacity = c->msize; req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_KERNEL); + GFP_NOFS); req->rc->capacity = c->msize; } if ((!req->tc) || (!req->rc)) { diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 2ce515b859b3..8a4084fa8b5a 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -205,7 +205,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (errcode) break; - *sptr = kmalloc(len + 1, GFP_KERNEL); + *sptr = kmalloc(len + 1, GFP_NOFS); if (*sptr == NULL) { errcode = -EFAULT; break; @@ -273,7 +273,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, - GFP_KERNEL); + GFP_NOFS); if (!*wnames) errcode = -ENOMEM; } @@ -317,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, *wqids = kmalloc(*nwqid * sizeof(struct p9_qid), - GFP_KERNEL); + GFP_NOFS); if (*wqids == NULL) errcode = -ENOMEM; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index a30471e51740..aa5672b15eae 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -350,7 +350,7 @@ static void p9_read_work(struct work_struct *work) if (m->req->rc == NULL) { m->req->rc = kmalloc(sizeof(struct p9_fcall) + - m->client->msize, GFP_KERNEL); + m->client->msize, GFP_NOFS); if (!m->req->rc) { m->req = NULL; err = -ENOMEM; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 29a54ccd213d..150e0c4bbf40 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -424,7 +424,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *rpl_context = NULL; /* Allocate an fcall for the reply */ - rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); + rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; goto err_close; @@ -437,7 +437,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) */ if (!req->rc) { req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_KERNEL); + GFP_NOFS); if (req->rc) { req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); @@ -468,7 +468,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) req->rc = NULL; /* Post the request */ - c = kmalloc(sizeof *c, GFP_KERNEL); + c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; goto err_free1; diff --git a/net/9p/util.c b/net/9p/util.c index e048701a72d2..b84619b5ba22 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -92,7 +92,7 @@ int p9_idpool_get(struct p9_idpool *p) unsigned long flags; retry: - if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) + if (idr_pre_get(&p->pool, GFP_NOFS) == 0) return 0; spin_lock_irqsave(&p->lock, flags); -- cgit v1.2.3 From 472e7f9f8b547605ee9670ac803e971c2e3eeac0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 8 Mar 2011 16:39:47 +0530 Subject: net/9p: Fix compile warning Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/trans_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index d62b9aa58df8..9172ab78fcb0 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -41,9 +41,9 @@ EXPORT_SYMBOL(p9_release_req_pages); int p9_nr_pages(struct p9_req_t *req) { - int start_page, end_page; - start_page = (unsigned long long)req->tc->pubuf >> PAGE_SHIFT; - end_page = ((unsigned long long)req->tc->pubuf + req->tc->pbuf_size + + unsigned long start_page, end_page; + start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; + end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT; return end_page - start_page; } @@ -69,8 +69,8 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); if (*pdata_off) - first_page_bytes = min((PAGE_SIZE - *pdata_off), - req->tc->pbuf_size); + first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), + req->tc->pbuf_size); rpinfo = req->tc->private; pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, -- cgit v1.2.3 From 53bda3e5b4e91763224ecb7d05dab94d281fd41d Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Tue, 8 Mar 2011 15:34:20 -0800 Subject: [net/9p] unconditional wake_up to proc waiting for space on VirtIO ring Process may wait to get space on VirtIO ring to send a transaction to VirtFS server. Current code just does a conditional wake_up() which means only one process will be woken up even if multiple processes are waiting. This fix makes the wake_up unconditional. Hence we won't have any processes waiting for-ever. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9b550ed9c711..961e025957ae 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -146,11 +146,10 @@ static void req_done(struct virtqueue *vq) rc = virtqueue_get_buf(chan->vq, &len); if (rc != NULL) { - if (!chan->ring_bufs_avail) { - chan->ring_bufs_avail = 1; - wake_up(chan->vc_wq); - } + chan->ring_bufs_avail = 1; spin_unlock_irqrestore(&chan->lock, flags); + /* Wakeup if anyone waiting for VirtIO ring space. */ + wake_up(chan->vc_wq); P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); -- cgit v1.2.3 From a01a984035ea799b14aa5e874dcaeb122f09c4b4 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Mon, 14 Mar 2011 14:12:49 -0700 Subject: [net/9p] Set the condition just before waking up. Given that the sprious wake-ups are common, we need to move the condition setting right next to the wake_up(). After setting the condition to req->status = REQ_STATUS_RCVD, sprious wakeups may cause the virtqueue back on the free list for someone else to use. This may result in kernel panic while relasing the pinned pages in p9_release_req_pages(). Also rearranged the while loop in req_done() for better redability. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 961e025957ae..cb98af9367ab 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -141,33 +141,33 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); - do { + while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - if (rc != NULL) { - chan->ring_bufs_avail = 1; - spin_unlock_irqrestore(&chan->lock, flags); - /* Wakeup if anyone waiting for VirtIO ring space. */ - wake_up(chan->vc_wq); - P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); - P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", - rc->tag); - req = p9_tag_lookup(chan->client, rc->tag); - req->status = REQ_STATUS_RCVD; - if (req->tc->private) { - struct trans_rpage_info *rp = req->tc->private; - /*Release pages */ - p9_release_req_pages(rp); - if (rp->rp_alloc) - kfree(rp); - req->tc->private = NULL; - } - p9_client_cb(chan->client, req); - } else { + if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); + break; } - } while (rc != NULL); + + chan->ring_bufs_avail = 1; + spin_unlock_irqrestore(&chan->lock, flags); + /* Wakeup if anyone waiting for VirtIO ring space. */ + wake_up(chan->vc_wq); + P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); + P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + /*Release pages */ + p9_release_req_pages(rp); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } + req->status = REQ_STATUS_RCVD; + p9_client_cb(chan->client, req); + } } /** -- cgit v1.2.3 From 316ad5501c2098cb2a2a25ed77a0421f1671411c Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Mon, 14 Mar 2011 14:22:41 -0700 Subject: [net/9p] Don't re-pin pages on retrying virtqueue_add_buf(). Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cb98af9367ab..c6e1ae2fb926 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -262,7 +262,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); -req_retry: req->status = REQ_STATUS_SENT; if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { @@ -295,6 +294,7 @@ req_retry: } } +req_retry_pinned: spin_lock_irqsave(&chan->lock, flags); /* Handle out VirtIO ring buffers */ @@ -355,7 +355,7 @@ req_retry: return err; P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); - goto req_retry; + goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); P9_DPRINTK(P9_DEBUG_TRANS, -- cgit v1.2.3 From 68da9ba4eeadae86ad42e52b80822fbd56971267 Mon Sep 17 00:00:00 2001 From: "Venkateswararao Jujjuri (JV)" Date: Fri, 18 Mar 2011 15:49:48 -0700 Subject: [net/9p]: Introduce basic flow-control for VirtIO transport. Recent zerocopy work in the 9P VirtIO transport maps and pins user buffers into kernel memory for the server to work on them. Since the user process can initiate this kind of pinning with a simple read/write call, thousands of IO threads initiated by the user process can hog the system resources and could result into denial of service. This patch introduces flow control to avoid that extreme scenario. The ceiling limit to avoid denial of service attacks is set to relatively high (nr_free_pagecache_pages()/4) so that it won't interfere with regular usage, but can step in extreme cases to limit the total system hang. Since we don't have a global structure to accommodate this variable, I choose the virtio_chan as the home for this. Signed-off-by: Venkateswararao Jujjuri Reviewed-by: Badari Pulavarty Signed-off-by: Eric Van Hensbergen --- net/9p/trans_virtio.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c6e1ae2fb926..e8f046b07182 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include "trans_common.h" @@ -51,6 +52,8 @@ /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); +static DECLARE_WAIT_QUEUE_HEAD(vp_wq); +static atomic_t vp_pinned = ATOMIC_INIT(0); /** * struct virtio_chan - per-instance transport information @@ -78,7 +81,10 @@ struct virtio_chan { struct virtqueue *vq; int ring_bufs_avail; wait_queue_head_t *vc_wq; - + /* This is global limit. Since we don't have a global structure, + * will be placing it in each channel. + */ + int p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; @@ -159,8 +165,11 @@ static void req_done(struct virtqueue *vq) req = p9_tag_lookup(chan->client, rc->tag); if (req->tc->private) { struct trans_rpage_info *rp = req->tc->private; + int p = rp->rp_nr_pages; /*Release pages */ p9_release_req_pages(rp); + atomic_sub(p, &vp_pinned); + wake_up(&vp_wq); if (rp->rp_alloc) kfree(rp); req->tc->private = NULL; @@ -269,6 +278,14 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) int rpinfo_size = sizeof(struct trans_rpage_info) + sizeof(struct page *) * nr_pages; + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + atomic_read(&vp_pinned) < chan->p9_max_pages); + if (err == -ERESTARTSYS) + return err; + P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); + } + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { /* We can use sdata */ req->tc->private = req->tc->sdata + req->tc->size; @@ -291,6 +308,8 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) if (rpinfo->rp_alloc) kfree(rpinfo); return err; + } else { + atomic_add(rpinfo->rp_nr_pages, &vp_pinned); } } @@ -452,6 +471,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; + /* Ceiling limit to avoid denial of service attacks */ + chan->p9_max_pages = nr_free_buffer_pages()/4; mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); -- cgit v1.2.3 From 246408dcd5dfeef2df437ccb0ef4d6ee87805f58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Mar 2011 18:40:10 -0400 Subject: SUNRPC: Never reuse the socket port after an xs_close() If we call xs_close(), we're in one of two situations: - Autoclose, which means we don't expect to resend a request - bind+connect failed, which probably means the port is in use Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/xprtsock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index be96d429b475..1e336a06d3e6 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -710,6 +710,8 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + transport->srcport = 0; + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; -- cgit v1.2.3 From 94dcf29a11b3d20a28790598d701f98484a969da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:45 -0700 Subject: kthread: use kthread_create_on_node() ksoftirqd, kworker, migration, and pktgend kthreads can be created with kthread_create_on_node(), to get proper NUMA affinities for their stack and task_struct. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Acked-by: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/pktgen.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0c55eaa70e39..aeeece72b72f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3761,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu) list_add_tail(&t->th_list, &pktgen_threads); init_completion(&t->start_done); - p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); + p = kthread_create_on_node(pktgen_thread_worker, + t, + cpu_to_node(cpu), + "kpktgend_%d", cpu); if (IS_ERR(p)) { pr_err("kernel_thread() failed for cpu %d\n", t->cpu); list_del(&t->th_list); -- cgit v1.2.3 From 9c7a4f9ce651383c73dfdff3d7e21d5f9572c4ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Mar 2011 19:17:36 -0700 Subject: ipv6: ip6_route_output does not modify sk parameter, so make it const This avoids explicit cast to avoid 'discards qualifiers' compiler warning in a netfilter patch that i've been working on. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6814c8722fa7..843406f14d7b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -854,7 +854,7 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, +struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { int flags = 0; -- cgit v1.2.3 From a7bff75b087e7a355838a32efe61707cfa73c194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 22 Mar 2011 11:40:32 +0000 Subject: bridge: Fix possibly wrong MLD queries' ethernet source address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ipv6_dev_get_saddr() is currently called with an uninitialized destination address. Although in tests it usually seemed to nevertheless always fetch the right source address, there seems to be a possible race condition. Therefore this commit changes this, first setting the destination address and only after that fetching the source address. Reported-by: Jan Beulich Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 030a002ff8ee..f61eb2eff3fd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -445,9 +445,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr); - ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); -- cgit v1.2.3 From 67d4120a1793138bc9f4a6eb61d0fc5298ed97e0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 14 Mar 2011 10:57:03 +0000 Subject: tcp: avoid cwnd moderation in undo In the current undo logic, cwnd is moderated after it was restored to the value prior entering fast-recovery. It was moderated first in tcp_try_undo_recovery then again in tcp_complete_cwr. Since the undo indicates recovery was false, these moderations are not necessary. If the undo is triggered when most of the outstanding data have been acknowledged, the (restored) cwnd is falsely pulled down to a small value. This patch removes these cwnd moderations if cwnd is undone a) during fast-recovery b) by receiving DSACKs past fast-recovery Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index da782e7ab16d..e72af7e31d1c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); @@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { + if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } } else { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } - tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk) static inline void tcp_complete_cwr(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_stamp = tcp_time_stamp; + /* Do not moderate cwnd if it's already undone in cwr or recovery */ + if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_stamp = tcp_time_stamp; + } tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -- cgit v1.2.3 From f6152737a95bd6c22f0c664b20831aefd48085a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Mar 2011 19:37:11 -0700 Subject: tcp: Make undo_ssthresh arg to tcp_undo_cwr() a bool. Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e72af7e31d1c..bef9f04c22ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const int undo_ssthresh) +static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); @@ -2698,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else @@ -2725,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk) if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); tp->undo_marker = 0; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); } @@ -2778,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, 0); + tcp_undo_cwr(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. @@ -2807,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk) DBGUNDO(sk, "partial loss"); tp->lost_out = 0; - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; @@ -3496,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) if (flag & FLAG_ECE) tcp_ratehalving_spur_to_response(sk); else - tcp_undo_cwr(sk, 1); + tcp_undo_cwr(sk, true); } /* F-RTO spurious RTO detection algorithm (RFC4138) -- cgit v1.2.3 From 406b6f974dae76a5b795d5c251d11c979a4e509b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Mar 2011 21:56:23 -0700 Subject: ipv4: Fallback to FIB local table in __ip_dev_find(). In commit 9435eb1cf0b76b323019cebf8d16762a50a12a19 ("ipv4: Implement __ip_dev_find using new interface address hash.") we reimplemented __ip_dev_find() so that it doesn't have to do a full FIB table lookup. Instead, it consults a hash table of addresses configured to interfaces. This works identically to the old code in all except one case, and that is for loopback subnets. The old code would match the loopback device for any IP address that falls within a subnet configured to the loopback device. Handle this corner case by doing the FIB lookup. We could implement this via inet_addr_onlink() but: 1) Someone could configure many addresses to loopback and inet_addr_onlink() is a simple list traversal. 2) We know the old code works. Reported-by: Julian Anastasov Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d5a4553bebc3..5345b0bee6df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -64,6 +64,8 @@ #include #include +#include "fib_lookup.h" + static struct ipv4_devconf ipv4_devconf = { .data = { [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, @@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) break; } } + if (!result) { + struct flowi4 fl4 = { .daddr = addr }; + struct fib_result res = { 0 }; + struct fib_table *local; + + /* Fallback to FIB local table so that communication + * over loopback subnets work. + */ + local = fib_get_table(net, RT_TABLE_LOCAL); + if (local && + !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && + res.type == RTN_LOCAL) + result = FIB_RES_DEV(res); + } if (result && devref) dev_hold(result); rcu_read_unlock(); -- cgit v1.2.3 From eb49a97363f020c1d7eef8bcd93865726b1fa11d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Mar 2011 12:18:15 -0700 Subject: ipv4: fix ip_rt_update_pmtu() commit 2c8cec5c10bc (Cache learned PMTU information in inetpeer) added an extra inet_putpeer() call in ip_rt_update_pmtu(). This results in various problems, since we can free one inetpeer, while it is still in use. Ref: http://www.spinics.net/lists/netdev/msg159121.html Reported-by: Alexander Beregalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 870b5182ddd8..34921b0d3f8e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt->rt_peer_genid = rt_peer_genid(); } check_peer_pmtu(dst, peer); - - inet_putpeer(peer); } } -- cgit v1.2.3 From 12ce22423abacca70bf1dfbcb8543b3e2b74aad4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 23 Mar 2011 16:41:45 -0700 Subject: rds: stop including asm-generic/bitops/le.h directly asm-generic/bitops/le.h is only intended to be included directly from asm-generic/bitops/ext2-non-atomic.h or asm-generic/bitops/minix-le.h which implements generic ext2 or minix bit operations. This stops including asm-generic/bitops/le.h directly and use ext2 non-atomic bit operations instead. It seems odd to use ext2_*_bit() on rds, but it will replaced with __{set,clear,test}_bit_le() after introducing little endian bit operations for all architectures. This indirect step is necessary to maintain bisectability for some architectures which have their own little-endian bit operations. Signed-off-by: Akinobu Mita Cc: Andy Grover Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/rds/cong.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/rds/cong.c b/net/rds/cong.c index 75ea686f27d5..8cc322bfd092 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -33,8 +33,7 @@ #include #include #include - -#include +#include #include "rds.h" @@ -285,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - generic___set_le_bit(off, (void *)map->m_page_addrs[i]); + ext2_set_bit(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) @@ -299,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); + ext2_clear_bit(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) @@ -310,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); + return ext2_test_bit(off, (void *)map->m_page_addrs[i]); } void rds_cong_add_socket(struct rds_sock *rs) -- cgit v1.2.3 From e1dc1c81b9d1c823f2a529b9b9cf8bf5dacbce6a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 23 Mar 2011 16:42:05 -0700 Subject: rds: use little-endian bitops As a preparation for removing ext2 non-atomic bit operations from asm/bitops.h. This converts ext2 non-atomic bit operations to little-endian bit operations. Signed-off-by: Akinobu Mita Cc: Andy Grover Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/rds/cong.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/cong.c b/net/rds/cong.c index 8cc322bfd092..6daaa49d133f 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -284,7 +284,7 @@ void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - ext2_set_bit(off, (void *)map->m_page_addrs[i]); + __set_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) @@ -298,7 +298,7 @@ void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - ext2_clear_bit(off, (void *)map->m_page_addrs[i]); + __clear_bit_le(off, (void *)map->m_page_addrs[i]); } static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) @@ -309,7 +309,7 @@ static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; - return ext2_test_bit(off, (void *)map->m_page_addrs[i]); + return test_bit_le(off, (void *)map->m_page_addrs[i]); } void rds_cong_add_socket(struct rds_sock *rs) -- cgit v1.2.3 From 7ebb931598cd95cccea10d4bc4c0123a464ea565 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 24 Mar 2011 17:12:30 +0000 Subject: NFS: use secinfo when crossing mountpoints A submount may use different security than the parent mount does. We should figure out what sec flavor the submount uses at mount time. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_mech_switch.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 8b4061049d76..6c844b01a1d1 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -160,6 +160,28 @@ gss_mech_get_by_name(const char *name) EXPORT_SYMBOL_GPL(gss_mech_get_by_name); +struct gss_api_mech * +gss_mech_get_by_OID(struct xdr_netobj *obj) +{ + struct gss_api_mech *pos, *gm = NULL; + + spin_lock(®istered_mechs_lock); + list_for_each_entry(pos, ®istered_mechs, gm_list) { + if (obj->len == pos->gm_oid.len) { + if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) { + if (try_module_get(pos->gm_owner)) + gm = pos; + break; + } + } + } + spin_unlock(®istered_mechs_lock); + return gm; + +} + +EXPORT_SYMBOL_GPL(gss_mech_get_by_OID); + static inline int mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) { -- cgit v1.2.3 From 8f70e95f9f4159184f557a1db60c909d7c1bd2e3 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Thu, 24 Mar 2011 17:12:31 +0000 Subject: NFS: Determine initial mount security When sec= is not presented as a mount option, we should attempt to determine what security flavor the server is using. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_mech_switch.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 6c844b01a1d1..e3c36a274412 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -215,6 +215,22 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); +int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) +{ + struct gss_api_mech *pos = NULL; + int i = 0; + + spin_lock(®istered_mechs_lock); + list_for_each_entry(pos, ®istered_mechs, gm_list) { + array_ptr[i] = pos->gm_pfs->pseudoflavor; + i++; + } + spin_unlock(®istered_mechs_lock); + return i; +} + +EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors); + u32 gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) { -- cgit v1.2.3 From fcd13f42c9d6ab7b1024b9b7125a2e8db3cc00b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Mar 2011 07:01:24 +0000 Subject: ipv4: fix fib metrics Alessandro Suardi reported that we could not change route metrics : ip ro change default .... advmss 1400 This regression came with commit 9c150e82ac50 (Allocate fib metrics dynamically). fib_metrics is no longer an array, but a pointer to an array. Reported-by: Alessandro Suardi Signed-off-by: Eric Dumazet Tested-by: Alessandro Suardi Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 622ac4c95026..75b9fb5d5d37 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -251,7 +251,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, - sizeof(fi->fib_metrics)) == 0 && + sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; -- cgit v1.2.3 From 52cb1c0d208565d8f06b743cdc6596d744f92e3b Mon Sep 17 00:00:00 2001 From: Suraj Sumangala Date: Wed, 9 Mar 2011 14:44:05 +0530 Subject: Bluetooth: Increment unacked_frames count only the first transmit This patch lets 'l2cap_pinfo.unacked_frames' be incremented only the first time a frame is transmitted. Previously it was being incremented for retransmitted packets too resulting the value to cross the transmit window size. Signed-off-by: Suraj Sumangala Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c9f9cecca527..ca27f3a41536 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1116,7 +1116,9 @@ int l2cap_ertm_send(struct sock *sk) bt_cb(skb)->tx_seq = pi->next_tx_seq; pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; - pi->unacked_frames++; + if (bt_cb(skb)->retries == 1) + pi->unacked_frames++; + pi->frames_sent++; if (skb_queue_is_last(TX_QUEUE(sk), skb)) -- cgit v1.2.3 From f630cf0d5434e3923e1b8226ffa2753ead6b0ce5 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Wed, 16 Mar 2011 15:36:29 -0300 Subject: Bluetooth: Fix HCI_RESET command synchronization We can't send new commands before a cmd_complete for the HCI_RESET command shows up. Reported-by: Mikko Vinni Reported-by: Justin P. Mattock Reported-by: Ed Tomlinson Signed-off-by: Gustavo F. Padovan Tested-by: Justin P. Mattock Tested-by: Mikko Vinni Tested-by: Ed Tomlinson --- net/bluetooth/hci_core.c | 6 +++++- net/bluetooth/hci_event.c | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b372fb8bcdcf..92b48e257b89 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -186,6 +186,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) BT_DBG("%s %ld", hdev->name, opt); /* Reset device */ + set_bit(HCI_RESET, &hdev->flags); hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } @@ -213,8 +214,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Mandatory initialization */ /* Reset */ - if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) + if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { + set_bit(HCI_RESET, &hdev->flags); hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); + } /* Read Local Supported Features */ hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); @@ -1074,6 +1077,7 @@ static void hci_cmd_timer(unsigned long arg) BT_ERR("%s command tx timeout", hdev->name); atomic_set(&hdev->cmd_cnt, 1); + clear_bit(HCI_RESET, &hdev->flags); tasklet_schedule(&hdev->cmd_task); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3fbfa50c2bff..cebe7588469f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -183,6 +183,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); + clear_bit(HCI_RESET, &hdev->flags); + hci_req_complete(hdev, HCI_OP_RESET, status); } @@ -1847,7 +1849,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->opcode != HCI_OP_NOP) del_timer(&hdev->cmd_timer); - if (ev->ncmd) { + if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) tasklet_schedule(&hdev->cmd_task); -- cgit v1.2.3 From 6994ca5e8ade57d18b7d1e05aad040c441a2ad37 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 16 Mar 2011 14:29:34 +0200 Subject: Bluetooth: Fix missing hci_dev_lock_bh in user_confirm_reply The code was correctly calling _unlock at the end of the function but there was no actual _lock call anywhere. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0054c74e27b7..4476d8e3c0f2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1230,6 +1230,8 @@ static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, if (!hdev) return cmd_status(sk, index, mgmt_op, ENODEV); + hci_dev_lock_bh(hdev); + if (!test_bit(HCI_UP, &hdev->flags)) { err = cmd_status(sk, index, mgmt_op, ENETDOWN); goto failed; -- cgit v1.2.3 From a0cc9a1b5712ea52aaa4e7abfa0ec2dbe0d820ff Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 24 Mar 2011 17:16:08 +0200 Subject: Bluetooth: delete hanging L2CAP channel Sometimes L2CAP connection remains hanging. Make sure that L2CAP channel is deleted. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index fc85e7ae33c7..f77308e63e58 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -923,8 +923,9 @@ void __l2cap_sock_close(struct sock *sk, int reason) rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); - } else - l2cap_chan_del(sk, reason); + } + + l2cap_chan_del(sk, reason); break; case BT_CONNECT: -- cgit v1.2.3 From b77dcf8460ae57d4eb9fd3633eb4f97b8fb20716 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 24 Mar 2011 20:16:42 +0100 Subject: Bluetooth: Fix warning with hci_cmd_timer After we made debugobjects working again, we got the following: WARNING: at lib/debugobjects.c:262 debug_print_object+0x8e/0xb0() Hardware name: System Product Name ODEBUG: free active (active state 0) object type: timer_list hint: hci_cmd_timer+0x0/0x60 Pid: 2125, comm: dmsetup Tainted: G W 2.6.38-06707-gc62b389 #110375 Call Trace: [] warn_slowpath_common+0x7a/0xb0 [] warn_slowpath_fmt+0x46/0x50 [] debug_print_object+0x8e/0xb0 [] ? hci_cmd_timer+0x0/0x60 [] debug_check_no_obj_freed+0x125/0x230 [] ? check_object+0xb3/0x2b0 [] kfree+0x150/0x190 [] ? bt_host_release+0x16/0x20 [] bt_host_release+0x16/0x20 [] device_release+0x27/0xa0 [] kobject_release+0x4c/0xa0 [] ? kobject_release+0x0/0xa0 [] kref_put+0x36/0x70 [] kobject_put+0x27/0x60 [] put_device+0x17/0x20 [] hci_free_dev+0x29/0x30 [] vhci_release+0x36/0x70 [] fput+0xd6/0x1f0 [] filp_close+0x66/0x90 [] sys_close+0x99/0xf0 [] system_call_fastpath+0x16/0x1b That timer was introduced with commit 6bd32326cda(Bluetooth: Use proper timer for hci command timout) Timer seems to be running when the thing is closed. Removing the timer unconditionally fixes the problem. And yes, it needs to be fixed before the HCI_UP check. Signed-off-by: Thomas Gleixner Tested-by: Ingo Molnar Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 92b48e257b89..2216620ff296 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -587,6 +587,9 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); + /* Stop timer, it might be running */ + del_timer_sync(&hdev->cmd_timer); + if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { hci_req_unlock(hdev); return 0; @@ -626,7 +629,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Drop last sent command */ if (hdev->sent_cmd) { - del_timer_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } -- cgit v1.2.3 From 436c3b66ec9824a633724ae42de1c416af4f2063 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 17:42:21 -0700 Subject: ipv4: Invalidate nexthop cache nh_saddr more correctly. Any operation that: 1) Brings up an interface 2) Adds an IP address to an interface 3) Deletes an IP address from an interface can potentially invalidate the nh_saddr value, requiring it to be recomputed. Perform the recomputation lazily using a generation ID. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 11 +++++++---- net/ipv4/fib_semantics.c | 32 +++++++++++--------------------- net/ipv4/route.c | 6 ++++-- 3 files changed, 22 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 02c3ba61884a..f116ce8f1b46 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (res.type != RTN_LOCAL || !accept_local) goto e_inval; } - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); fib_combine_itag(itag, &res); dev_match = false; @@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, ret = 0; if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(res); + *spec_dst = FIB_RES_PREFSRC(net, res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } } @@ -960,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct net_device *dev = ifa->ifa_dev->dev; + struct net *net = dev_net(dev); switch (event) { case NETDEV_UP: @@ -967,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); - fib_update_nh_saddrs(dev); + atomic_inc(&net->ipv4.dev_addr_genid); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. * Disable IP. @@ -990,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); @@ -1007,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif + atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 75b9fb5d5d37..2d4bebca671a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -695,6 +695,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, fib_info_hash_free(old_laddrhash, bytes); } +__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) +{ + nh->nh_saddr = inet_select_addr(nh->nh_dev, + nh->nh_gw, + nh->nh_cfg_scope); + nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); + + return nh->nh_saddr; +} + struct fib_info *fib_create_info(struct fib_config *cfg) { int err; @@ -855,9 +865,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_cfg_scope = cfg->fc_scope; - nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, - nexthop_nh->nh_gw, - nexthop_nh->nh_cfg_scope); + fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) link_it: @@ -1128,24 +1136,6 @@ out: return; } -void fib_update_nh_saddrs(struct net_device *dev) -{ - struct hlist_head *head; - struct hlist_node *node; - struct fib_nh *nh; - unsigned int hash; - - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; - hlist_for_each_entry(nh, node, head, nh_hash) { - if (nh->nh_dev != dev) - continue; - nh->nh_saddr = inet_select_addr(nh->nh_dev, - nh->nh_gw, - nh->nh_cfg_scope); - } -} - #ifdef CONFIG_IP_ROUTE_MULTIPATH /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34921b0d3f8e..4b0c81180804 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1718,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) - src = FIB_RES_PREFSRC(res); + src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); @@ -2615,7 +2615,7 @@ static struct rtable *ip_route_output_slow(struct net *net, fib_select_default(&res); if (!fl4.saddr) - fl4.saddr = FIB_RES_PREFSRC(res); + fl4.saddr = FIB_RES_PREFSRC(net, res); dev_out = FIB_RES_DEV(res); fl4.flowi4_oif = dev_out->ifindex; @@ -3219,6 +3219,8 @@ static __net_init int rt_genid_init(struct net *net) { get_random_bytes(&net->ipv4.rt_genid, sizeof(net->ipv4.rt_genid)); + get_random_bytes(&net->ipv4.dev_addr_genid, + sizeof(net->ipv4.dev_addr_genid)); return 0; } -- cgit v1.2.3 From 37e826c513883099c298317bad1b3b677b2905fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Mar 2011 18:06:47 -0700 Subject: ipv4: Fix nexthop caching wrt. scoping. Move the scope value out of the fib alias entries and into fib_info, so that we always use the correct scope when recomputing the nexthop cached source address. Reported-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_lookup.h | 3 +-- net/ipv4/fib_semantics.c | 15 ++++++++------- net/ipv4/fib_trie.c | 12 ++++-------- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 4ec323875a02..af0f14aba169 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -10,7 +10,6 @@ struct fib_alias { struct fib_info *fa_info; u8 fa_tos; u8 fa_type; - u8 fa_scope; u8 fa_state; struct rcu_head rcu; }; @@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2d4bebca671a..641a5a2a9f9c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; - val ^= fi->fib_protocol; + val ^= (fi->fib_protocol << 8) | fi->fib_scope; val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; for_nexthops(fi) { @@ -248,6 +248,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) if (fi->fib_nhs != nfi->fib_nhs) continue; if (nfi->fib_protocol == fi->fib_protocol && + nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, @@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, goto errout; err = fib_dump_info(skb, info->pid, seq, event, tb_id, - fa->fa_type, fa->fa_scope, key, dst_len, + fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ @@ -699,7 +700,7 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) { nh->nh_saddr = inet_select_addr(nh->nh_dev, nh->nh_gw, - nh->nh_cfg_scope); + nh->nh_parent->fib_scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); return nh->nh_saddr; @@ -763,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; + fi->fib_scope = cfg->fc_scope; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; @@ -864,7 +866,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } change_nexthops(fi) { - nexthop_nh->nh_cfg_scope = cfg->fc_scope; fib_info_update_nh_saddr(net, nexthop_nh); } endfor_nexthops(fi) @@ -914,7 +915,7 @@ failure: } int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, + u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; @@ -936,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; - rtm->rtm_scope = scope; + rtm->rtm_scope = fi->fib_scope; rtm->rtm_protocol = fi->fib_protocol; if (rtm->rtm_dst_len) @@ -1092,7 +1093,7 @@ void fib_select_default(struct fib_result *res) list_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; - if (fa->fa_scope != res->scope || + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ac87a49ad50b..90a3ff605591 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (fa->fa_info->fib_priority != fi->fib_priority) break; if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { fa_match = fa; break; @@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state = state & ~FA_S_ACCESSED; @@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_info = fi; new_fa->fa_tos = tos; new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1362,7 +1359,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; - if (fa->fa_scope < flp->flowi4_scope) + if (fa->fa_info->fib_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; @@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, res->prefixlen = plen; res->nh_sel = nhsel; res->type = fa->fa_type; - res->scope = fa->fa_scope; + res->scope = fa->fa_info->fib_scope; res->fi = fi; res->table = tb; res->fa_head = &li->falh; @@ -1664,7 +1661,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && + fa->fa_info->fib_scope == cfg->fc_scope) && (!cfg->fc_prefsrc || fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || @@ -1863,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, RTM_NEWROUTE, tb->tb_id, fa->fa_type, - fa->fa_scope, xkey, plen, fa->fa_tos, @@ -2384,7 +2380,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth+1); seq_printf(seq, " /%d %s %s", li->plen, rtn_scope(buf1, sizeof(buf1), - fa->fa_scope), + fa->fa_info->fib_scope), rtn_type(buf2, sizeof(buf2), fa->fa_type)); if (fa->fa_tos) -- cgit v1.2.3 From ef550f6f4f6c9345a27ec85d98f4f7de1adce79c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 25 Mar 2011 13:27:48 -0700 Subject: ceph: flush msgr_wq during mds_client shutdown The release method for mds connections uses a backpointer to the mds_client, so we need to flush the workqueue of any pending work (and ceph_connection references) prior to freeing the mds_client. This fixes an oops easily triggered under UML by while true ; do mount ... ; umount ... ; done Also fix an outdated comment: the flush in ceph_destroy_client only flushes OSD connections out. This bug is basically an artifact of the ceph -> ceph+libceph conversion. Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 95f96ab94bba..9bbb356b12e7 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -394,8 +394,8 @@ void ceph_destroy_client(struct ceph_client *client) ceph_osdc_stop(&client->osdc); /* - * make sure mds and osd connections close out before destroying - * the auth module, which is needed to free those connections' + * make sure osd connections close out before destroying the + * auth module, which is needed to free those connections' * ceph_authorizers. */ ceph_msgr_flush(); -- cgit v1.2.3 From 1fbc78439291627642517f15b9b91f3125588143 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 25 Mar 2011 20:33:23 -0700 Subject: ipv4: do not ignore route errors The "ipv4: Inline fib_semantic_match into check_leaf" change forgets to return the route errors. check_leaf should return the same results as fib_table_lookup. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 90a3ff605591..b92c86f6e9b3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1365,9 +1365,9 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, err = fib_props[fa->fa_type].error; if (err) { #ifdef CONFIG_IP_FIB_TRIE_STATS - t->stats.semantic_match_miss++; + t->stats.semantic_match_passed++; #endif - return 1; + return err; } if (fi->fib_flags & RTNH_F_DEAD) continue; -- cgit v1.2.3 From 6b0ae4097c1ebad98c7b33f83b4ca7e93890ed12 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Sat, 26 Mar 2011 19:29:34 +0100 Subject: ceph: fix possible NULL pointer dereference This patch fixes 'event_work' dereference before it is checked for NULL. Signed-off-by: Mariusz Kozlowski Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 02212ed50852..b6776cb627cc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1602,11 +1602,11 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) cookie, ver, event); if (event) { event_work = kmalloc(sizeof(*event_work), GFP_NOIO); - INIT_WORK(&event_work->work, do_event_work); if (!event_work) { dout("ERROR: could not allocate event_work\n"); goto done_err; } + INIT_WORK(&event_work->work, do_event_work); event_work->event = event; event_work->ver = ver; event_work->notify_id = notify_id; -- cgit v1.2.3 From a271c5a0dea418931b6a903ef85adc30ad4c54be Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 27 Mar 2011 17:48:57 +0200 Subject: NFS: Ensure that rpc_release_resources_task() can be called twice. BUG: atomic_dec_and_test(): -1: atomic counter underflow at: Pid: 2827, comm: mount.nfs Not tainted 2.6.38 #1 Call Trace: [] ? put_rpccred+0x44/0x14e [sunrpc] [] ? rpc_ping+0x4e/0x58 [sunrpc] [] ? rpc_create+0x481/0x4fc [sunrpc] [] ? rpcauth_lookup_credcache+0xab/0x22d [sunrpc] [] ? nfs_create_rpc_client+0xa6/0xeb [nfs] [] ? nfs4_set_client+0xc2/0x1f9 [nfs] [] ? nfs4_create_server+0xf2/0x2a6 [nfs] [] ? nfs4_remote_mount+0x4e/0x14a [nfs] [] ? vfs_kern_mount+0x6e/0x133 [] ? nfs_do_root_mount+0x76/0x95 [nfs] [] ? nfs4_try_mount+0x56/0xaf [nfs] [] ? nfs_get_sb+0x435/0x73c [nfs] [] ? vfs_kern_mount+0x99/0x133 [] ? do_kern_mount+0x48/0xd8 [] ? do_mount+0x6da/0x741 [] ? sys_mount+0x83/0xc0 [] ? system_call_fastpath+0x16/0x1b Well, so, I think this is real bug of nfs codes somewhere. With some review, the code rpc_call_sync() rpc_run_task rpc_execute() __rpc_execute() rpc_release_task() rpc_release_resources_task() put_rpccred() <= release cred rpc_put_task rpc_do_put_task() rpc_release_resources_task() put_rpccred() <= release cred again seems to be release cred unintendedly. Signed-off-by: OGAWA Hirofumi Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ffb687671da0..6b43ee7221d5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -860,8 +860,10 @@ static void rpc_release_resources_task(struct rpc_task *task) { if (task->tk_rqstp) xprt_release(task); - if (task->tk_msg.rpc_cred) + if (task->tk_msg.rpc_cred) { put_rpccred(task->tk_msg.rpc_cred); + task->tk_msg.rpc_cred = NULL; + } rpc_task_release_client(task); } -- cgit v1.2.3 From 3bc07321ccc236f693ce1b6a8786f0a2e38bb87e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Mar 2011 21:08:28 +0000 Subject: xfrm: Force a dst refcount before entering the xfrm type handlers Crypto requests might return asynchronous. In this case we leave the rcu protected region, so force a refcount on the skb's destination entry before we enter the xfrm type input/output handlers. This fixes a crash when a route is deleted whilst sending IPsec data that is transformed by an asynchronous algorithm. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 2 ++ net/xfrm/xfrm_output.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 872065ca7f8c..341cd1189f8a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -190,6 +190,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) XFRM_SKB_CB(skb)->seq.input.low = seq; XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; + skb_dst_force(skb); + nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 1aba03f449cc..8f3f0eedc5a4 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -78,6 +78,8 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); + skb_dst_force(skb); + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out_exit; -- cgit v1.2.3 From e433430a0ca9cc1b851a83ac3b305e955b64880a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Mar 2011 21:09:32 +0000 Subject: dst: Clone child entry in skb_dst_pop We clone the child entry in skb_dst_pop before we call skb_dst_drop(). Otherwise we might kill the child right before we return it to the caller. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8f3f0eedc5a4..47bacd8c0250 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -96,7 +96,7 @@ resume: err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); -- cgit v1.2.3 From d50e7e3604778bfc2dc40f440e0742dbae399d54 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sat, 19 Mar 2011 20:14:30 +0000 Subject: irda: prevent heap corruption on invalid nickname Invalid nicknames containing only spaces will result in an underflow in a memcpy size calculation, subsequently destroying the heap and panicking. v2 also catches the case where the provided nickname is longer than the buffer size, which can result in controllable heap corruption. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/irda/irnet/irnet_ppp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7c567b8aa89a..2bb2beb6a373 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -105,6 +105,9 @@ irnet_ctrl_write(irnet_socket * ap, while(isspace(start[length - 1])) length--; + DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5, + -EINVAL, CTRL_ERROR, "Invalid nickname.\n"); + /* Copy the name for later reuse */ memcpy(ap->rname, start + 5, length - 5); ap->rname[length - 5] = '\0'; -- cgit v1.2.3 From d370af0ef7951188daeb15bae75db7ba57c67846 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sun, 20 Mar 2011 15:32:06 +0000 Subject: irda: validate peer name and attribute lengths Length fields provided by a peer for names and attributes may be longer than the destination array sizes. Validate lengths to prevent stack buffer overflows. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/irda/iriap.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 5b743bdd89ba..36477538cea8 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -656,10 +656,16 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self, n = 1; name_len = fp[n++]; + + IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;); + memcpy(name, fp+n, name_len); n+=name_len; name[name_len] = '\0'; attr_len = fp[n++]; + + IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;); + memcpy(attr, fp+n, attr_len); n+=attr_len; attr[attr_len] = '\0'; -- cgit v1.2.3 From be20250c13f88375345ad99950190685eda51eb8 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sat, 19 Mar 2011 20:43:43 +0000 Subject: ROSE: prevent heap corruption with bad facilities When parsing the FAC_NATIONAL_DIGIS facilities field, it's possible for a remote host to provide more digipeaters than expected, resulting in heap corruption. Check against ROSE_MAX_DIGIS to prevent overflows, and abort facilities parsing on failure. Additionally, when parsing the FAC_CCITT_DEST_NSAP and FAC_CCITT_SRC_NSAP facilities fields, a remote host can provide a length of less than 10, resulting in an underflow in a memcpy size, causing a kernel panic due to massive heap corruption. A length of greater than 20 results in a stack overflow of the callsign array. Abort facilities parsing on these invalid length values. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Signed-off-by: David S. Miller --- net/rose/rose_subr.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 1734abba26a2..174d51c9ce37 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -290,10 +290,15 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * facilities->source_ndigis = 0; facilities->dest_ndigis = 0; for (pt = p + 2, lg = 0 ; lg < l ; pt += AX25_ADDR_LEN, lg += AX25_ADDR_LEN) { - if (pt[6] & AX25_HBIT) + if (pt[6] & AX25_HBIT) { + if (facilities->dest_ndigis >= ROSE_MAX_DIGIS) + return -1; memcpy(&facilities->dest_digis[facilities->dest_ndigis++], pt, AX25_ADDR_LEN); - else + } else { + if (facilities->source_ndigis >= ROSE_MAX_DIGIS) + return -1; memcpy(&facilities->source_digis[facilities->source_ndigis++], pt, AX25_ADDR_LEN); + } } } p += l + 2; @@ -333,6 +338,11 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac case 0xC0: l = p[1]; + + /* Prevent overflows*/ + if (l < 10 || l > 20) + return -1; + if (*p == FAC_CCITT_DEST_NSAP) { memcpy(&facilities->source_addr, p + 7, ROSE_ADDR_LEN); memcpy(callsign, p + 12, l - 10); @@ -373,12 +383,16 @@ int rose_parse_facilities(unsigned char *p, switch (*p) { case FAC_NATIONAL: /* National */ len = rose_parse_national(p + 1, facilities, facilities_len - 1); + if (len < 0) + return 0; facilities_len -= len + 1; p += len + 1; break; case FAC_CCITT: /* CCITT */ len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); + if (len < 0) + return 0; facilities_len -= len + 1; p += len + 1; break; -- cgit v1.2.3 From e0bccd315db0c2f919e7fcf9cb60db21d9986f52 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 20 Mar 2011 06:48:05 +0000 Subject: rose: Add length checks to CALL_REQUEST parsing Define some constant offsets for CALL_REQUEST based on the description at and the definition of ROSE as using 10-digit (5-byte) addresses. Use them consistently. Validate all implicit and explicit facilities lengths. Validate the address length byte rather than either trusting or assuming its value. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/rose/af_rose.c | 8 ++--- net/rose/rose_loopback.c | 13 ++++++- net/rose/rose_route.c | 20 +++++++---- net/rose/rose_subr.c | 91 ++++++++++++++++++++++++++++++------------------ 4 files changed, 86 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 5ee0c62046a0..a80aef6e3d1f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -978,7 +978,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros struct sock *make; struct rose_sock *make_rose; struct rose_facilities_struct facilities; - int n, len; + int n; skb->sk = NULL; /* Initially we don't know who it's for */ @@ -987,9 +987,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros */ memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76); return 0; } diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index ae4a9d99aec7..344456206b70 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -73,9 +73,20 @@ static void rose_loopback_timer(unsigned long param) unsigned int lci_i, lci_o; while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + if (skb->len < ROSE_MIN_LEN) { + kfree_skb(skb); + continue; + } lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); frametype = skb->data[2]; - dest = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) { + kfree_skb(skb); + continue; + } + dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); lci_o = ROSE_DEFAULT_MAXVC + 1 - lci_i; skb_reset_transport_header(skb); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 88a77e90e7e8..08dcd2f29cdc 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -861,7 +861,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) unsigned int lci, new_lci; unsigned char cause, diagnostic; struct net_device *dev; - int len, res = 0; + int res = 0; char buf[11]; #if 0 @@ -869,10 +869,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) return res; #endif + if (skb->len < ROSE_MIN_LEN) + return res; frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); - src_addr = (rose_address *)(skb->data + 9); - dest_addr = (rose_address *)(skb->data + 4); + if (frametype == ROSE_CALL_REQUEST && + (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF || + skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] != + ROSE_CALL_REQ_ADDR_LEN_VAL)) + return res; + src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF); + dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF); spin_lock_bh(&rose_neigh_list_lock); spin_lock_bh(&rose_route_list_lock); @@ -1010,12 +1017,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) goto out; } - len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1; - len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1; - memset(&facilities, 0x00, sizeof(struct rose_facilities_struct)); - if (!rose_parse_facilities(skb->data + len + 4, &facilities)) { + if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF, + skb->len - ROSE_CALL_REQ_FACILITIES_OFF, + &facilities)) { rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76); goto out; } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 174d51c9ce37..f6c71caa94b9 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -142,7 +142,7 @@ void rose_write_internal(struct sock *sk, int frametype) *dptr++ = ROSE_GFI | lci1; *dptr++ = lci2; *dptr++ = frametype; - *dptr++ = 0xAA; + *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL; memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); @@ -246,12 +246,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; if (*p == FAC_NATIONAL_RAND) facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF); p += 3; @@ -260,32 +264,48 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct * break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; + if (len < 2 + l) + return -1; if (*p == FAC_NATIONAL_DEST_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN); facilities->source_ndigis = 1; } } else if (*p == FAC_NATIONAL_SRC_DIGI) { if (!fac_national_digis_received) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN); facilities->dest_ndigis = 1; } } else if (*p == FAC_NATIONAL_FAIL_CALL) { + if (l < AX25_ADDR_LEN) + return -1; memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN); } else if (*p == FAC_NATIONAL_FAIL_ADD) { + if (l < 1 + ROSE_ADDR_LEN) + return -1; memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN); } else if (*p == FAC_NATIONAL_DIGIS) { + if (l % AX25_ADDR_LEN) + return -1; fac_national_digis_received = 1; facilities->source_ndigis = 0; facilities->dest_ndigis = 0; @@ -319,24 +339,32 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac do { switch (*p & 0xC0) { case 0x00: + if (len < 2) + return -1; p += 2; n += 2; len -= 2; break; case 0x40: + if (len < 3) + return -1; p += 3; n += 3; len -= 3; break; case 0x80: + if (len < 4) + return -1; p += 4; n += 4; len -= 4; break; case 0xC0: + if (len < 2) + return -1; l = p[1]; /* Prevent overflows*/ @@ -365,49 +393,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac return n; } -int rose_parse_facilities(unsigned char *p, +int rose_parse_facilities(unsigned char *p, unsigned packet_len, struct rose_facilities_struct *facilities) { int facilities_len, len; facilities_len = *p++; - if (facilities_len == 0) + if (facilities_len == 0 || (unsigned)facilities_len > packet_len) return 0; - while (facilities_len > 0) { - if (*p == 0x00) { - facilities_len--; - p++; - - switch (*p) { - case FAC_NATIONAL: /* National */ - len = rose_parse_national(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - case FAC_CCITT: /* CCITT */ - len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); - if (len < 0) - return 0; - facilities_len -= len + 1; - p += len + 1; - break; - - default: - printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); - facilities_len--; - p++; - break; - } - } else - break; /* Error in facilities format */ + while (facilities_len >= 3 && *p == 0x00) { + facilities_len--; + p++; + + switch (*p) { + case FAC_NATIONAL: /* National */ + len = rose_parse_national(p + 1, facilities, facilities_len - 1); + break; + + case FAC_CCITT: /* CCITT */ + len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1); + break; + + default: + printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p); + len = 1; + break; + } + + if (len < 0) + return 0; + if (WARN_ON(len >= facilities_len)) + return 0; + facilities_len -= len + 1; + p += len + 1; } - return 1; + return facilities_len == 0; } static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose) -- cgit v1.2.3 From 3b261ade4224852ed841ecfd13876db812846e96 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 22 Mar 2011 01:59:47 +0000 Subject: net: remove useless comments in net/core/dev.c The code itself can explain what it is doing, no need these comments. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/core/dev.c | 54 ------------------------------------------------------ 1 file changed, 54 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f453370131a0..9b23dcefae6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1140,9 +1140,6 @@ static int __dev_open(struct net_device *dev) ASSERT_RTNL(); - /* - * Is it even present? - */ if (!netif_device_present(dev)) return -ENODEV; @@ -1151,9 +1148,6 @@ static int __dev_open(struct net_device *dev) if (ret) return ret; - /* - * Call device private open method - */ set_bit(__LINK_STATE_START, &dev->state); if (ops->ndo_validate_addr) @@ -1162,31 +1156,12 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - /* - * If it went open OK then: - */ - if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { - /* - * Set the flags. - */ dev->flags |= IFF_UP; - - /* - * Enable NET_DMA - */ net_dmaengine_get(); - - /* - * Initialize multicasting status - */ dev_set_rx_mode(dev); - - /* - * Wakeup transmit queue engine - */ dev_activate(dev); } @@ -1209,22 +1184,13 @@ int dev_open(struct net_device *dev) { int ret; - /* - * Is it already up? - */ if (dev->flags & IFF_UP) return 0; - /* - * Open device - */ ret = __dev_open(dev); if (ret < 0) return ret; - /* - * ... and announce new interface. - */ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_UP, dev); @@ -1240,10 +1206,6 @@ static int __dev_close_many(struct list_head *head) might_sleep(); list_for_each_entry(dev, head, unreg_list) { - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1272,15 +1234,7 @@ static int __dev_close_many(struct list_head *head) if (ops->ndo_stop) ops->ndo_stop(dev); - /* - * Device is now down. - */ - dev->flags &= ~IFF_UP; - - /* - * Shutdown NET_DMA - */ net_dmaengine_put(); } @@ -1309,9 +1263,6 @@ static int dev_close_many(struct list_head *head) __dev_close_many(head); - /* - * Tell people we are down - */ list_for_each_entry(dev, head, unreg_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); @@ -1371,11 +1322,6 @@ EXPORT_SYMBOL(dev_disable_lro); static int dev_boot_phase = 1; -/* - * Device change register/unregister. These are not inline or static - * as we export them to the world. - */ - /** * register_netdevice_notifier - register a network notifier block * @nb: notifier -- cgit v1.2.3 From 53914b67993c724cec585863755c9ebc8446e83b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 22 Mar 2011 08:27:25 +0000 Subject: can: make struct proto const can_ioctl is the only reason for struct proto to be non-const. script/check-patch.pl suggests struct proto be const. Setting the reference to the common can_ioctl() in all CAN protocols directly removes the need to make the struct proto writable in af_can.c Signed-off-by: Kurt Van Dijck Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 9 +++------ net/can/bcm.c | 4 ++-- net/can/raw.c | 4 ++-- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 702be5a2c956..733d66f1b05a 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -95,7 +95,7 @@ struct s_pstats can_pstats; /* receive list statistics */ * af_can socket functions */ -static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; @@ -108,6 +108,7 @@ static int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } } +EXPORT_SYMBOL(can_ioctl); static void can_sock_destruct(struct sock *sk) { @@ -698,13 +699,9 @@ int can_proto_register(struct can_proto *cp) printk(KERN_ERR "can: protocol %d already registered\n", proto); err = -EBUSY; - } else { + } else proto_tab[proto] = cp; - /* use generic ioctl function if not defined by module */ - if (!cp->ops->ioctl) - cp->ops->ioctl = can_ioctl; - } spin_unlock(&proto_tab_lock); if (err < 0) diff --git a/net/can/bcm.c b/net/can/bcm.c index 092dc88a7c64..871a0ad51025 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1569,7 +1569,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, return size; } -static struct proto_ops bcm_ops __read_mostly = { +static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, .bind = sock_no_bind, @@ -1578,7 +1578,7 @@ static struct proto_ops bcm_ops __read_mostly = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = NULL, /* use can_ioctl() from af_can.c */ + .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/net/can/raw.c b/net/can/raw.c index 883e9d74fddf..649acfa7c70a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -742,7 +742,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, return size; } -static struct proto_ops raw_ops __read_mostly = { +static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, .bind = raw_bind, @@ -751,7 +751,7 @@ static struct proto_ops raw_ops __read_mostly = { .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, - .ioctl = NULL, /* use can_ioctl() from af_can.c */ + .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, -- cgit v1.2.3 From 8628bd8af7c4c14f40f5183f80f5744c4e682439 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Thu, 24 Mar 2011 07:44:22 +0000 Subject: ipv4: Fix IP timestamp option (IPOPT_TS_PRESPEC) handling in ip_options_echo() The current handling of echoed IP timestamp options with prespecified addresses is rather broken since the 2.2.x kernels. As far as i understand it, it should behave like when originating packets. Currently it will only timestamp the next free slot if: - there is space for *two* timestamps - some random data from the echoed packet taken as an IP is *not* a local IP This first is caused by an off-by-one error. 'soffset' points to the next free slot and so we only need to have 'soffset + 7 <= optlen'. The second bug is using sptr as the start of the option, when it really is set to 'skb_network_header(skb)'. I just use dptr instead which points to the timestamp option. Finally it would only timestamp for non-local IPs, which we shouldn't do. So instead we exclude all unicast destinations, similar to what we do in ip_options_compile(). Signed-off-by: Jan Luebbe Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 1906fa35860c..28a736f3442f 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } else { dopt->ts_needtime = 0; - if (soffset + 8 <= optlen) { + if (soffset + 7 <= optlen) { __be32 addr; - memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) { + memcpy(&addr, dptr+soffset-1, 4); + if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { dopt->ts_needtime = 1; soffset += 8; } -- cgit v1.2.3 From edf947f10074fea27fdb1730524dca59355a1c40 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 24 Mar 2011 13:24:01 +0000 Subject: bridge: notify applications if address of bridge device changes The mac address of the bridge device may be changed when a new interface is added to the bridge. If this happens, then the bridge needs to call the network notifiers to tickle any other systems that care. Since bridge can be a module, this also means exporting the notifier function. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- net/bridge/br_private.h | 2 +- net/bridge/br_stp_if.c | 9 ++++++--- net/core/dev.c | 1 + 4 files changed, 13 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index dce8f0009a12..718b60366dfe 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -389,6 +389,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; int err = 0; + bool changed_addr; /* Don't allow bridging non-ethernet like devices */ if ((dev->flags & IFF_LOOPBACK) || @@ -446,7 +447,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) list_add_rcu(&p->list, &br->port_list); spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); + changed_addr = br_stp_recalculate_bridge_id(br); br_features_recompute(br); if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) && @@ -456,6 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_ifinfo_notify(RTM_NEWLINK, p); + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + dev_set_mtu(br->dev, br_min_mtu(br)); kobject_uevent(&p->kobj, KOBJ_ADD); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 19e2f46ed086..387013d33745 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -497,7 +497,7 @@ extern void br_stp_disable_bridge(struct net_bridge *br); extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val); extern void br_stp_enable_port(struct net_bridge_port *p); extern void br_stp_disable_port(struct net_bridge_port *p); -extern void br_stp_recalculate_bridge_id(struct net_bridge *br); +extern bool br_stp_recalculate_bridge_id(struct net_bridge *br); extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); extern void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 79372d4a4055..5593f5aec942 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -204,7 +204,7 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; /* called under bridge lock */ -void br_stp_recalculate_bridge_id(struct net_bridge *br) +bool br_stp_recalculate_bridge_id(struct net_bridge *br) { const unsigned char *br_mac_zero = (const unsigned char *)br_mac_zero_aligned; @@ -222,8 +222,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) } - if (compare_ether_addr(br->bridge_id.addr, addr)) - br_stp_change_bridge_id(br, addr); + if (compare_ether_addr(br->bridge_id.addr, addr) == 0) + return false; /* no change */ + + br_stp_change_bridge_id(br, addr); + return true; } /* called under bridge lock */ diff --git a/net/core/dev.c b/net/core/dev.c index 9b23dcefae6c..563ddc28139d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1423,6 +1423,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } +EXPORT_SYMBOL(call_netdevice_notifiers); /* When > 0 there are consumers of rx skb time stamps */ static atomic_t netstamp_needed = ATOMIC_INIT(0); -- cgit v1.2.3 From 3e49e6d520401e1d25ec8d366520aad2c01adc1c Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Sat, 26 Mar 2011 05:10:30 +0000 Subject: net: use CHECKSUM_NONE instead of magic number Two places in the kernel were doing skb->ip_summed = 0. Change both to skb->ip_summed = CHECKSUM_NONE, which is more readable. Signed-off-by: Cesar Eduardo Barros Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7ff0343e05c7..29e48593bf22 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -663,7 +663,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = 0; + skb->ip_summed = CHECKSUM_NONE; skb->pkt_type = PACKET_HOST; skb_tunnel_rx(skb, reg_dev); -- cgit v1.2.3 From 673e63c688f43104c73aad8ea4237f7ad41fa14d Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 22 Mar 2011 23:54:49 +0000 Subject: net: fix ethtool->set_flags not intended -EINVAL return value After commit d5dbda23804156ae6f35025ade5307a49d1db6d7 "ethtool: Add support for vlan accleration.", drivers that have NETIF_F_HW_VLAN_TX, and/or NETIF_F_HW_VLAN_RX feature, but do not allow enable/disable vlan acceleration via ethtool set_flags, always return -EINVAL from that function. Fix by returning -EINVAL only if requested features do not match current settings and can not be changed by driver. Change any driver that define ethtool->set_flags to use ethtool_invalid_flags() to avoid similar problems in the future (also on drivers that do not have the problem). Tested with modified (to reproduce this bug) myri10ge driver. Cc: stable@kernel.org # 2.6.37+ Signed-off-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- net/core/ethtool.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 24bd57493c0d..74ead9eca126 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -141,9 +141,24 @@ u32 ethtool_op_get_flags(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_flags); +/* Check if device can enable (or disable) particular feature coded in "data" + * argument. Flags "supported" describe features that can be toggled by device. + * If feature can not be toggled, it state (enabled or disabled) must match + * hardcoded device features state, otherwise flags are marked as invalid. + */ +bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported) +{ + u32 features = dev->features & flags_dup_features; + /* "data" can contain only flags_dup_features bits, + * see __ethtool_set_flags */ + + return (features & ~supported) != (data & ~supported); +} +EXPORT_SYMBOL(ethtool_invalid_flags); + int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) { - if (data & ~supported) + if (ethtool_invalid_flags(dev, data, supported)) return -EINVAL; dev->features = ((dev->features & ~flags_dup_features) | -- cgit v1.2.3 From 4dc217df68a17a57f8464c74c1b4785e40bddf77 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 25 Mar 2011 15:30:38 +0100 Subject: mac80211: fix a crash in minstrel_ht in HT mode with no supported MCS rates When a client connects in HT mode but does not provide any valid MCS rates, the function that finds the next sample rate gets stuck in an infinite loop. Fix this by falling back to legacy rates if no usable MCS rates are found. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel_ht.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 8212a8bebf06..dbdebeda097f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -659,18 +659,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; struct ieee80211_local *local = hw_to_local(mp->hw); u16 sta_cap = sta->ht_cap.cap; + int n_supported = 0; int ack_dur; int stbc; int i; /* fall back to the old minstrel for legacy stations */ - if (!sta->ht_cap.ht_supported) { - msp->is_ht = false; - memset(&msp->legacy, 0, sizeof(msp->legacy)); - msp->legacy.r = msp->ratelist; - msp->legacy.sample_table = msp->sample_table; - return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); - } + if (!sta->ht_cap.ht_supported) + goto use_legacy; BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); @@ -725,7 +721,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->groups[i].supported = mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; + + if (mi->groups[i].supported) + n_supported++; } + + if (!n_supported) + goto use_legacy; + + return; + +use_legacy: + msp->is_ht = false; + memset(&msp->legacy, 0, sizeof(msp->legacy)); + msp->legacy.r = msp->ratelist; + msp->legacy.sample_table = msp->sample_table; + return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); } static void -- cgit v1.2.3 From 1f951a7f8ba05192291f781ef99a892697e47d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Sun, 27 Mar 2011 13:31:26 +0200 Subject: mac80211: fix NULL pointer dereference in ieee80211_key_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ieee80211_key struct can be kfree()d several times in the function, for example if some of the key setup functions fails beforehand, but there's no check if the struct is still valid before we call memcpy() and INIT_LIST_HEAD() on it. In some cases (like it was in my case), if there's missing aes-generic module it could lead to the following kernel OOPS: Unable to handle kernel NULL pointer dereference at virtual address 0000018c .... PC is at memcpy+0x80/0x29c ... Backtrace: [] (ieee80211_key_alloc+0x0/0x234 [mac80211]) from [] (ieee80211_add_key+0x70/0x12c [mac80211]) [] (ieee80211_add_key+0x0/0x12c [mac80211]) from [] (__cfg80211_set_encryption+0x2a8/0x464 [cfg80211]) Signed-off-by: Petr Štetiar Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/key.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 8c02469b7176..09cf1f28c12b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -342,7 +342,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, if (IS_ERR(key->u.ccmp.tfm)) { err = PTR_ERR(key->u.ccmp.tfm); kfree(key); - key = ERR_PTR(err); + return ERR_PTR(err); } break; case WLAN_CIPHER_SUITE_AES_CMAC: @@ -360,7 +360,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); - key = ERR_PTR(err); + return ERR_PTR(err); } break; } -- cgit v1.2.3 From 67aa030c0dff6095128bcb4e8043b48360f32331 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Sat, 26 Mar 2011 18:58:51 +0100 Subject: mac80211: fix possible NULL pointer dereference This patch moves 'key' dereference after BUG_ON(!key) so that when key is NULL we will see proper trace instead of oops. Signed-off-by: Mariusz Kozlowski Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 09cf1f28c12b..af3c56482c80 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -400,11 +400,12 @@ int ieee80211_key_link(struct ieee80211_key *key, { struct ieee80211_key *old_key; int idx, ret; - bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; + bool pairwise; BUG_ON(!sdata); BUG_ON(!key); + pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; key->local = sdata->local; key->sdata = sdata; -- cgit v1.2.3 From bef9bacc4ec7ea6a02876164cd6ccaa4759edce4 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Sat, 26 Mar 2011 19:26:55 +0100 Subject: cfg80211:: fix possible NULL pointer dereference In cfg80211_inform_bss_frame() wiphy is first dereferenced on privsz initialisation and then it is checked for NULL. This patch fixes that. Signed-off-by: Mariusz Kozlowski Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ea427f418f64..300c11d99997 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -585,16 +585,23 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - size_t privsz = wiphy->bss_priv_size; + size_t privsz; + + if (WARN_ON(!mgmt)) + return NULL; + + if (WARN_ON(!wiphy)) + return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && (signal < 0 || signal > 100))) return NULL; - if (WARN_ON(!mgmt || !wiphy || - len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) + if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) return NULL; + privsz = wiphy->bss_priv_size; + res = kzalloc(sizeof(*res) + privsz + ielen, gfp); if (!res) return NULL; -- cgit v1.2.3 From 2b78ac9bfc7483ba4bda9ad3d10dd4afcf88337c Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Mon, 28 Mar 2011 14:32:32 +0300 Subject: cfg80211: fix BSS double-unlinking (continued) This patch adds to the fix "fix BSS double-unlinking" (commit 3207390a8b58bfc1335750f91cf6783c48ca19ca) by Johannes Berg. It turns out, that the double-unlinking scenario can also occur if expired BSS elements are removed whilst an interface is performing association. To work around that, replace list_del with list_del_init also in the "cfg80211_bss_expire" function, so that the check for whether the BSS still is in the list works correctly in cfg80211_unlink_bss. Signed-off-by: Juuso Oikarinen Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 300c11d99997..fbf6f33ae4d0 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -123,6 +123,15 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, } } +/* must hold dev->bss_lock! */ +static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + list_del_init(&bss->list); + rb_erase(&bss->rbn, &dev->bss_tree); + kref_put(&bss->ref, bss_release); +} + /* must hold dev->bss_lock! */ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) { @@ -134,9 +143,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) continue; if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) continue; - list_del(&bss->list); - rb_erase(&bss->rbn, &dev->bss_tree); - kref_put(&bss->ref, bss_release); + __cfg80211_unlink_bss(dev, bss); expired = true; } @@ -669,11 +676,8 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); if (!list_empty(&bss->list)) { - list_del_init(&bss->list); + __cfg80211_unlink_bss(dev, bss); dev->bss_generation++; - rb_erase(&bss->rbn, &dev->bss_tree); - - kref_put(&bss->ref, bss_release); } spin_unlock_bh(&dev->bss_lock); } -- cgit v1.2.3 From 499fe9a419d43410be576bcc825658997b6ce822 Mon Sep 17 00:00:00 2001 From: Daniel Halperin Date: Thu, 24 Mar 2011 16:01:48 -0700 Subject: mac80211: fix aggregation frame release during timeout Suppose the aggregation reorder buffer looks like this: x-T-R1-y-R2, where x and y are frames that have not been received, T is a received frame that has timed out, and R1,R2 are received frames that have not yet timed out. The proper behavior in this scenario is to move the window past x (skipping it), release T and R1, and leave the window at y until y is received or R2 times out. As written, this code will instead leave the window at R1, because it has not yet timed out. Fix this by exiting the reorder loop only when the frame that has not timed out AND there are skipped frames earlier in the current valid window. Signed-off-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5c1930ba8ebe..aa5cc37b4921 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -612,7 +612,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, skipped++; continue; } - if (!time_after(jiffies, tid_agg_rx->reorder_time[j] + + if (skipped && + !time_after(jiffies, tid_agg_rx->reorder_time[j] + HT_RX_REORDER_BUF_TIMEOUT)) goto set_release_timer; -- cgit v1.2.3 From 4910ac6c526d2868adcb5893e0c428473de862b5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Mar 2011 16:51:15 -0700 Subject: ipv4: Don't ip_rt_put() an error pointer in RAW sockets. Reported-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- net/ipv4/raw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e837ffd3edc3..2d3c72e5bbbf 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); + rt = NULL; goto done; } } -- cgit v1.2.3 From 36ae0148dbb6b9e15d8f067bb7523fd2b765a6af Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:45:52 +0000 Subject: xfrm: Move the test on replay window size into the replay check functions As it is, the replay check is just performed if the replay window of the legacy implementation is nonzero. So we move the test on a nonzero replay window inside the replay check functions to be sure we are testing for the right implementation. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 341cd1189f8a..a026b0ef2443 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -173,7 +173,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop_unlock; } - if (x->props.replay_window && x->repl->check(x, skb, seq)) { + if (x->repl->check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 2f5be5b15740..f218385950ca 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -118,6 +118,9 @@ static int xfrm_replay_check(struct xfrm_state *x, u32 diff; u32 seq = ntohl(net_seq); + if (!x->props.replay_window) + return 0; + if (unlikely(seq == 0)) goto err; @@ -193,9 +196,14 @@ static int xfrm_replay_check_bmp(struct xfrm_state *x, { unsigned int bitnr, nr; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; u32 seq = ntohl(net_seq); u32 diff = replay_esn->seq - seq; - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (!replay_esn->replay_window) + return 0; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; if (unlikely(seq == 0)) goto err; @@ -373,12 +381,17 @@ static int xfrm_replay_check_esn(struct xfrm_state *x, unsigned int bitnr, nr; u32 diff; struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 pos; u32 seq = ntohl(net_seq); - u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; u32 wsize = replay_esn->replay_window; u32 top = replay_esn->seq; u32 bottom = top - wsize + 1; + if (!wsize) + return 0; + + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && (replay_esn->seq < replay_esn->replay_window - 1))) goto err; -- cgit v1.2.3 From af2f464e326ebad57284cfdecb03f1606e89bbc7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:46:39 +0000 Subject: xfrm: Assign esn pointers when cloning a state When we clone a xfrm state we have to assign the replay_esn and the preplay_esn pointers to the state if we use the new replay detection method. To this end, we add a xfrm_replay_clone() function that allocates memory for the replay detection and takes over the necessary values from the original state. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f83a3d1da81b..dd78536d40de 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1181,6 +1181,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; } + if (orig->replay_esn) { + err = xfrm_replay_clone(x, orig); + if (err) + goto error; + } + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); err = xfrm_init_state(x); -- cgit v1.2.3 From e2b19125e94124daaeda1ddcf9b85b04575ad86f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:47:30 +0000 Subject: xfrm: Check for esn buffer len in xfrm_new_ae In xfrm_new_ae() we may overwrite the allocated esn replay state buffer with a wrong size. So check that the new size matches the original allocated size and return an error if this is not the case. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index fc152d28753c..ccc4c0c8ef00 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -360,6 +360,23 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, + struct nlattr *rp) +{ + struct xfrm_replay_state_esn *up; + + if (!replay_esn || !rp) + return 0; + + up = nla_data(rp); + + if (xfrm_replay_state_esn_len(replay_esn) != + xfrm_replay_state_esn_len(up)) + return -EINVAL; + + return 0; +} + static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, struct xfrm_replay_state_esn **preplay_esn, struct nlattr *rta) @@ -1766,6 +1783,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (x->km.state != XFRM_STATE_VALID) goto out; + err = xfrm_replay_verify_len(x->replay_esn, rp); + if (err) + goto out; + spin_lock_bh(&x->lock); xfrm_update_ae_params(x, attrs); spin_unlock_bh(&x->lock); -- cgit v1.2.3 From 02aadf72fe2c83f145e3437734e66be53abae481 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 28 Mar 2011 19:48:09 +0000 Subject: xfrm: Restrict extended sequence numbers to esp The IPsec extended sequence numbers are fully implemented just for esp. So restrict the usage to esp until other protocols have support too. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ccc4c0c8ef00..3d15d3e1b2c4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -127,6 +127,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (!rt) return 0; + if (p->id.proto != IPPROTO_ESP) + return -EINVAL; + if (p->replay_window != 0) return -EINVAL; -- cgit v1.2.3 From 234af26ff123dfb2aa48772124721b1354c8e0a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Mar 2011 06:25:59 +0300 Subject: ceph: unlock on error in ceph_osdc_start_request() There was a missing unlock on the error path if __map_request() failed. Signed-off-by: Dan Carpenter Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b6776cb627cc..03740e8fc9d1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1672,7 +1672,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, if (req->r_sent == 0) { rc = __map_request(osdc, req); if (rc < 0) - return rc; + goto out_unlock; if (req->r_osd == NULL) { dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); @@ -1689,6 +1689,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, } } } + +out_unlock: mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); return rc; -- cgit v1.2.3 From fbdb9190482fd83a3eb20cdeb0da454759f479d7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 29 Mar 2011 12:11:06 -0700 Subject: libceph: fix null dereference when unregistering linger requests We should only clear r_osd if we are neither registered as a linger or a regular request. We may unregister as a linger while still registered as a regular request (e.g., in reset_osd). Incorrectly clearing r_osd there leads to a null pointer dereference in __send_request. Also simplify the parallel check in __unregister_request() where we just removed r_osd_item and know it's empty. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 03740e8fc9d1..3b91d651fe08 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -837,8 +837,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, dout("moving osd to %p lru\n", req->r_osd); __move_osd_to_lru(osdc, req->r_osd); } - if (list_empty(&req->r_osd_item) && - list_empty(&req->r_linger_item)) + if (list_empty(&req->r_linger_item)) req->r_osd = NULL; } @@ -883,7 +882,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, dout("moving osd to %p lru\n", req->r_osd); __move_osd_to_lru(osdc, req->r_osd); } - req->r_osd = NULL; + if (list_empty(&req->r_osd_item)) + req->r_osd = NULL; } } -- cgit v1.2.3 From 8323c3aa74cd92465350294567142d12ffdcc963 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Fri, 25 Mar 2011 16:32:57 -0700 Subject: ceph: Move secret key parsing earlier. This makes the base64 logic be contained in mount option parsing, and prepares us for replacing the homebew key management with the kernel key retention service. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil --- net/ceph/auth.c | 8 ++++---- net/ceph/auth_x.c | 8 +++++--- net/ceph/ceph_common.c | 43 ++++++++++++++++++++++++++++++++++++------- net/ceph/crypto.c | 11 +++++++++++ net/ceph/crypto.h | 2 ++ net/ceph/mon_client.c | 2 +- 6 files changed, 59 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 549c1f43e1d5..b4bf4ac090f1 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -35,12 +35,12 @@ static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) /* * setup, teardown. */ -struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) +struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key) { struct ceph_auth_client *ac; int ret; - dout("auth_init name '%s' secret '%s'\n", name, secret); + dout("auth_init name '%s'\n", name); ret = -ENOMEM; ac = kzalloc(sizeof(*ac), GFP_NOFS); @@ -52,8 +52,8 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) ac->name = name; else ac->name = CEPH_AUTH_NAME_DEFAULT; - dout("auth_init name %s secret %s\n", ac->name, secret); - ac->secret = secret; + dout("auth_init name %s\n", ac->name); + ac->key = key; return ac; out: diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 7fd5dfcf6e18..1587dc6010c6 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -662,14 +662,16 @@ int ceph_x_init(struct ceph_auth_client *ac) goto out; ret = -EINVAL; - if (!ac->secret) { + if (!ac->key) { pr_err("no secret set (for auth_x protocol)\n"); goto out_nomem; } - ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); - if (ret) + ret = ceph_crypto_key_clone(&xi->secret, ac->key); + if (ret < 0) { + pr_err("cannot clone key: %d\n", ret); goto out_nomem; + } xi->starting = true; xi->ticket_handlers = RB_ROOT; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 9bbb356b12e7..02e084f29d24 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -20,6 +20,7 @@ #include #include #include +#include "crypto.h" @@ -117,9 +118,29 @@ int ceph_compare_options(struct ceph_options *new_opt, if (ret) return ret; - ret = strcmp_null(opt1->secret, opt2->secret); - if (ret) - return ret; + if (opt1->key && !opt2->key) + return -1; + if (!opt1->key && opt2->key) + return 1; + if (opt1->key && opt2->key) { + if (opt1->key->type != opt2->key->type) + return -1; + if (opt1->key->created.tv_sec != opt2->key->created.tv_sec) + return -1; + if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec) + return -1; + if (opt1->key->len != opt2->key->len) + return -1; + if (opt1->key->key && !opt2->key->key) + return -1; + if (!opt1->key->key && opt2->key->key) + return 1; + if (opt1->key->key && opt2->key->key) { + ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len); + if (ret) + return ret; + } + } /* any matching mon ip implies a match */ for (i = 0; i < opt1->num_mon; i++) { @@ -203,7 +224,10 @@ void ceph_destroy_options(struct ceph_options *opt) { dout("destroy_options %p\n", opt); kfree(opt->name); - kfree(opt->secret); + if (opt->key) { + ceph_crypto_key_destroy(opt->key); + kfree(opt->key); + } kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); @@ -295,9 +319,14 @@ int ceph_parse_options(struct ceph_options **popt, char *options, GFP_KERNEL); break; case Opt_secret: - opt->secret = kstrndup(argstr[0].from, - argstr[0].to-argstr[0].from, - GFP_KERNEL); + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) { + err = -ENOMEM; + goto out; + } + err = ceph_crypto_key_unarmor(opt->key, argstr[0].from); + if (err < 0) + goto out; break; /* misc */ diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 7b505b0c983f..75f0893fa11f 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -9,6 +9,17 @@ #include #include "crypto.h" +int ceph_crypto_key_clone(struct ceph_crypto_key *dst, + const struct ceph_crypto_key *src) +{ + memcpy(dst, src, sizeof(struct ceph_crypto_key)); + dst->key = kmalloc(src->len, GFP_NOFS); + if (!dst->key) + return -ENOMEM; + memcpy(dst->key, src->key, src->len); + return 0; +} + int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) { if (*p + sizeof(u16) + sizeof(key->created) + diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index f9eccace592b..6cf6edc91ec4 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -19,6 +19,8 @@ static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) kfree(key->key); } +extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, + const struct ceph_crypto_key *src); extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end); extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 8a079399174a..cbe31fa45508 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -759,7 +759,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) /* authentication */ monc->auth = ceph_auth_init(cl->options->name, - cl->options->secret); + cl->options->key); if (IS_ERR(monc->auth)) return PTR_ERR(monc->auth); monc->auth->want_keys = -- cgit v1.2.3 From e2c3d29b4295c3eec18294bc34f0c99a7b9ae413 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Fri, 25 Mar 2011 16:40:48 -0700 Subject: libceph: Get secret from the kernel keys api when mounting with key=NAME. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil --- net/ceph/Kconfig | 1 + net/ceph/ceph_common.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'net') diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index ad424049b0cf..be683f2d401f 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -4,6 +4,7 @@ config CEPH_LIB select LIBCRC32C select CRYPTO_AES select CRYPTO + select KEYS default n help Choose Y or M here to include cephlib, which provides the diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 02e084f29d24..c92bc8d50597 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -197,6 +199,7 @@ enum { Opt_fsid, Opt_name, Opt_secret, + Opt_key, Opt_ip, Opt_last_string, /* string args above */ @@ -213,6 +216,7 @@ static match_table_t opt_tokens = { {Opt_fsid, "fsid=%s"}, {Opt_name, "name=%s"}, {Opt_secret, "secret=%s"}, + {Opt_key, "key=%s"}, {Opt_ip, "ip=%s"}, /* string args above */ {Opt_noshare, "noshare"}, @@ -232,6 +236,50 @@ void ceph_destroy_options(struct ceph_options *opt) } EXPORT_SYMBOL(ceph_destroy_options); +/* get secret from key store */ +static int get_secret(struct ceph_crypto_key *dst, const char *name) { + struct key *ukey; + int key_err; + int err = 0; + struct user_key_payload *payload; + void *p; + + ukey = request_key(&key_type_user, name, NULL); + if (!ukey || IS_ERR(ukey)) { + /* request_key errors don't map nicely to mount(2) + errors; don't even try, but still printk */ + key_err = PTR_ERR(ukey); + switch (key_err) { + case -ENOKEY: + pr_warning("ceph: Mount failed due to key not found: %s\n", name); + break; + case -EKEYEXPIRED: + pr_warning("ceph: Mount failed due to expired key: %s\n", name); + break; + case -EKEYREVOKED: + pr_warning("ceph: Mount failed due to revoked key: %s\n", name); + break; + default: + pr_warning("ceph: Mount failed due to unknown key error" + " %d: %s\n", key_err, name); + } + err = -EPERM; + goto out; + } + + payload = ukey->payload.data; + p = payload->data; + err = ceph_crypto_key_decode(dst, &p, p + payload->datalen); + if (err) + goto out_key; + /* pass through, err is 0 */ + +out_key: + key_put(ukey); +out: + return err; +} + int ceph_parse_options(struct ceph_options **popt, char *options, const char *dev_name, const char *dev_name_end, int (*parse_extra_token)(char *c, void *private), @@ -328,6 +376,16 @@ int ceph_parse_options(struct ceph_options **popt, char *options, if (err < 0) goto out; break; + case Opt_key: + opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); + if (!opt->key) { + err = -ENOMEM; + goto out; + } + err = get_secret(opt->key, argstr[0].from); + if (err < 0) + goto out; + break; /* misc */ case Opt_osdtimeout: -- cgit v1.2.3 From 4b2a58abd1e17c0ee53c8dded879e015917cca67 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Mon, 28 Mar 2011 14:59:38 -0700 Subject: libceph: Create a new key type "ceph". This allows us to use existence of the key type as a feature test, from userspace. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil --- net/ceph/ceph_common.c | 21 ++++++++++------- net/ceph/crypto.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ceph/crypto.h | 2 ++ 3 files changed, 77 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index c92bc8d50597..132963abc266 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -241,10 +241,9 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { struct key *ukey; int key_err; int err = 0; - struct user_key_payload *payload; - void *p; + struct ceph_crypto_key *ckey; - ukey = request_key(&key_type_user, name, NULL); + ukey = request_key(&key_type_ceph, name, NULL); if (!ukey || IS_ERR(ukey)) { /* request_key errors don't map nicely to mount(2) errors; don't even try, but still printk */ @@ -267,9 +266,8 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { goto out; } - payload = ukey->payload.data; - p = payload->data; - err = ceph_crypto_key_decode(dst, &p, p + payload->datalen); + ckey = ukey->payload.data; + err = ceph_crypto_key_clone(dst, ckey); if (err) goto out_key; /* pass through, err is 0 */ @@ -583,10 +581,14 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out; - ret = ceph_msgr_init(); + ret = ceph_crypto_init(); if (ret < 0) goto out_debugfs; + ret = ceph_msgr_init(); + if (ret < 0) + goto out_crypto; + pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, @@ -594,6 +596,8 @@ static int __init init_ceph_lib(void) return 0; +out_crypto: + ceph_crypto_shutdown(); out_debugfs: ceph_debugfs_cleanup(); out: @@ -604,6 +608,7 @@ static void __exit exit_ceph_lib(void) { dout("exit_ceph_lib\n"); ceph_msgr_exit(); + ceph_crypto_shutdown(); ceph_debugfs_cleanup(); } diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 75f0893fa11f..5a8009c9e0cd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -5,7 +5,9 @@ #include #include #include +#include +#include #include #include "crypto.h" @@ -421,3 +423,63 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, return -EINVAL; } } + +int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +{ + struct ceph_crypto_key *ckey; + int ret; + void *p; + + ret = -EINVAL; + if (datalen <= 0 || datalen > 32767 || !data) + goto err; + + ret = key_payload_reserve(key, datalen); + if (ret < 0) + goto err; + + ret = -ENOMEM; + ckey = kmalloc(sizeof(*ckey), GFP_KERNEL); + if (!ckey) + goto err; + + /* TODO ceph_crypto_key_decode should really take const input */ + p = (void*)data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + if (ret < 0) + goto err_ckey; + + key->payload.data = ckey; + return 0; + +err_ckey: + kfree(ckey); +err: + return ret; +} + +int ceph_key_match(const struct key *key, const void *description) +{ + return strcmp(key->description, description) == 0; +} + +void ceph_key_destroy(struct key *key) { + struct ceph_crypto_key *ckey = key->payload.data; + + ceph_crypto_key_destroy(ckey); +} + +struct key_type key_type_ceph = { + .name = "ceph", + .instantiate = ceph_key_instantiate, + .match = ceph_key_match, + .destroy = ceph_key_destroy, +}; + +int ceph_crypto_init(void) { + return register_key_type(&key_type_ceph); +} + +void ceph_crypto_shutdown(void) { + unregister_key_type(&key_type_ceph); +} diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 6cf6edc91ec4..1919d1550d75 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -42,6 +42,8 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, const void *src1, size_t src1_len, const void *src2, size_t src2_len); +extern int ceph_crypto_init(void); +extern void ceph_crypto_shutdown(void); /* armor.c */ extern int ceph_armor(char *dst, const char *src, const char *end); -- cgit v1.2.3 From 1459a3cc51d90d78027c7b5c1790e5d22751c8eb Mon Sep 17 00:00:00 2001 From: Balaji G Date: Tue, 29 Mar 2011 06:20:04 +0000 Subject: bridge: Fix compilation warning in function br_stp_recalculate_bridge_id() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/bridge/br_stp_if.c: In function ‘br_stp_recalculate_bridge_id’: net/bridge/br_stp_if.c:216:3: warning: ‘return’ with no value, in function returning non-void Signed-off-by: G.Balaji Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_stp_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5593f5aec942..9b61d09de9b9 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -213,7 +213,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) /* user has chosen a value so keep it */ if (br->flags & BR_SET_MAC_ADDR) - return; + return false; list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || -- cgit v1.2.3 From 93ca3bb5df9bc8b2c60485e1cc6507c3d7c8e1fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 28 Mar 2011 22:40:53 +0000 Subject: net: gre: provide multicast mappings for ipv4 and ipv6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My commit 6d55cb91a0020ac0 (gre: fix hard header destination address checking) broke multicast. The reason is that ip_gre used to get ipgre_header() calls with zero destination if we have NOARP or multicast destination. Instead the actual target was decided at ipgre_tunnel_xmit() time based on per-protocol dissection. Instead of allowing the "abuse" of ->header() calls with invalid destination, this creates multicast mappings for ip_gre. This also fixes "ip neigh show nud noarp" to display the proper multicast mappings used by the gre device. Reported-by: Doug Kehn Signed-off-by: Timo Teräs Acked-by: Doug Kehn Signed-off-by: David S. Miller --- net/ipv4/arp.c | 3 +++ net/ipv6/ndisc.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 090d273d7865..1b74d3b64371 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) case ARPHRD_INFINIBAND: ip_ib_mc_map(addr, dev->broadcast, haddr); return 0; + case ARPHRD_IPGRE: + ip_ipgre_mc_map(addr, dev->broadcast, haddr); + return 0; default: if (dir) { memcpy(haddr, dev->broadcast, dev->addr_len); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0e49c9db3c98..92f952d093db 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -341,6 +341,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d case ARPHRD_INFINIBAND: ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; + case ARPHRD_IPGRE: + return ipv6_ipgre_mc_map(addr, dev->broadcast, buf); default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); -- cgit v1.2.3 From ff9a57a62afbbe2d0f3a09af321f1fd7645f38a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 26 Mar 2011 20:27:24 +0000 Subject: bridge: mcast snooping, fix length check of snooped MLDv1/2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "len = ntohs(ip6h->payload_len)" does not include the length of the ipv6 header itself, which the rest of this function assumes, though. This leads to a length check less restrictive as it should be in the following line for one thing. For another, it very likely leads to an integer underrun when substracting the offset and therefore to a very high new value of 'len' due to its unsignedness. This will ultimately lead to the pskb_trim_rcsum() practically never being called, even in the cases where it should. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f61eb2eff3fd..59660c909a7c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1475,7 +1475,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, ip6h->payload_len == 0) return 0; - len = ntohs(ip6h->payload_len); + len = ntohs(ip6h->payload_len) + sizeof(*ip6h); if (skb->len < len) return -EINVAL; -- cgit v1.2.3 From 79b569f0ec53a14c4d71e79d93a8676d9a0fda6d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 30 Mar 2011 02:42:17 -0700 Subject: netdev: fix mtu check when TSO is enabled In case the device where is coming from the packet has TSO enabled, we should not check the mtu size value as this one could be bigger than the expected value. This is the case for the macvlan driver when the lower device has TSO enabled. The macvlan inherit this feature and forward the packets without fragmenting them. Then the packets go through dev_forward_skb and are dropped. This patch fix this by checking TSO is not enabled when we want to check the mtu size. Signed-off-by: Daniel Lezcano Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 563ddc28139d..3da9fb06d47a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1454,6 +1454,27 @@ static inline void net_timestamp_check(struct sk_buff *skb) __net_timestamp(skb); } +static inline bool is_skb_forwardable(struct net_device *dev, + struct sk_buff *skb) +{ + unsigned int len; + + if (!(dev->flags & IFF_UP)) + return false; + + len = dev->mtu + dev->hard_header_len + VLAN_HLEN; + if (skb->len <= len) + return true; + + /* if TSO is enabled, we don't care about the length as the packet + * could be forwarded without being segmented before + */ + if (skb_is_gso(skb)) + return true; + + return false; +} + /** * dev_forward_skb - loopback an skb to another netif * @@ -1477,8 +1498,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_orphan(skb); nf_reset(skb); - if (unlikely(!(dev->flags & IFF_UP) || - (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) { + if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; -- cgit v1.2.3 From c031235b395433350f25943b7580a5e343c7b7b2 Mon Sep 17 00:00:00 2001 From: "Philip A. Prindeville" Date: Wed, 30 Mar 2011 13:17:04 +0000 Subject: atm/solos-pci: Don't flap VCs when carrier state changes Don't flap VCs when carrier state changes; higher-level protocols can detect loss of connectivity and act accordingly. This is more consistent with how other network interfaces work. We no longer use release_vccs() so we can delete it. release_vccs() was duplicated from net/atm/common.c; make the corresponding function exported, since other code duplicates it and could leverage it if it were public. Signed-off-by: Philip A. Prindeville Signed-off-by: David S. Miller --- net/atm/common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/common.c b/net/atm/common.c index 1b9c52a02cd3..22b963d06a10 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -252,6 +252,7 @@ void atm_dev_release_vccs(struct atm_dev *dev) } write_unlock_irq(&vcc_sklist_lock); } +EXPORT_SYMBOL(atm_dev_release_vccs); static int adjust_tp(struct atm_trafprm *tp, unsigned char aal) { -- cgit v1.2.3 From e2666f84958adb3a034b98e99699b55705117e01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Mar 2011 16:57:46 -0700 Subject: fib: add rtnl locking in ip_fib_net_exit Daniel J Blueman reported a lockdep splat in trie_firstleaf(), caused by RTNL being not locked before a call to fib_table_flush() Reported-by: Daniel J Blueman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f116ce8f1b46..451088330bbb 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1068,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net) fib4_rules_exit(net); #endif + rtnl_lock(); for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; @@ -1080,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net) fib_free_table(tb); } } + rtnl_unlock(); kfree(net->ipv4.fib_table_hash); } -- cgit v1.2.3 From a84b50ceb7d640437d0dc28a2bef0d0de054de89 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 30 Mar 2011 17:51:36 -0700 Subject: sctp: Pass __GFP_NOWARN to hash table allocation attempts. Like DCCP and other similar pieces of code, there are mechanisms here to try allocating smaller hash tables if the allocation fails. So pass in __GFP_NOWARN like the others do instead of emitting a scary message. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/sctp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 152976ec0b74..d5bf91d04f63 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1205,7 +1205,7 @@ SCTP_STATIC __init int sctp_init(void) if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) continue; sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_ATOMIC, order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { pr_err("Failed association hash alloc\n"); @@ -1238,7 +1238,7 @@ SCTP_STATIC __init int sctp_init(void) if ((sctp_port_hashsize > (64 * 1024)) && order > 0) continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC, order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); -- cgit v1.2.3 From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- net/8021q/vlanproc.c | 2 +- net/9p/client.c | 2 +- net/9p/trans_common.c | 4 ++-- net/9p/util.c | 2 +- net/atm/br2684.c | 2 +- net/atm/lec.h | 2 +- net/batman-adv/soft-interface.c | 2 +- net/bluetooth/hci_core.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/bridge/br_fdb.c | 2 +- net/bridge/br_ioctl.c | 2 +- net/caif/caif_socket.c | 2 +- net/can/bcm.c | 2 +- net/ceph/osd_client.c | 2 +- net/core/dev.c | 12 ++++++------ net/core/filter.c | 2 +- net/core/link_watch.c | 2 +- net/core/rtnetlink.c | 4 ++-- net/core/skbuff.c | 2 +- net/core/sock.c | 10 +++++----- net/dccp/output.c | 2 +- net/dsa/mv88e6131.c | 2 +- net/ipv4/cipso_ipv4.c | 8 ++++---- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ipconfig.c | 2 +- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv4/tcp_lp.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_yeah.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/irda/irlap.c | 2 +- net/irda/irlap_event.c | 8 ++++---- net/irda/irlap_frame.c | 2 +- net/irda/irlmp_event.c | 2 +- net/irda/irnet/irnet.h | 2 +- net/irda/irqueue.c | 2 +- net/irda/irttp.c | 2 +- net/irda/qos.c | 8 ++++---- net/irda/timer.c | 2 +- net/iucv/af_iucv.c | 2 +- net/iucv/iucv.c | 4 ++-- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mesh_pathtbl.c | 2 +- net/mac80211/rc80211_minstrel_ht.c | 2 +- net/mac80211/rc80211_pid.h | 2 +- net/mac80211/rx.c | 2 +- net/mac80211/sta_info.c | 4 ++-- net/mac80211/sta_info.h | 2 +- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/ipvs/ip_vs_proto_sctp.c | 8 ++++---- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 6 +++--- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/nf_queue.c | 2 +- net/netlabel/netlabel_domainhash.c | 10 +++++----- net/netlabel/netlabel_mgmt.c | 2 +- net/rds/ib_send.c | 2 +- net/rds/iw_cm.c | 2 +- net/rds/iw_rdma.c | 2 +- net/rds/iw_send.c | 2 +- net/rds/send.c | 2 +- net/rose/rose_route.c | 2 +- net/sched/act_api.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_htb.c | 2 +- net/sched/sch_netem.c | 6 +++--- net/sctp/associola.c | 2 +- net/sctp/auth.c | 6 +++--- net/sctp/input.c | 2 +- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 6 +++--- net/sctp/sm_sideeffect.c | 2 +- net/sctp/sm_statefuns.c | 20 ++++++++++---------- net/sctp/socket.c | 2 +- net/sctp/ulpevent.c | 2 +- net/sctp/ulpqueue.c | 2 +- net/socket.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/xprtsock.c | 4 ++-- net/tipc/link.c | 2 +- net/tipc/name_distr.c | 2 +- net/unix/af_unix.c | 2 +- net/wanrouter/wanproc.c | 2 +- net/wireless/reg.c | 4 ++-- net/x25/x25_facilities.c | 2 +- net/x25/x25_forward.c | 4 ++-- net/xfrm/xfrm_user.c | 6 +++--- 103 files changed, 160 insertions(+), 160 deletions(-) (limited to 'net') diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d1314cf18adf..d940c49d168a 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -54,7 +54,7 @@ static const char name_conf[] = "config"; /* * Structures for interfacing with the /proc filesystem. - * VLAN creates its own directory /proc/net/vlan with the folowing + * VLAN creates its own directory /proc/net/vlan with the following * entries: * config device status/configuration * entry for each device diff --git a/net/9p/client.c b/net/9p/client.c index 2ccbf04d37df..48b8e084e710 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -178,7 +178,7 @@ free_and_return: * @tag: numeric id for transaction * * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction + * request_slots as necessary to accommodate transaction * ids which did not previously have a slot. * * this code relies on the client spinlock to manage locks, its diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9172ab78fcb0..d47880e971dd 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -36,7 +36,7 @@ p9_release_req_pages(struct trans_rpage_info *rpinfo) EXPORT_SYMBOL(p9_release_req_pages); /** - * p9_nr_pages - Return number of pages needed to accomodate the payload. + * p9_nr_pages - Return number of pages needed to accommodate the payload. */ int p9_nr_pages(struct p9_req_t *req) @@ -55,7 +55,7 @@ EXPORT_SYMBOL(p9_nr_pages); * @req: Request to be sent to server. * @pdata_off: data offset into the first page after translation (gup). * @pdata_len: Total length of the IO. gup may not return requested # of pages. - * @nr_pages: number of pages to accomodate the payload + * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. */ int diff --git a/net/9p/util.c b/net/9p/util.c index b84619b5ba22..da6af81e59d9 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create); /** * p9_idpool_destroy - create a new per-connection id pool - * @p: idpool to destory + * @p: idpool to destroy */ void p9_idpool_destroy(struct p9_idpool *p) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index fce2eae8d476..2252c2085dac 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -509,7 +509,7 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) write_lock_irq(&devs_lock); net_dev = br2684_find_dev(&be.ifspec); if (net_dev == NULL) { - pr_err("tried to attach to non-existant device\n"); + pr_err("tried to attach to non-existent device\n"); err = -ENXIO; goto error; } diff --git a/net/atm/lec.h b/net/atm/lec.h index 9d14d196cc1d..dfc071966463 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -35,7 +35,7 @@ struct lecdatahdr_8025 { * Operations that LANE2 capable device can do. Two first functions * are used to make the device do things. See spec 3.1.3 and 3.1.4. * - * The third function is intented for the MPOA component sitting on + * The third function is intended for the MPOA component sitting on * top of the LANE device. The MPOA component assigns it's own function * to (*associate_indicator)() and the LANE device will use that * function to tell about TLVs it sees floating through. diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 9ed26140a269..824e1f6e50f2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -474,7 +474,7 @@ void interface_rx(struct net_device *soft_iface, goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); - /* should not be neccesary anymore as we use skb_pull_rcsum() + /* should not be necessary anymore as we use skb_pull_rcsum() * TODO: please verify this and remove this TODO * -- Dec 21st 2009, Simon Wunderlich */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b372fb8bcdcf..42d5ff02cb59 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1877,7 +1877,7 @@ static void hci_tx_task(unsigned long arg) read_unlock(&hci_task_lock); } -/* ----- HCI RX task (incoming data proccessing) ----- */ +/* ----- HCI RX task (incoming data processing) ----- */ /* ACL data packet */ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index fc85e7ae33c7..36b9c5d0ebe3 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -679,7 +679,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch if (opt == BT_FLUSHABLE_OFF) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - /* proceed futher only when we have l2cap_conn and + /* proceed further only when we have l2cap_conn and No Flush support in the LM */ if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { err = -EINVAL; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 88485cc74dc3..cc4d3c5ab1c6 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -169,7 +169,7 @@ void br_fdb_flush(struct net_bridge *br) spin_unlock_bh(&br->hash_lock); } -/* Flush all entries refering to a specific port. +/* Flush all entries referring to a specific port. * if do_all is set also flush static entries */ void br_fdb_delete_by_port(struct net_bridge *br, diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index cb43312b846e..3d9fca0e3370 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -106,7 +106,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) /* * Legacy ioctl's through SIOCDEVPRIVATE * This interface is deprecated because it was too difficult to - * to do the translation for 32/64bit ioctl compatability. + * to do the translation for 32/64bit ioctl compatibility. */ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 8184c031d028..37a4034dfc29 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -852,7 +852,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTING; sk->sk_state = CAIF_CONNECTING; - /* Check priority value comming from socket */ + /* Check priority value coming from socket */ /* if priority value is out of range it will be ajusted */ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX) cf_sk->conn_req.priority = CAIF_PRIO_MAX; diff --git a/net/can/bcm.c b/net/can/bcm.c index 871a0ad51025..57b1aed79014 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -387,7 +387,7 @@ static void bcm_tx_timeout_tsklet(unsigned long data) } /* - * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions + * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 02212ed50852..8d4ee7e01793 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -917,7 +917,7 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is - * no up osd, set r_osd to NULL. Move the request to the appropiate list + * no up osd, set r_osd to NULL. Move the request to the appropriate list * (unsent, homeless) or leave on in-flight lru. * * Return 0 if unchanged, 1 if changed, or negative on error. diff --git a/net/core/dev.c b/net/core/dev.c index 563ddc28139d..56c3e00098c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2071,7 +2071,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 features; /* - * If device doesnt need skb->dst, release it right now while + * If device doesn't need skb->dst, release it right now while * its hot in this cpu cache */ if (dev->priv_flags & IFF_XMIT_DST_RELEASE) @@ -2131,7 +2131,7 @@ gso: nskb->next = NULL; /* - * If device doesnt need nskb->dst, release it right now while + * If device doesn't need nskb->dst, release it right now while * its hot in this cpu cache */ if (dev->priv_flags & IFF_XMIT_DST_RELEASE) @@ -2950,8 +2950,8 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions * a compare and 2 stores extra right now if we dont have it on * but have CONFIG_NET_CLS_ACT - * NOTE: This doesnt stop any functionality; if you dont have - * the ingress scheduler, you just cant add policies on ingress. + * NOTE: This doesn't stop any functionality; if you dont have + * the ingress scheduler, you just can't add policies on ingress. * */ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) @@ -3780,7 +3780,7 @@ static void net_rx_action(struct softirq_action *h) * with netpoll's poll_napi(). Only the entity which * obtains the lock and sees NAPI_STATE_SCHED set will * actually make the ->poll() call. Therefore we avoid - * accidently calling ->poll() when NAPI is not scheduled. + * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; if (test_bit(NAPI_STATE_SCHED, &n->state)) { @@ -6316,7 +6316,7 @@ static void __net_exit default_device_exit(struct net *net) if (dev->rtnl_link_ops) continue; - /* Push remaing network devices to init_net */ + /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { diff --git a/net/core/filter.c b/net/core/filter.c index 232b1873bb28..afb8afb066bb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -425,7 +425,7 @@ EXPORT_SYMBOL(sk_run_filter); * As we dont want to clear mem[] array for each packet going through * sk_run_filter(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure - * a malicious user doesnt try to abuse us. + * a malicious user doesn't try to abuse us. */ static int check_load_and_stores(struct sock_filter *filter, int flen) { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 01a1101b5936..a7b342131869 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -129,7 +129,7 @@ static void linkwatch_schedule_work(int urgent) if (!cancel_delayed_work(&linkwatch_work)) return; - /* Otherwise we reschedule it again for immediate exection. */ + /* Otherwise we reschedule it again for immediate execution. */ schedule_delayed_work(&linkwatch_work, 0); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 49f7ea5b4c75..d7c4bb4b1820 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -196,7 +196,7 @@ EXPORT_SYMBOL_GPL(__rtnl_register); * as failure of this function is very unlikely, it can only happen due * to lack of memory when allocating the chain to store all message * handlers for a protocol. Meant for use in init functions where lack - * of memory implies no sense in continueing. + * of memory implies no sense in continuing. */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit) @@ -1440,7 +1440,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, errout: if (err < 0 && modified && net_ratelimit()) printk(KERN_WARNING "A link change request failed with " - "some changes comitted already. Interface %s may " + "some changes committed already. Interface %s may " "have been left with an inconsistent configuration, " "please check.\n", dev->name); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 801dd08908f9..7ebeed0a877c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2267,7 +2267,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read); * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * - * Note 1: The size of each block of data returned can be arbitary, + * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy seqeuental * reads of potentially non linear data. * diff --git a/net/core/sock.c b/net/core/sock.c index 7dfed792434d..6e819780c232 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -215,7 +215,7 @@ __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -/* Maximal space eaten by iovec or ancilliary data plus some space */ +/* Maximal space eaten by iovec or ancillary data plus some space */ int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); @@ -1175,7 +1175,7 @@ static void __sk_free(struct sock *sk) void sk_free(struct sock *sk) { /* - * We substract one from sk_wmem_alloc and can know if + * We subtract one from sk_wmem_alloc and can know if * some packets are still in some tx queue. * If not null, sock_wfree() will call __sk_free(sk) later */ @@ -1185,10 +1185,10 @@ void sk_free(struct sock *sk) EXPORT_SYMBOL(sk_free); /* - * Last sock_put should drop referrence to sk->sk_net. It has already - * been dropped in sk_change_net. Taking referrence to stopping namespace + * Last sock_put should drop reference to sk->sk_net. It has already + * been dropped in sk_change_net. Taking reference to stopping namespace * is not an option. - * Take referrence to a socket to remove it from hash _alive_ and after that + * Take reference to a socket to remove it from hash _alive_ and after that * destroy it in the context of init_net. */ void sk_release_kernel(struct sock *sk) diff --git a/net/dccp/output.c b/net/dccp/output.c index 784d30210543..136d41cbcd02 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -143,7 +143,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) } /** - * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits + * dccp_determine_ccmps - Find out about CCID-specific packet-size limits * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), * since the RX CCID is restricted to feedback packets (Acks), which are small * in comparison with the data traffic. A value of 0 means "no current CCMPS". diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index bb2b41bc854e..d951f93644bf 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -124,7 +124,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) * Ignore removed tag data on doubly tagged packets, disable * flow control messages, force flow control priority to the * highest, and send all special multicast frames to the CPU - * port at the higest priority. + * port at the highest priority. */ REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 094e150c6260..a0af7ea87870 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -112,7 +112,7 @@ int cipso_v4_rbm_strictvalid = 1; /* The maximum number of category ranges permitted in the ranged category tag * (tag #5). You may note that the IETF draft states that the maximum number * of category ranges is 7, but if the low end of the last category range is - * zero then it is possibile to fit 8 category ranges because the zero should + * zero then it is possible to fit 8 category ranges because the zero should * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 @@ -438,7 +438,7 @@ cache_add_failure: * * Description: * Search the DOI definition list for a DOI definition with a DOI value that - * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * matches @doi. The caller is responsible for calling rcu_read_[un]lock(). * Returns a pointer to the DOI definition on success and NULL on failure. */ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) @@ -1293,7 +1293,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, return ret_val; /* This will send packets using the "optimized" format when - * possibile as specified in section 3.4.2.6 of the + * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) tag_len = 14; @@ -1752,7 +1752,7 @@ validate_return: } /** - * cipso_v4_error - Send the correct reponse for a bad packet + * cipso_v4_error - Send the correct response for a bad packet * @skb: the packet * @error: the error code * @gateway: CIPSO gateway flag diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b92c86f6e9b3..e9013d6c1f51 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -12,7 +12,7 @@ * * Hans Liss Uppsala Universitet * - * This work is based on the LPC-trie which is originally descibed in: + * This work is based on the LPC-trie which is originally described in: * * An experimental study of compression methods for dynamic tries * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a91dc1611081..e5f8a71d3a2a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -704,7 +704,7 @@ static void icmp_unreach(struct sk_buff *skb) */ /* - * Check the other end isnt violating RFC 1122. Some routers send + * Check the other end isn't violating RFC 1122. Some routers send * bogus responses to broadcast frames. If you see this message * first check your netmask matches at both ends, if it does then * get the other vendor to fix their kit. diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 67f241b97649..459c011b1d4a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -603,7 +603,7 @@ slow_path: /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; - /* IF: we are not sending upto and including the packet end + /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2b097752426b..cbff2ecccf3d 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1444,7 +1444,7 @@ static int __init ip_auto_config(void) root_server_addr = addr; /* - * Use defaults whereever applicable. + * Use defaults wherever applicable. */ if (ic_defaults() < 0) return -1; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4b5d457c2d76..89bc7e66d598 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -76,7 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, } /* - * Unfortunatly, _b and _mask are not aligned to an int (or long int) + * Unfortunately, _b and _mask are not aligned to an int (or long int) * Some arches dont care, unrolling the loop is a win on them. * For other arches, we only have a 16bit alignement. */ @@ -1874,7 +1874,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ffcea0d1678e..704915028009 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2233,7 +2233,7 @@ static int __init ip_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 21bcf471b25a..9c71b2755ce3 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -521,7 +521,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) } EXPORT_SYMBOL(nf_nat_protocol_register); -/* Noone stores the protocol anywhere; simply delete it. */ +/* No one stores the protocol anywhere; simply delete it. */ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) { spin_lock_bh(&nf_nat_lock); @@ -532,7 +532,7 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) } EXPORT_SYMBOL(nf_nat_protocol_unregister); -/* Noone using conntrack by the time this called. */ +/* No one using conntrack by the time this called. */ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2d3c72e5bbbf..bceaec42c37d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -622,7 +622,7 @@ do_confirm: static void raw_close(struct sock *sk, long timeout) { /* - * Raw sockets may have direct kernel refereneces. Kill them. + * Raw sockets may have direct kernel references. Kill them. */ ip_ra_control(sk, 0, NULL); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b0c81180804..ea107515c53e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -821,7 +821,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) } /* - * Pertubation of rt_genid by a small quantity [1..256] + * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. @@ -1191,7 +1191,7 @@ restart: #endif /* * Since lookup is lockfree, we must make sure - * previous writes to rt are comitted to memory + * previous writes to rt are committed to memory * before making rt visible to other CPUS. */ rcu_assign_pointer(rt_hash_table[hash].chain, rt); diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 656d431c99ad..72f7218b03f5 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -12,7 +12,7 @@ * within cong_avoid. * o Error correcting in remote HZ, therefore remote HZ will be keeped * on checking and updating. - * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne + * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since * OWD have a similar meaning as RTT. Also correct the buggy formular. * o Handle reaction for Early Congestion Indication (ECI) within * pkts_acked, as mentioned within pseudo code. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dfa5beb0c1c8..64f30eca1c67 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinately with F-RTO */ + /* Don't override Nagle indefinitely with F-RTO */ if (tp->frto_counter == 2) tp->frto_counter = 3; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index dc7f43179c9a..05c3b6f0e8e1 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -20,7 +20,7 @@ #define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss #define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion #define TCP_YEAH_PHY 8 //lin maximum delta from base -#define TCP_YEAH_RHO 16 //lin minumum number of consecutive rtt to consider competition on loss +#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss #define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count #define TCP_SCALABLE_AI_CNT 100U diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 588f47af5faf..f87a8eb76f3b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, * @sk: socket struct in question * @snum: port number to look up * @saddr_comp: AF-dependent comparison of bound local IP addresses - * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, + * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3daaf3c7703c..1493534116df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1084,7 +1084,7 @@ static int ipv6_get_saddr_eval(struct net *net, case IPV6_SADDR_RULE_PRIVACY: { /* Rule 7: Prefer public address - * Note: prefer temprary address if use_tempaddr >= 2 + * Note: prefer temporary address if use_tempaddr >= 2 */ int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? !!(dst->prefs & IPV6_PREFER_SRC_TMP) : @@ -1968,7 +1968,7 @@ ok: * to the stored lifetime since we'll * be updating the timestamp below, * else we'll set it back to the - * minumum. + * minimum. */ if (prefered_lft != ifp->prefered_lft) { valid_lft = stored_lft; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4b13d5d8890e..afcc7099f96d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1113,7 +1113,7 @@ static int __init inet6_init(void) /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance - * in a host availiable by both INET and INET6 APIs and + * in a host available by both INET and INET6 APIs and * able to communicate via both network protocols. */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 18208876aa8a..46cf7bea6769 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -779,7 +779,7 @@ slow_path: /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; - /* IF: we are not sending upto and including the packet end + /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b2af9b85cec..5a1c6f27ffaf 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2248,7 +2248,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err1; - /* Noone else will be downing sem now, so we won't sleep */ + /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg)); if (ret < 0) goto err2; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 97c5b21b9674..cdd6d045e42e 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -71,7 +71,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == NULL) return NF_STOLEN; - /* error occured or not fragmented */ + /* error occurred or not fragmented */ if (reasm == skb) return NF_ACCEPT; diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 783c5f367d29..005b424494a0 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -165,7 +165,7 @@ struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, irlap_apply_default_connection_parameters(self); - self->N3 = 3; /* # connections attemts to try before giving up */ + self->N3 = 3; /* # connections attempts to try before giving up */ self->state = LAP_NDM; diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index d434c8880745..bb47021c9a55 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -708,7 +708,7 @@ static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event, self->frame_sent = TRUE; } - /* Readjust our timer to accomodate devices + /* Readjust our timer to accommodate devices * doing faster or slower discovery than us... * Jean II */ irlap_start_query_timer(self, info->S, info->s); @@ -931,7 +931,7 @@ static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event, irlap_send_rr_frame(self, CMD_FRAME); /* The timer is set to half the normal timer to quickly - * detect a failure to negociate the new connection + * detect a failure to negotiate the new connection * parameters. IrLAP 6.11.3.2, note 3. * Note that currently we don't process this failure * properly, as we should do a quick disconnect. @@ -1052,7 +1052,7 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event, return -EPROTO; } - /* Substract space used by this skb */ + /* Subtract space used by this skb */ self->bytes_left -= skb->len; #else /* CONFIG_IRDA_DYNAMIC_WINDOW */ /* Window has been adjusted for the max packet @@ -1808,7 +1808,7 @@ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, return -EPROTO; /* Try again later */ } - /* Substract space used by this skb */ + /* Subtract space used by this skb */ self->bytes_left -= skb->len; #else /* CONFIG_IRDA_DYNAMIC_WINDOW */ /* Window has been adjusted for the max packet diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 688222cbf55b..8c004161a843 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -848,7 +848,7 @@ void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb) * though IrLAP is currently sending the *last* frame of the * tx-window, the driver most likely has only just started * sending the *first* frame of the same tx-window. - * I.e. we are always at the very begining of or Tx window. + * I.e. we are always at the very beginning of or Tx window. * Now, we are supposed to set the final timer from the end * of our tx-window to let the other peer reply. So, we need * to add extra time to compensate for the fact that we diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index c1fb5db81042..9505a7d06f1a 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -498,7 +498,7 @@ static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event, switch (event) { #ifdef CONFIG_IRDA_ULTRA case LM_UDATA_INDICATION: - /* This is most bizzare. Those packets are aka unreliable + /* This is most bizarre. Those packets are aka unreliable * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA. * Why do we pass them as Ultra ??? Jean II */ irlmp_connless_data_indication(self, skb); diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index 0d82ff5aeff1..979ecb2435a7 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -73,7 +73,7 @@ * Infinite thanks to those brave souls for providing the infrastructure * upon which IrNET is built. * - * Thanks to all my collegues in HP for helping me. In particular, + * Thanks to all my colleagues in HP for helping me. In particular, * thanks to Salil Pradhan and Bill Serra for W2k testing... * Thanks to Luiz Magalhaes for irnetd and much testing... * diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index 849aaf0dabb5..9715e6e5900b 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -40,7 +40,7 @@ * o the hash function for ints is pathetic (but could be changed) * o locking is sometime suspicious (especially during enumeration) * o most users have only a few elements (== overhead) - * o most users never use seach, so don't benefit from hashing + * o most users never use search, so don't benefit from hashing * Problem already fixed : * o not 64 bit compliant (most users do hashv = (int) self) * o hashbin_remove() is broken => use hashbin_remove_this() diff --git a/net/irda/irttp.c b/net/irda/irttp.c index f6054f9ccbe3..9d9af4606970 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1193,7 +1193,7 @@ EXPORT_SYMBOL(irttp_connect_request); /* * Function irttp_connect_confirm (handle, qos, skb) * - * Sevice user confirms TSAP connection with peer. + * Service user confirms TSAP connection with peer. * */ static void irttp_connect_confirm(void *instance, void *sap, diff --git a/net/irda/qos.c b/net/irda/qos.c index 2b00974e5bae..1b51bcf42394 100644 --- a/net/irda/qos.c +++ b/net/irda/qos.c @@ -39,16 +39,16 @@ #include /* - * Maximum values of the baud rate we negociate with the other end. + * Maximum values of the baud rate we negotiate with the other end. * Most often, you don't have to change that, because Linux-IrDA will * use the maximum offered by the link layer, which usually works fine. * In some very rare cases, you may want to limit it to lower speeds... */ int sysctl_max_baud_rate = 16000000; /* - * Maximum value of the lap disconnect timer we negociate with the other end. + * Maximum value of the lap disconnect timer we negotiate with the other end. * Most often, the value below represent the best compromise, but some user - * may want to keep the LAP alive longuer or shorter in case of link failure. + * may want to keep the LAP alive longer or shorter in case of link failure. * Remember that the threshold time (early warning) is fixed to 3s... */ int sysctl_max_noreply_time = 12; @@ -411,7 +411,7 @@ static void irlap_adjust_qos_settings(struct qos_info *qos) * Fix tx data size according to user limits - Jean II */ if (qos->data_size.value > sysctl_max_tx_data_size) - /* Allow non discrete adjustement to avoid loosing capacity */ + /* Allow non discrete adjustement to avoid losing capacity */ qos->data_size.value = sysctl_max_tx_data_size; /* * Override Tx window if user request it. - Jean II diff --git a/net/irda/timer.c b/net/irda/timer.c index 0335ba0cc593..f418cb2ad49c 100644 --- a/net/irda/timer.c +++ b/net/irda/timer.c @@ -59,7 +59,7 @@ void irlap_start_query_timer(struct irlap_cb *self, int S, int s) * slot time, plus add some extra time to properly receive the last * discovery packet (which is longer due to extra discovery info), * to avoid messing with for incomming connections requests and - * to accomodate devices that perform discovery slower than us. + * to accommodate devices that perform discovery slower than us. * Jean II */ timeout = ((sysctl_slot_timeout * HZ / 1000) * (S - s) + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9637e45744fa..986b2a5e8769 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -250,7 +250,7 @@ static struct device *af_iucv_dev; * PRMDATA[0..6] socket data (max 7 bytes); * PRMDATA[7] socket data length value (len is 0xff - PRMDATA[7]) * - * The socket data length is computed by substracting the socket data length + * The socket data length is computed by subtracting the socket data length * value from 0xFF. * If the socket data len is greater 7, then PRMDATA can be used for special * notifications (see iucv_sock_shutdown); and further, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1ee5dab3cfae..8f156bd86be7 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -735,7 +735,7 @@ static void iucv_cleanup_queue(void) struct iucv_irq_list *p, *n; /* - * When a path is severed, the pathid can be reused immediatly + * When a path is severed, the pathid can be reused immediately * on a iucv connect or a connection pending interrupt. Remove * all entries from the task queue that refer to a stale pathid * (iucv_path_table[ix] == NULL). Only then do the iucv connect @@ -807,7 +807,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp) spin_lock_bh(&iucv_table_lock); /* Remove handler from the iucv_handler_list. */ list_del_init(&handler->list); - /* Sever all pathids still refering to the handler. */ + /* Sever all pathids still referring to the handler. */ list_for_each_entry_safe(p, n, &handler->paths, list) { iucv_sever_pathid(p->pathid, NULL); iucv_path_table[p->pathid] = NULL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a40401701424..c18396c248d7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -97,7 +97,7 @@ struct ieee80211_bss { size_t supp_rates_len; /* - * During assocation, we save an ERP value from a probe response so + * During association, we save an ERP value from a probe response so * that we can feed ERP info to the driver when handling the * association completes. these fields probably won't be up-to-date * otherwise, you probably don't want to use them. diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 8d65b47d9837..336ca9d0c5c4 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -628,7 +628,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, * * @mpath: mesh path whose queue has to be freed * - * Locking: the function must me called withing a rcu_read_lock region + * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_flush_pending(struct mesh_path *mpath) { diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 8212a8bebf06..78e67d22dc1f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -259,7 +259,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } - /* try to sample up to half of the availble rates during each interval */ + /* try to sample up to half of the available rates during each interval */ mi->sample_count *= 4; cur_prob = 0; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 6510f8ee738e..19111c7bf454 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -77,7 +77,7 @@ union rc_pid_event_data { }; struct rc_pid_event { - /* The time when the event occured */ + /* The time when the event occurred */ unsigned long timestamp; /* Event ID number */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5c1930ba8ebe..c50b68423c7b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -381,7 +381,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * specs were sane enough this time around to require padding each A-MSDU * subframe to a length that is a multiple of four. * - * Padding like Atheros hardware adds which is inbetween the 802.11 header and + * Padding like Atheros hardware adds which is between the 802.11 header and * the payload is not supported, the driver is required to move the 802.11 * header to be directly in front of the payload in that case. */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d0311a322ddd..13e8c30adf01 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -47,9 +47,9 @@ * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we - * receive an assocation response from the AP. For IBSS this occurs when + * receive an association response from the AP. For IBSS this occurs when * get to know about a peer on the same IBSS. For WDS we add the sta for - * the peer imediately upon device open. When using AP mode we add stations + * the peer immediately upon device open. When using AP mode we add stations * for each respective station upon request from userspace through nl80211. * * In order to remove a STA info structure, various sta_info_destroy_*() diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 57681149e37f..b2f95966c7f4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -173,7 +173,7 @@ struct sta_ampdu_mlme { /** * enum plink_state - state of a mesh peer link finite state machine * - * @PLINK_LISTEN: initial state, considered the implicit state of non existant + * @PLINK_LISTEN: initial state, considered the implicit state of non existent * mesh peer links * @PLINK_OPN_SNT: mesh plink open frame has been sent to this mesh peer * @PLINK_OPN_RCVD: mesh plink open frame has been received from this mesh peer diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d6b48230a540..253326e8d990 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -893,7 +893,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, to = ip_set_list[to_id]; /* Features must not change. - * Not an artifical restriction anymore, as we must prevent + * Not an artificial restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. */ if (!(from->type->features == to->type->features && from->type->family == to->type->family)) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index f289306cbf12..c97bd45975be 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -595,7 +595,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) atomic_inc(&dest->inactconns); } else { /* It is a persistent connection/template, so increase - the peristent connection counter */ + the persistent connection counter */ atomic_inc(&dest->persistconns); } @@ -657,7 +657,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) } } else { /* It is a persistent connection/template, so decrease - the peristent connection counter */ + the persistent connection counter */ atomic_dec(&dest->persistconns); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index f276df9896b3..87e40ea77a95 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -131,7 +131,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); /* - * We don't kfree dest because it is refered either by its service + * We don't kfree dest because it is referred either by its service * or the trash dest list. */ atomic_dec(&en->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index cb1c9913d38b..90f618ab6dda 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -152,7 +152,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) write_lock(&set->lock); list_for_each_entry_safe(e, ep, &set->list, list) { /* - * We don't kfree dest because it is refered either + * We don't kfree dest because it is referred either * by its service or by the trash dest list. */ atomic_dec(&e->dest->refcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index b027ccc49f43..d12ed53ec95f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -566,7 +566,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -633,7 +633,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -701,7 +701,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ @@ -771,7 +771,7 @@ static struct ipvs_sctp_nextstate * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client */ /* - * We recieved the data chuck, keep the state unchanged. I assume + * We received the data chuck, keep the state unchanged. I assume * that still data chuncks can be received by both the peers in * SHUDOWN state */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 941286ca911d..2e1c11f78419 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -453,7 +453,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) REJECT will give spurious warnings here. */ /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ - /* No external references means noone else could have + /* No external references means no one else could have confirmed us. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); @@ -901,7 +901,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { - pr_debug("not prepared to track yet or error occured\n"); + pr_debug("not prepared to track yet or error occurred\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); ret = -ret; diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 9ae57c57c50e..2e664a69d7db 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -98,7 +98,7 @@ static const char * const dccp_state_names[] = { #define sIV CT_DCCP_INVALID /* - * DCCP state transistion table + * DCCP state transition table * * The assumption is the same as for TCP tracking: * diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6f4ee70f460b..6772b1154654 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -107,9 +107,9 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} }, { @@ -121,7 +121,7 @@ static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index bcf47eb518ef..237cc1981b89 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -707,7 +707,7 @@ static const char *ct_sdp_header_search(const char *dptr, const char *limit, } /* Locate a SDP header (optionally a substring within the header value), - * optionally stopping at the first occurence of the term header, parse + * optionally stopping at the first occurrence of the term header, parse * it and return the offset and length of the data we're interested in. */ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5ab22e2bbd7d..5b466cd1272f 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -134,7 +134,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; - /* QUEUE == DROP if noone is waiting, to be safe. */ + /* QUEUE == DROP if no one is waiting, to be safe. */ rcu_read_lock(); qh = rcu_dereference(queue_handler[pf]); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d37b7f80fa37..de0d8e4cbfb6 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -109,7 +109,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) * * Description: * This is the hashing function for the domain hash table, it returns the - * correct bucket number for the domain. The caller is responsibile for + * correct bucket number for the domain. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or the * hash table lock. * @@ -134,7 +134,7 @@ static u32 netlbl_domhsh_hash(const char *key) * * Description: * Searches the domain hash table and returns a pointer to the hash table - * entry if found, otherwise NULL is returned. The caller is responsibile for + * entry if found, otherwise NULL is returned. The caller is responsible for * ensuring that the hash table is protected with either a RCU read lock or the * hash table lock. * @@ -165,7 +165,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) * Searches the domain hash table and returns a pointer to the hash table * entry if an exact match is found, if an exact match is not present in the * hash table then the default entry is returned if valid otherwise NULL is - * returned. The caller is responsibile ensuring that the hash table is + * returned. The caller is responsible ensuring that the hash table is * protected with either a RCU read lock or the hash table lock. * */ @@ -193,7 +193,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) * * Description: * Generate an audit record for adding a new NetLabel/LSM mapping entry with - * the given information. Caller is responsibile for holding the necessary + * the given information. Caller is responsible for holding the necessary * locks. * */ @@ -605,7 +605,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) * * Description: * Look through the domain hash table searching for an entry to match @domain, - * return a pointer to a copy of the entry or NULL. The caller is responsibile + * return a pointer to a copy of the entry or NULL. The caller is responsible * for ensuring that rcu_read_[un]lock() is called. * */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 998e85e895d0..4f251b19fbcc 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -259,7 +259,7 @@ add_failure: * * Description: * This function is a helper function used by the LISTALL and LISTDEF command - * handlers. The caller is responsibile for ensuring that the RCU read lock + * handlers. The caller is responsible for ensuring that the RCU read lock * is held. Returns zero on success, negative values on failure. * */ diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index c47a511f203d..7c4dce8fa5e6 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -355,7 +355,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 712cf2d1f28e..3a60a15d1b4a 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -181,7 +181,7 @@ static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, unsigned int send_size, recv_size; int ret; - /* The offset of 1 is to accomodate the additional ACK WR. */ + /* The offset of 1 is to accommodate the additional ACK WR. */ send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); rds_iw_ring_resize(send_ring, send_size - 1); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 59509e9a9e72..6deaa77495e3 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -122,7 +122,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only - * available infomation in the rds_sock (as the rest are + * available information in the rds_sock (as the rest are * zero'ed. It doesn't appear to be properly populated * during connection setup... */ diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 6280ea020d4e..545d8ee3efb1 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -307,7 +307,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * * Conceptually, we have two counters: * - send credits: this tells us how many WRs we're allowed - * to submit without overruning the reciever's queue. For + * to submit without overruning the receiver's queue. For * each SEND WR we post, we decrement this by one. * * - posted credits: this tells us how many WRs we recently diff --git a/net/rds/send.c b/net/rds/send.c index 35b9c2e9caf1..d58ae5f9339e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -116,7 +116,7 @@ static void release_in_xmit(struct rds_connection *conn) } /* - * We're making the concious trade-off here to only send one message + * We're making the conscious trade-off here to only send one message * down the connection at a time. * Pro: * - tx queueing is a simple fifo list diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 08dcd2f29cdc..479cae57d187 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -587,7 +587,7 @@ static int rose_clear_routes(void) /* * Check that the device given is a valid AX.25 interface that is "up". - * called whith RTNL + * called with RTNL */ static struct net_device *rose_ax25_dev_find(char *devname) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 15873e14cb54..14b42f4ad791 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -999,7 +999,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) switch (n->nlmsg_type) { case RTM_NEWACTION: /* we are going to assume all other flags - * imply create only if it doesnt exist + * imply create only if it doesn't exist * Note that CREATE | EXCL implies that * but since we want avoid ambiguity (eg when flags * is zero) then just set this diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50c7c06c019d..7affe9a92757 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -161,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } if (offset > 0 && offset > skb->len) { pr_info("tc filter pedit" - " offset %d cant exceed pkt length %d\n", + " offset %d can't exceed pkt length %d\n", offset, skb->len); goto bad; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a4de67eca824..49130e8abff0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -47,7 +47,7 @@ * on the meta type. Obviously, the length of the data must also * be provided for non-numeric types. * - * Additionaly, type dependant modifiers such as shift operators + * Additionally, type dependent modifiers such as shift operators * or mask may be applied to extend the functionaliy. As of now, * the variable length type supports shifting the byte string to * the right, eating up any number of octets and thus supporting diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index e1429a85091f..29b942ce9e82 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -183,7 +183,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) * filters in qdisc and in inner nodes (if higher filter points to the inner * node). If we end up with classid MAJOR:0 we enqueue the skb into special * internal fifo (direct). These packets then go directly thru. If we still - * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull + * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful * then finish and return direct queue. */ #define HTB_DIRECT ((struct htb_class *)-1L) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edbbf7ad6623..69c35f6cd13f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -160,7 +160,7 @@ static bool loss_4state(struct netem_sched_data *q) u32 rnd = net_random(); /* - * Makes a comparision between rnd and the transition + * Makes a comparison between rnd and the transition * probabilities outgoing from the current state, then decides the * next state and if the next packet has to be transmitted or lost. * The four states correspond to: @@ -212,9 +212,9 @@ static bool loss_4state(struct netem_sched_data *q) * Generates losses according to the Gilbert-Elliot loss model or * its special cases (Gilbert or Simple Gilbert) * - * Makes a comparision between random number and the transition + * Makes a comparison between random number and the transition * probabilities outgoing from the current state, then decides the - * next state. A second random number is extracted and the comparision + * next state. A second random number is extracted and the comparison * with the loss probability of the current state decides if the next * packet will be transmitted or lost. */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6b04287913cd..0698cad61763 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1593,7 +1593,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc) struct sctp_chunk *ack; struct sctp_chunk *tmp; - /* We can remove all the entries from the queue upto + /* We can remove all the entries from the queue up to * the "Peer-Sequence-Number". */ list_for_each_entry_safe(ack, tmp, &asoc->asconf_ack_list, diff --git a/net/sctp/auth.c b/net/sctp/auth.c index ddbbf7c81fa1..865e68fef21c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -113,7 +113,7 @@ struct sctp_shared_key *sctp_auth_shkey_create(__u16 key_id, gfp_t gfp) return new; } -/* Free the shared key stucture */ +/* Free the shared key structure */ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) { BUG_ON(!list_empty(&sh_key->key_list)); @@ -122,7 +122,7 @@ static void sctp_auth_shkey_free(struct sctp_shared_key *sh_key) kfree(sh_key); } -/* Destory the entire key list. This is done during the +/* Destroy the entire key list. This is done during the * associon and endpoint free process. */ void sctp_auth_destroy_keys(struct list_head *keys) @@ -324,7 +324,7 @@ static struct sctp_auth_bytes *sctp_auth_asoc_create_secret( if (!peer_key_vector || !local_key_vector) goto out; - /* Figure out the order in wich the key_vectors will be + /* Figure out the order in which the key_vectors will be * added to the endpoint shared key. * SCTP-AUTH, Section 6.1: * This is performed by selecting the numerically smaller key diff --git a/net/sctp/input.c b/net/sctp/input.c index 826661be73e7..5436c6921167 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1034,7 +1034,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( * association. * * This means that any chunks that can help us identify the association need -* to be looked at to find this assocation. +* to be looked at to find this association. */ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb, const union sctp_addr *laddr, diff --git a/net/sctp/output.c b/net/sctp/output.c index 60600d337a3a..b4f3cf06d8da 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -510,7 +510,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) sh->checksum = sctp_end_cksum(crc32); } else { if (dst->dev->features & NETIF_F_SCTP_CSUM) { - /* no need to seed psuedo checksum for SCTP */ + /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (skb_transport_header(nskb) - nskb->head); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 26dc005113a0..bf92a5b68f8b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -177,13 +177,13 @@ static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn) * 3) If the missing report count for TSN t is to be * incremented according to [RFC2960] and * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set, - * then the sender MUST futher execute steps 3.1 and + * then the sender MUST further execute steps 3.1 and * 3.2 to determine if the missing report count for * TSN t SHOULD NOT be incremented. * * 3.3) If 3.1 and 3.2 do not dictate that the missing * report count for t should not be incremented, then - * the sender SOULD increment missing report count for + * the sender SHOULD increment missing report count for * t (according to [RFC2960] and [SCTP_STEWART_2002]). */ static inline int sctp_cacc_skip(struct sctp_transport *primary, @@ -843,7 +843,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) case SCTP_CID_ECN_CWR: case SCTP_CID_ASCONF_ACK: one_packet = 1; - /* Fall throught */ + /* Fall through */ case SCTP_CID_SACK: case SCTP_CID_HEARTBEAT: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b21b218d564f..5f86ee4b54c1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -482,7 +482,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * If the timer was a heartbeat, we only increment error counts * when we already have an outstanding HEARTBEAT that has not * been acknowledged. - * Additionaly, some tranport states inhibit error increments. + * Additionally, some tranport states inhibit error increments. */ if (!is_hb) { asoc->overall_error_count++; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4b4eb7c96bbd..76792083c379 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -551,7 +551,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, * * This means that if we only want to abort associations * in an authenticated way (i.e AUTH+ABORT), then we - * can't destroy this association just becuase the packet + * can't destroy this association just because the packet * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) @@ -1546,7 +1546,7 @@ cleanup: } /* - * Handle simultanous INIT. + * Handle simultaneous INIT. * This means we started an INIT and then we got an INIT request from * our peer. * @@ -2079,7 +2079,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -2120,7 +2120,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -2381,7 +2381,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -2448,7 +2448,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep, * RFC 2960, Section 3.3.7 * If an endpoint receives an ABORT with a format error or for an * association that doesn't exist, it MUST silently discard it. - * Becasue the length is "invalid", we can't really discard just + * Because the length is "invalid", we can't really discard just * as we do not know its true length. So, to be safe, discard the * packet. */ @@ -3855,7 +3855,7 @@ gen_shutdown: } /* - * SCTP-AUTH Section 6.3 Receving authenticated chukns + * SCTP-AUTH Section 6.3 Receiving authenticated chukns * * The receiver MUST use the HMAC algorithm indicated in the HMAC * Identifier field. If this algorithm was not specified by the @@ -4231,7 +4231,7 @@ static sctp_disposition_t sctp_sf_abort_violation( * * This means that if we only want to abort associations * in an authenticated way (i.e AUTH+ABORT), then we - * can't destroy this association just becuase the packet + * can't destroy this association just because the packet * was malformed. */ if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) @@ -4402,9 +4402,9 @@ static sctp_disposition_t sctp_sf_violation_ctsn( } /* Handle protocol violation of an invalid chunk bundling. For example, - * when we have an association and we recieve bundled INIT-ACK, or + * when we have an association and we receive bundled INIT-ACK, or * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle" - * statement from the specs. Additinally, there might be an attacker + * statement from the specs. Additionally, there might be an attacker * on the path and we may not want to continue this communication. */ static sctp_disposition_t sctp_sf_violation_chunk( diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3951a10605bc..deb82e35a107 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1193,7 +1193,7 @@ out_free: * an endpoint that is multi-homed. Much like sctp_bindx() this call * allows a caller to specify multiple addresses at which a peer can be * reached. The way the SCTP stack uses the list of addresses to set up - * the association is implementation dependant. This function only + * the association is implementation dependent. This function only * specifies that the stack will try to make use of all the addresses in * the list when needed. * diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index aa72e89c3ee1..dff27d5e22fd 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo)); /* Per TSVWG discussion with Randy. Allow the application to - * ressemble a fragmented message. + * resemble a fragmented message. */ ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 17678189d054..f2d1de7f2ffb 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -240,7 +240,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) } else { /* * If fragment interleave is enabled, we - * can queue this to the recieve queue instead + * can queue this to the receive queue instead * of the lobby. */ if (sctp_sk(sk)->frag_interleave) diff --git a/net/socket.c b/net/socket.c index 5212447c86e7..310d16b1b3c9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2986,7 +2986,7 @@ out: /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(compat_ulong_t __user *argp) { diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index bcdae78fdfc6..8d0f7d3c71c8 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1101,7 +1101,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* credential is: * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle - * at least 5 u32s, and is preceeded by length, so that makes 6. + * at least 5 u32s, and is preceded by length, so that makes 6. */ if (argv->iov_len < 5 * 4) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1e336a06d3e6..bf005d3c65ef 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -504,7 +504,7 @@ static int xs_nospace(struct rpc_task *task) * EAGAIN: The socket was blocked, please call again later to * complete the request * ENOTCONN: Caller needs to invoke connect logic then call again - * other: Some other error occured, the request was not sent + * other: Some other error occurred, the request was not sent */ static int xs_udp_send_request(struct rpc_task *task) { @@ -590,7 +590,7 @@ static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) * EAGAIN: The socket was blocked, please call again later to * complete the request * ENOTCONN: Caller needs to invoke connect logic then call again - * other: Some other error occured, the request was not sent + * other: Some other error occurred, the request was not sent * * XXX: In the case of soft timeouts, should we eventually give up * if sendmsg is not able to make progress? diff --git a/net/tipc/link.c b/net/tipc/link.c index 43639ff1cbec..ebf338f7b14e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2471,7 +2471,7 @@ exit: * A pending message being re-assembled must store certain values * to handle subsequent fragments correctly. The following functions * help storing these values in unused, available fields in the - * pending message. This makes dynamic memory allocation unecessary. + * pending message. This makes dynamic memory allocation unnecessary. */ static void set_long_msg_seqno(struct sk_buff *buf, u32 seqno) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index c9fa6dfcf287..80025a1b3bfd 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -160,7 +160,7 @@ void tipc_named_withdraw(struct publication *publ) buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0); if (!buf) { - warn("Withdrawl distribution failure\n"); + warn("Withdrawal distribution failure\n"); return; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1663e1a2efdd..3a43a8304768 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -207,7 +207,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) /* * This may look like an off by one error but it is a bit more * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesnt as such exist. However in kernel space + * sun_path[108] doesn't as such exist. However in kernel space * we are guaranteed that it is a valid memory location in our * kernel address buffer. */ diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 11f25c7a7a05..f346395314ba 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -51,7 +51,7 @@ /* * Structures for interfacing with the /proc filesystem. - * Router creates its own directory /proc/net/router with the folowing + * Router creates its own directory /proc/net/router with the following * entries: * config device configuration * status global device statistics diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3332d5bce317..ab801a1097b2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -809,7 +809,7 @@ static void handle_channel(struct wiphy *wiphy, if (r) { /* * We will disable all channels that do not match our - * recieved regulatory rule unless the hint is coming + * received regulatory rule unless the hint is coming * from a Country IE and the Country IE had no information * about a band. The IEEE 802.11 spec allows for an AP * to send only a subset of the regulatory rules allowed, @@ -838,7 +838,7 @@ static void handle_channel(struct wiphy *wiphy, request_wiphy && request_wiphy == wiphy && request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* - * This gaurantees the driver's requested regulatory domain + * This guarantees the driver's requested regulatory domain * will always be used as a base for further regulatory * settings */ diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 406207515b5e..f77e4e75f914 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -31,7 +31,7 @@ * x25_parse_facilities - Parse facilities from skb into the facilities structs * * @skb: sk_buff to parse - * @facilities: Regular facilites, updated as facilities are found + * @facilities: Regular facilities, updated as facilities are found * @dte_facs: ITU DTE facilities, updated as DTE facilities are found * @vc_fac_mask: mask is updated with all facilities found * diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c index 25a810793968..c541b622ae16 100644 --- a/net/x25/x25_forward.c +++ b/net/x25/x25_forward.c @@ -31,7 +31,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, goto out_no_route; if ((neigh_new = x25_get_neigh(rt->dev)) == NULL) { - /* This shouldnt happen, if it occurs somehow + /* This shouldn't happen, if it occurs somehow * do something sensible */ goto out_put_route; @@ -45,7 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from, } /* Remote end sending a call request on an already - * established LCI? It shouldnt happen, just in case.. + * established LCI? It shouldn't happen, just in case.. */ read_lock_bh(&x25_forward_list_lock); list_for_each(entry, &x25_forward_list) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3d15d3e1b2c4..5d1d60d3ca83 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -894,7 +894,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); @@ -954,7 +954,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); - if (nlh == NULL) /* shouldnt really happen ... */ + if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); @@ -1361,7 +1361,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (!xp) return err; - /* shouldnt excl be based on nlh flags?? + /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived * in netlink excl is a flag and you wouldnt need * a type XFRM_MSG_UPDPOLICY - JHS */ -- cgit v1.2.3 From c100c8f4c3c6f2a407bdbaaad2c4f1062e6a473a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Mar 2011 18:59:10 -0700 Subject: appletalk: Fix OOPS in atalk_release(). Commit 60d9f461a20ba59219fdcdc30cbf8e3a4ad3f625 ("appletalk: remove the BKL") added a dereference of "sk" before checking for NULL in atalk_release(). Guard the code block completely, rather than partially, with the NULL check. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 206e771e82d1..956a5302002a 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1051,16 +1051,17 @@ static int atalk_release(struct socket *sock) { struct sock *sk = sock->sk; - sock_hold(sk); - lock_sock(sk); if (sk) { + sock_hold(sk); + lock_sock(sk); + sock_orphan(sk); sock->sk = NULL; atalk_destroy_socket(sk); - } - release_sock(sk); - sock_put(sk); + release_sock(sk); + sock_put(sk); + } return 0; } -- cgit v1.2.3 From 2cab86bee8e7f353e6ac8c15b3eb906643497644 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 Mar 2011 23:42:55 +0000 Subject: sctp: malloc enough room for asconf-ack chunk Sometime the ASCONF_ACK parameters can equal to the fourfold of ASCONF parameters, this only happend in some special case: ASCONF parameter is : Unrecognized Parameter (4 bytes) ASCONF_ACK parameter should be: Error Cause Indication parameter (8 bytes header) + Error Cause (4 bytes header) + Unrecognized Parameter (4bytes) Four 4bytes Unrecognized Parameters in ASCONF chunk will cause panic. Pid: 0, comm: swapper Not tainted 2.6.38-next+ #22 Bochs Bochs EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at skb_put+0x60/0x70 EAX: 00000077 EBX: c09060e2 ECX: dec1dc30 EDX: c09469c0 ESI: 00000000 EDI: de3c8d40 EBP: dec1dc58 ESP: dec1dc2c DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process swapper (pid: 0, ti=dec1c000 task=c09aef20 task.ti=c0980000) Stack: c09469c0 e1894fa4 00000044 00000004 de3c8d00 de3c8d00 de3c8d44 de3c8d40 c09060e2 de25dd80 de3c8d40 dec1dc7c e1894fa4 dec1dcb0 00000040 00000004 00000000 00000800 00000004 00000004 dec1dce0 e1895a2b dec1dcb4 de25d960 Call Trace: [] ? sctp_addto_chunk+0x4e/0x89 [sctp] [] sctp_addto_chunk+0x4e/0x89 [sctp] [] sctp_process_asconf+0x32f/0x3d1 [sctp] [] sctp_sf_do_asconf+0xf8/0x173 [sctp] [] sctp_do_sm+0xb8/0x159 [sctp] [] ? sctp_cname+0x0/0x52 [sctp] [] sctp_assoc_bh_rcv+0xac/0xe3 [sctp] [] sctp_inq_push+0x2d/0x30 [sctp] [] sctp_rcv+0x7a7/0x83d [sctp] [] ? ipv4_confirm+0x118/0x125 [] ? nf_iterate+0x34/0x62 [] ? ip_local_deliver_finish+0x0/0x194 [] ? ip_local_deliver_finish+0x0/0x194 [] ip_local_deliver_finish+0xf5/0x194 [] ? ip_local_deliver_finish+0x0/0x194 [] NF_HOOK.clone.1+0x3d/0x44 [] ip_local_deliver+0x3e/0x44 [] ? ip_local_deliver_finish+0x0/0x194 [] ip_rcv_finish+0x29f/0x2c7 [] ? ip_rcv_finish+0x0/0x2c7 [] NF_HOOK.clone.1+0x3d/0x44 [] ip_rcv+0x1f5/0x233 [] ? ip_rcv_finish+0x0/0x2c7 [] __netif_receive_skb+0x310/0x336 [] netif_receive_skb+0x4b/0x51 [] cp_rx_poll+0x1e7/0x29c [8139cp] [] net_rx_action+0x65/0x13a [] __do_softirq+0xa1/0x149 [] ? __do_softirq+0x0/0x149 [] ? irq_exit+0x37/0x72 [] ? do_IRQ+0x81/0x95 [] ? common_interrupt+0x30/0x38 [] ? native_safe_halt+0xa/0xc [] ? default_idle+0x58/0x92 [] ? cpu_idle+0x96/0xb2 [] ? rest_init+0x5d/0x5f [] ? start_kernel+0x34b/0x350 [] ? i386_start_kernel+0xba/0xc1 Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index de98665db524..b3434cc7d0cf 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3106,10 +3106,10 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, /* create an ASCONF_ACK chunk. * Based on the definitions of parameters, we know that the size of - * ASCONF_ACK parameters are less than or equal to the twice of ASCONF + * ASCONF_ACK parameters are less than or equal to the fourfold of ASCONF * parameters. */ - asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 2); + asconf_ack = sctp_make_asconf_ack(asoc, serial, chunk_len * 4); if (!asconf_ack) goto done; -- cgit v1.2.3 From 2fceec13375e5d98ef033c6b0ee03943fc460950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 1 Apr 2011 21:47:41 -0700 Subject: tcp: len check is unnecessarily devastating, change to WARN_ON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers are prepared for alloc failures anyway, so this error can safely be boomeranged to the callers domain without super bad consequences. ...At worst the connection might go into a state where each RTO tries to (unsuccessfully) re-fragment with such a mis-sized value and eventually dies. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dfa5beb0c1c8..8b0d0167e44a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1003,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, int nlen; u8 flags; - BUG_ON(len > skb->len); + if (WARN_ON(len > skb->len)) + return -EINVAL; nsize = skb_headlen(skb) - len; if (nsize < 0) -- cgit v1.2.3 From 512d06b5b64fb422d90f199b1be188082729edf9 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 4 Apr 2011 15:18:45 +0200 Subject: netfilter: ipset: list:set timeout variant fixes - the timeout value was actually not set - the garbage collector was broken The variant is fixed, the tests to the ipset testsuite are added. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_list_set.c | 53 ++++++++++++++++------------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a47c32982f06..f4a46c0d25f3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -43,14 +43,19 @@ struct list_set { static inline struct set_elem * list_set_elem(const struct list_set *map, u32 id) { - return (struct set_elem *)((char *)map->members + id * map->dsize); + return (struct set_elem *)((void *)map->members + id * map->dsize); +} + +static inline struct set_telem * +list_set_telem(const struct list_set *map, u32 id) +{ + return (struct set_telem *)((void *)map->members + id * map->dsize); } static inline bool list_set_timeout(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_test(elem->timeout); } @@ -58,19 +63,11 @@ list_set_timeout(const struct list_set *map, u32 id) static inline bool list_set_expired(const struct list_set *map, u32 id) { - const struct set_telem *elem = - (const struct set_telem *) list_set_elem(map, id); + const struct set_telem *elem = list_set_telem(map, id); return ip_set_timeout_expired(elem->timeout); } -static inline int -list_set_exist(const struct set_telem *elem) -{ - return elem->id != IPSET_INVALID_ID && - !ip_set_timeout_expired(elem->timeout); -} - /* Set list without and with timeout */ static int @@ -146,11 +143,11 @@ list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, struct set_telem *e; for (; i < map->size; i++) { - e = (struct set_telem *)list_set_elem(map, i); + e = list_set_telem(map, i); swap(e->id, id); + swap(e->timeout, timeout); if (e->id == IPSET_INVALID_ID) break; - swap(e->timeout, timeout); } } @@ -164,7 +161,7 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, /* Last element replaced: e.g. add new,before,last */ ip_set_put_byindex(e->id); if (with_timeout(map->timeout)) - list_elem_tadd(map, i, id, timeout); + list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); else list_elem_add(map, i, id); @@ -172,11 +169,11 @@ list_set_add(struct list_set *map, u32 i, ip_set_id_t id, } static int -list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +list_set_del(struct list_set *map, u32 i) { struct set_elem *a = list_set_elem(map, i), *b; - ip_set_put_byindex(id); + ip_set_put_byindex(a->id); for (; i < map->size - 1; i++) { b = list_set_elem(map, i + 1); @@ -308,11 +305,11 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], (before == 0 || (before > 0 && next_id_eq(map, i, refid)))) - ret = list_set_del(map, id, i); + ret = list_set_del(map, i); else if (before < 0 && elem->id == refid && next_id_eq(map, i, id)) - ret = list_set_del(map, id, i + 1); + ret = list_set_del(map, i + 1); } break; default: @@ -460,17 +457,15 @@ list_set_gc(unsigned long ul_set) struct list_set *map = set->data; struct set_telem *e; u32 i; - - /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ - read_lock_bh(&set->lock); - for (i = map->size - 1; i >= 0; i--) { - e = (struct set_telem *) list_set_elem(map, i); - if (e->id != IPSET_INVALID_ID && - list_set_expired(map, i)) - list_set_del(map, e->id, i); + + /* nfnl_lock should be called */ + write_lock_bh(&set->lock); + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); } - read_unlock_bh(&set->lock); + write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; add_timer(&map->gc); -- cgit v1.2.3 From 2f9f28b212a2bd4948c8ceaaec33ce0123632129 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 4 Apr 2011 15:19:25 +0200 Subject: netfilter: ipset: references are protected by rwlock instead of mutex The timeout variant of the list:set type must reference the member sets. However, its garbage collector runs at timer interrupt so the mutex protection of the references is a no go. Therefore the reference protection is converted to rwlock. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_bitmap_ip.c | 3 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 3 +- net/netfilter/ipset/ip_set_bitmap_port.c | 3 +- net/netfilter/ipset/ip_set_core.c | 109 ++++++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 6 +- 5 files changed, 71 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index bca96990218d..a113ff066928 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -338,8 +338,7 @@ bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); if (map->netmask != 32) NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 5e790172deff..00a33242e90c 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -434,8 +434,7 @@ bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip)); NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + (map->last_ip - map->first_ip + 1) * map->dsize)); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 165f09b1a9cb..6b38eb8f6ed8 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -320,8 +320,7 @@ bitmap_port_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize)); if (with_timeout(map->timeout)) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d6b48230a540..e88ac3c3ed07 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -26,6 +26,7 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ +static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ static struct ip_set **ip_set_list; /* all individual sets */ static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ @@ -301,13 +302,18 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static inline void __ip_set_get(ip_set_id_t index) { - atomic_inc(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + ip_set_list[index]->ref++; + write_unlock_bh(&ip_set_ref_lock); } static inline void __ip_set_put(ip_set_id_t index) { - atomic_dec(&ip_set_list[index]->ref); + write_lock_bh(&ip_set_ref_lock); + BUG_ON(ip_set_list[index]->ref == 0); + ip_set_list[index]->ref--; + write_unlock_bh(&ip_set_ref_lock); } /* @@ -324,7 +330,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -356,7 +362,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -378,7 +384,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, struct ip_set *set = ip_set_list[index]; int ret = 0; - BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); if (dim < set->type->dimension || @@ -397,7 +403,6 @@ EXPORT_SYMBOL_GPL(ip_set_del); * Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * - * The nfnl mutex must already be activated. */ ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set) @@ -423,15 +428,12 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * - * The nfnl mutex must already be activated. */ void ip_set_put_byindex(ip_set_id_t index) { - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + if (ip_set_list[index] != NULL) __ip_set_put(index); - } } EXPORT_SYMBOL_GPL(ip_set_put_byindex); @@ -441,7 +443,6 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * can't be destroyed. The set cannot be renamed due to * the referencing either. * - * The nfnl mutex must already be activated. */ const char * ip_set_name_byindex(ip_set_id_t index) @@ -449,7 +450,7 @@ ip_set_name_byindex(ip_set_id_t index) const struct ip_set *set = ip_set_list[index]; BUG_ON(set == NULL); - BUG_ON(atomic_read(&set->ref) == 0); + BUG_ON(set->ref == 0); /* Referenced, so it's safe */ return set->name; @@ -515,10 +516,7 @@ void ip_set_nfnl_put(ip_set_id_t index) { nfnl_lock(); - if (ip_set_list[index] != NULL) { - BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); - __ip_set_put(index); - } + ip_set_put_byindex(index); nfnl_unlock(); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -526,7 +524,7 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* * Communication protocol with userspace over netlink. * - * We already locked by nfnl_lock. + * The commands are serialized by the nfnl mutex. */ static inline bool @@ -657,7 +655,6 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, return -ENOMEM; rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); - atomic_set(&set->ref, 0); set->family = family; /* @@ -690,8 +687,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* * Here, we have a valid, constructed set and we are protected - * by nfnl_lock. Find the first free index in ip_set_list and - * check clashing. + * by the nfnl mutex. Find the first free index in ip_set_list + * and check clashing. */ if ((ret = find_free_id(set->name, &index, &clash)) != 0) { /* If this is the same set and requested, ignore error */ @@ -751,31 +748,51 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlattr * const attr[]) { ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* References are protected by the nfnl mutex */ + /* Commands are serialized and references are + * protected by the ip_set_ref_lock. + * External systems (i.e. xt_set) must call + * ip_set_put|get_nfnl_* functions, that way we + * can safely check references here. + * + * list:set timer can only decrement the reference + * counter, so if it's already zero, we can proceed + * without holding the lock. + */ + read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { - if (ip_set_list[i] != NULL && - (atomic_read(&ip_set_list[i]->ref))) - return -IPSET_ERR_BUSY; + if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { + ret = IPSET_ERR_BUSY; + goto out; + } } + read_unlock_bh(&ip_set_ref_lock); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL) ip_set_destroy_set(i); } } else { i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); - if (i == IPSET_INVALID_ID) - return -ENOENT; - else if (atomic_read(&ip_set_list[i]->ref)) - return -IPSET_ERR_BUSY; + if (i == IPSET_INVALID_ID) { + ret = -ENOENT; + goto out; + } else if (ip_set_list[i]->ref) { + ret = -IPSET_ERR_BUSY; + goto out; + } + read_unlock_bh(&ip_set_ref_lock); ip_set_destroy_set(i); } return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Flush sets */ @@ -834,6 +851,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; const char *name2; ip_set_id_t i; + int ret = 0; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -843,25 +861,33 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; - if (atomic_read(&set->ref) != 0) - return -IPSET_ERR_REFERENCED; + + read_lock_bh(&ip_set_ref_lock); + if (set->ref != 0) { + ret = -IPSET_ERR_REFERENCED; + goto out; + } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && - STREQ(ip_set_list[i]->name, name2)) - return -IPSET_ERR_EXIST_SETNAME2; + STREQ(ip_set_list[i]->name, name2)) { + ret = -IPSET_ERR_EXIST_SETNAME2; + goto out; + } } strncpy(set->name, name2, IPSET_MAXNAMELEN); - return 0; +out: + read_unlock_bh(&ip_set_ref_lock); + return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * - * We are protected by the nfnl mutex and references are - * manipulated only by holding the mutex. The kernel interfaces + * The commands are serialized by the nfnl mutex and references are + * protected by the ip_set_ref_lock. The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. */ @@ -874,7 +900,6 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; - u32 from_ref; if (unlikely(protocol_failed(attr) || attr[IPSET_ATTR_SETNAME] == NULL || @@ -899,17 +924,15 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, from->type->family == to->type->family)) return -IPSET_ERR_TYPE_MISMATCH; - /* No magic here: ref munging protected by the nfnl_lock */ strncpy(from_name, from->name, IPSET_MAXNAMELEN); - from_ref = atomic_read(&from->ref); - strncpy(from->name, to->name, IPSET_MAXNAMELEN); - atomic_set(&from->ref, atomic_read(&to->ref)); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - atomic_set(&to->ref, from_ref); + write_lock_bh(&ip_set_ref_lock); + swap(from->ref, to->ref); ip_set_list[from_id] = to; ip_set_list[to_id] = from; + write_unlock_bh(&ip_set_ref_lock); return 0; } @@ -926,7 +949,7 @@ ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[2]) { pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); - __ip_set_put((ip_set_id_t) cb->args[1]); + ip_set_put_byindex((ip_set_id_t) cb->args[1]); } return 0; } @@ -1068,7 +1091,7 @@ release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { pr_debug("release set %s\n", ip_set_list[index]->name); - __ip_set_put(index); + ip_set_put_byindex(index); } /* If we dump all sets, continue with dumping last ones */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index f4a46c0d25f3..e9159e99fc4b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -366,8 +366,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); if (with_timeout(map->timeout)) NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); - NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, - htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)); NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->size * map->dsize)); ipset_nest_end(skb, nested); @@ -457,8 +456,7 @@ list_set_gc(unsigned long ul_set) struct list_set *map = set->data; struct set_telem *e; u32 i; - - /* nfnl_lock should be called */ + write_lock_bh(&set->lock); for (i = 0; i < map->size; i++) { e = list_set_telem(map, i); -- cgit v1.2.3 From b4232a22776aa5d063f890d21ca69870dbbe431b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 4 Apr 2011 15:21:02 +0200 Subject: netfilter: h323: bug in parsing of ASN1 SEQOF field Static analyzer of clang found a dead store which appears to be a bug in reading count of items in SEQOF field, only the lower byte of word is stored. This may lead to corrupted read and communication shutdown. The bug has been in the module since it's first inclusion into linux kernel. [Patrick: the bug is real, but without practical consequence since the largest amount of sequence-of members we parse is 30.] Signed-off-by: David Sterba Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_h323_asn1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 867882313e49..bcd5ed6b7130 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -631,7 +631,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, CHECK_BOUND(bs, 2); count = *bs->cur++; count <<= 8; - count = *bs->cur++; + count += *bs->cur++; break; case SEMI: BYTE_ALIGN(bs); -- cgit v1.2.3 From a09d19779f3ffac6e16821accc2c1cc4df1b643a Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 4 Apr 2011 15:25:18 +0200 Subject: IPVS: fix NULL ptr dereference in ip_vs_ctl.c ip_vs_genl_dump_daemons() ipvsadm -ln --daemon will trigger a Null pointer exception because ip_vs_genl_dump_daemons() uses skb_net() instead of skb_sknet(). To prevent others from NULL ptr a check is made in ip_vs.h skb_net(). Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 33733c8872e7..ae47090bf45f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3120,7 +3120,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_net(skb); + struct net *net = skb_sknet(skb); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); -- cgit v1.2.3 From 31ad3dd64e689bc79dd819f8f134b9b025240eb8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 16:56:29 +0200 Subject: netfilter: af_info: add network namespace parameter to route hook This is required to eventually replace the rt6_lookup call in xt_addrtype.c with nf_afinfo->route(). Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/ipv4/netfilter.c | 5 +++-- net/ipv6/netfilter.c | 5 +++-- net/netfilter/nf_conntrack_h323_main.c | 8 ++++---- net/netfilter/xt_TCPMSS.c | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f3c0b549b8e1..f1035f056503 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, return csum; } -static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip_route(struct net *net, struct dst_entry **dst, + struct flowi *fl) { - struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); + struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 39aaca2b4fd2..e008b9b4a779 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -90,9 +90,10 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +static int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl) { - *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); + *dst = ip6_route_output(net, NULL, &fl->u.ip6); return (*dst)->error; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 533a183e6661..39a453895b4d 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -731,9 +731,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi4_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi4_to_flowi(&fl2))) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) @@ -755,9 +755,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); - if (!afinfo->route((struct dst_entry **)&rt1, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1))) { - if (!afinfo->route((struct dst_entry **)&rt2, + if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2))) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 6e6b46cb1db9..8690125e3b18 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route((struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl); rcu_read_unlock(); if (rt != NULL) { -- cgit v1.2.3 From 0fae2e7740aca7e384c5f337f458897e7e337d58 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 17:00:54 +0200 Subject: netfilter: af_info: add 'strict' parameter to limit lookup to .oif ipv6 fib lookup can set RT6_LOOKUP_F_IFACE flag to restrict search to an interface, but this flag cannot be set via struct flowi. Also, it cannot be set via ip6_route_output: this function uses the passed sock struct to determine if this flag is required (by testing for nonzero sk_bound_dev_if). Work around this by passing in an artificial struct sk in case 'strict' argument is true. This is required to replace the rt6_lookup call in xt_addrtype.c with nf_afinfo->route(). Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/ipv4/netfilter.c | 2 +- net/ipv6/netfilter.c | 12 ++++++++++-- net/netfilter/nf_conntrack_h323_main.c | 8 ++++---- net/netfilter/xt_TCPMSS.c | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f1035f056503..4614babdc45f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -222,7 +222,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, } static int nf_ip_route(struct net *net, struct dst_entry **dst, - struct flowi *fl) + struct flowi *fl, bool strict __always_unused) { struct rtable *rt = ip_route_output_key(net, &fl->u.ip4); if (IS_ERR(rt)) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index e008b9b4a779..28bc1f644b7b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -91,9 +91,17 @@ static int nf_ip6_reroute(struct sk_buff *skb, } static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl) + struct flowi *fl, bool strict) { - *dst = ip6_route_output(net, NULL, &fl->u.ip6); + static const struct ipv6_pinfo fake_pinfo; + static const struct inet_sock fake_sk = { + /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */ + .sk.sk_bound_dev_if = 1, + .pinet6 = (struct ipv6_pinfo *) &fake_pinfo, + }; + const void *sk = strict ? &fake_sk : NULL; + + *dst = ip6_route_output(net, sk, &fl->u.ip6); return (*dst)->error; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 39a453895b4d..18b2ce5c8ced 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -732,9 +732,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, - flowi4_to_flowi(&fl1))) { + flowi4_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, - flowi4_to_flowi(&fl2))) { + flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -756,9 +756,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); ipv6_addr_copy(&fl2.daddr, &dst->in6); if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, - flowi6_to_flowi(&fl1))) { + flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2))) { + flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8690125e3b18..9e63b43faeed 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -166,7 +166,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl); + ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); rcu_read_unlock(); if (rt != NULL) { -- cgit v1.2.3 From b7225041e93f81e7e38fcdf27fc82044e7695efd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 17:01:43 +0200 Subject: netfilter: xt_addrtype: replace rt6_lookup with nf_afinfo->route This avoids pulling in the ipv6 module when using (ipv4-only) iptables -m addrtype. Signed-off-by: Florian Westphal Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 1 - net/netfilter/xt_addrtype.c | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c3f988aa1152..32bff6d86cb2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -652,7 +652,6 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED - depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 2220b85e9519..b77d383cec78 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -32,11 +32,32 @@ MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr) { + const struct nf_afinfo *afinfo; + struct flowi6 flow; + struct rt6_info *rt; u32 ret; + int route_err; - if (!rt) + memset(&flow, 0, sizeof(flow)); + ipv6_addr_copy(&flow.daddr, addr); + if (dev) + flow.flowi6_oif = dev->ifindex; + + rcu_read_lock(); + + afinfo = nf_get_afinfo(NFPROTO_IPV6); + if (afinfo != NULL) + route_err = afinfo->route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), !!dev); + else + route_err = 1; + + rcu_read_unlock(); + + if (route_err) return XT_ADDRTYPE_UNREACHABLE; if (rt->rt6i_flags & RTF_REJECT) @@ -48,6 +69,9 @@ static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) ret |= XT_ADDRTYPE_LOCAL; if (rt->rt6i_flags & RTF_ANYCAST) ret |= XT_ADDRTYPE_ANYCAST; + + + dst_release(&rt->dst); return ret; } @@ -65,18 +89,8 @@ static bool match_type6(struct net *net, const struct net_device *dev, return false; if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | - XT_ADDRTYPE_UNREACHABLE) & mask) { - struct rt6_info *rt; - u32 type; - int ifindex = dev ? dev->ifindex : 0; - - rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); - - type = xt_addrtype_rt6_to_type(rt); - - dst_release(&rt->dst); - return !!(mask & type); - } + XT_ADDRTYPE_UNREACHABLE) & mask) + return !!(mask & match_lookup_rt6(net, dev, addr)); return true; } -- cgit v1.2.3 From 96120d86fe302c006259baee9061eea9e1b9e486 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 4 Apr 2011 17:06:21 +0200 Subject: netfilter: xt_conntrack: fix inverted conntrack direction test --ctdir ORIGINAL matches REPLY packets, and vv: userspace sets "invert_flags &= ~XT_CONNTRACK_DIRECTION" in ORIGINAL case. Thus: (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) yields "1 ^ 0", which is true -> returns false. Reproducer: iptables -I OUTPUT 1 -p tcp --syn -m conntrack --ctdir ORIGINAL Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/xt_conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 2c0086a4751e..481a86fdc409 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -195,7 +195,7 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return info->match_flags & XT_CONNTRACK_STATE; if ((info->match_flags & XT_CONNTRACK_DIRECTION) && (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ^ - !!(info->invert_flags & XT_CONNTRACK_DIRECTION)) + !(info->invert_flags & XT_CONNTRACK_DIRECTION)) return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) -- cgit v1.2.3 From fcf8bd3ba5362682f945a3f838070ac5e10ff871 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 1 Apr 2011 15:46:05 +0200 Subject: mac80211: Fix duplicate frames on cooked monitor Cleaning the ieee80211_rx_data.flags field here is wrong, instead the flags should be valid accross processing the frame on different interfaces. Fix this by removing the incorrect flags=0 assignment. Introduced in commit 554891e63a29af35cc6bb403ef34e319518114d0 (mac80211: move packet flags into packet). Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index aa5cc37b4921..2afeac9c6453 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2541,7 +2541,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) * same TID from the same station */ rx->skb = skb; - rx->flags = 0; CALL_RXH(ieee80211_rx_h_decrypt) CALL_RXH(ieee80211_rx_h_check_more_data) @@ -2612,6 +2611,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) .sdata = sta->sdata, .local = sta->local, .queue = tid, + .flags = 0, }; struct tid_ampdu_rx *tid_agg_rx; -- cgit v1.2.3 From 738faca34335cd1d5d87fa7c58703139c7fa15bd Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 Apr 2011 13:07:26 -0700 Subject: ipv6: Don't pass invalid dst_entry pointer to dst_release(). Make sure dst_release() is not called with error pointer. This is similar to commit 4910ac6c526d2868adcb5893e0c428473de862b5 ("ipv4: Don't ip_rt_put() an error pointer in RAW sockets."). Signed-off-by: Boris Ostrovsky Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b0c186862c8..56fa12538d45 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -503,6 +503,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); + dst = NULL; goto done; } skb = tcp_make_synack(sk, dst, req, rvp); -- cgit v1.2.3 From 77f38e0eeac290827f41fd2215ab82546b8f73b8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Apr 2011 09:09:16 -0700 Subject: libceph: fix linger request requeueing Fix the request transition from linger -> normal request. The key is to preserve r_osd and requeue on the same OSD. Reregister as a normal request, add the request to the proper queues, then unregister the linger. Fix the unregister helper to avoid clearing r_osd (and also simplify the parallel check in __unregister_request()). Reported-by: Henry Chang Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3b91d651fe08..9204de484c2e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -579,9 +579,15 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, r_linger_osd) { - __unregister_linger_request(osdc, req); + /* + * reregister request prior to unregistering linger so + * that r_osd is preserved. + */ + BUG_ON(!list_empty(&req->r_req_lru_item)); __register_request(osdc, req); - list_move(&req->r_req_lru_item, &osdc->req_unsent); + list_add(&req->r_req_lru_item, &osdc->req_unsent); + list_add(&req->r_osd_item, &req->r_osd->o_requests); + __unregister_linger_request(osdc, req); dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, osd->o_osd); } @@ -798,7 +804,7 @@ static void __register_request(struct ceph_osd_client *osdc, req->r_request->hdr.tid = cpu_to_le64(req->r_tid); INIT_LIST_HEAD(&req->r_req_lru_item); - dout("register_request %p tid %lld\n", req, req->r_tid); + dout("__register_request %p tid %lld\n", req, req->r_tid); __insert_request(osdc, req); ceph_osdc_get_request(req); osdc->num_requests++; -- cgit v1.2.3 From 0867659fa3c245bf203d837a82e0f6ea5079c2c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Apr 2011 10:13:32 -0700 Subject: Revert "net/sunrpc: Use static const char arrays" This reverts commit 411b5e05617593efebc06241dbc56f42150f2abe. Olga Kornievskaia reports: Problem: linux client mounting linux server using rc4-hmac-md5 enctype. gssd fails with create a context after receiving a reply from the server. Diagnose: putting printout statements in the server kernel and kerberos libraries revealed that client and server derived different integrity keys. Server kernel code was at fault due the the commit [aglo@skydive linux-pnfs]$ git show 411b5e05617593efebc06241dbc56f42150f2abe Trond: The problem is that since it relies on virt_to_page(), you cannot call sg_set_buf() for data in the const section. Reported-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Cc: stable@kernel.org [2.6.36+] --- net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 9022f0a6503e..0a9a2ec2e469 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -427,7 +427,7 @@ static int context_derive_keys_rc4(struct krb5_ctx *ctx) { struct crypto_hash *hmac; - static const char sigkeyconstant[] = "signaturekey"; + char sigkeyconstant[] = "signaturekey"; int slen = strlen(sigkeyconstant) + 1; /* include null terminator */ struct hash_desc desc; struct scatterlist sg[1]; -- cgit v1.2.3 From 47482f132a689af168fae3055ff1899dfd032d3a Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 6 Apr 2011 13:07:09 -0700 Subject: ipv6: Enable RFS sk_rxhash tracking for ipv6 sockets (v2) properly record sk_rxhash in ipv6 sockets (v2) Noticed while working on another project that flows to sockets which I had open on a test systems weren't getting steered properly when I had RFS enabled. Looking more closely I found that: 1) The affected sockets were all ipv6 2) They weren't getting steered because sk->sk_rxhash was never set from the incomming skbs on that socket. This was occuring because there are several points in the IPv4 tcp and udp code which save the rxhash value when a new connection is established. Those calls to sock_rps_save_rxhash were never added to the corresponding ipv6 code paths. This patch adds those calls. Tested by myself to properly enable RFS functionalty on ipv6. Change notes: v2: Filtered UDP to only arm RFS on bound sockets (Eric Dumazet) Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- net/ipv6/udp.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 56fa12538d45..4f49e5dd41bb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1622,6 +1622,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1649,7 +1650,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) __kfree_skb(opt_skb); return 0; } - } + } else + sock_rps_save_rxhash(sk, skb->rxhash); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d7037c006e13..15c37746845e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -505,6 +505,9 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int rc; int is_udplite = IS_UDPLITE(sk); + if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + sock_rps_save_rxhash(sk, skb->rxhash); + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; -- cgit v1.2.3 From ec80bfcb68a0c46443991991d459a0cde773cdea Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Tue, 5 Apr 2011 03:03:56 +0000 Subject: dsa/mv88e6131: add support for mv88e6085 switch The mv88e6085 is identical to the mv88e6095, except that all ports are 10/100 Mb/s, so use the existing setup code except for the cpu/dsa speed selection in _setup_port(). Signed-off-by: Peter Korsgaard Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/dsa/mv88e6131.c | 23 +++++++++++++++++++---- net/dsa/mv88e6xxx.h | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index bb2b41bc854e..a8e4f8c7dca4 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -14,6 +14,13 @@ #include "dsa_priv.h" #include "mv88e6xxx.h" +/* + * Switch product IDs + */ +#define ID_6085 0x04a0 +#define ID_6095 0x0950 +#define ID_6131 0x1060 + static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) { int ret; @@ -21,9 +28,11 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; - if (ret == 0x0950) + if (ret == ID_6085) + return "Marvell 88E6085"; + if (ret == ID_6095) return "Marvell 88E6095/88E6095F"; - if (ret == 0x1060) + if (ret == ID_6131) return "Marvell 88E6131"; } @@ -164,6 +173,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) static int mv88e6131_setup_port(struct dsa_switch *ds, int p) { + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); int addr = REG_PORT(p); u16 val; @@ -171,10 +181,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * MAC Forcing register: don't force link, speed, duplex * or flow control state to any particular values on physical * ports, but force the CPU port and all DSA ports to 1000 Mb/s - * full duplex. + * (100 Mb/s on 6085) full duplex. */ if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) - REG_WRITE(addr, 0x01, 0x003e); + if (ps->id == ID_6085) + REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */ + else + REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */ else REG_WRITE(addr, 0x01, 0x0003); @@ -286,6 +299,8 @@ static int mv88e6131_setup(struct dsa_switch *ds) mv88e6xxx_ppu_state_init(ds); mutex_init(&ps->stats_mutex); + ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0; + ret = mv88e6131_switch_reset(ds); if (ret < 0) return ret; diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h index eb0e0aaa9f1b..61156ca26a0d 100644 --- a/net/dsa/mv88e6xxx.h +++ b/net/dsa/mv88e6xxx.h @@ -39,6 +39,8 @@ struct mv88e6xxx_priv_state { * Hold this mutex over snapshot + dump sequences. */ struct mutex stats_mutex; + + int id; /* switch product id */ }; struct mv88e6xxx_hw_stat { -- cgit v1.2.3 From 1b86a58f9d7ce4fe2377687f378fbfb53bdc9b6c Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 7 Apr 2011 14:04:08 -0700 Subject: ipv4: Fix "Set rt->rt_iif more sanely on output routes." Commit 1018b5c01636c7c6bda31a719bda34fc631db29a ("Set rt->rt_iif more sanely on output routes.") breaks rt_is_{output,input}_route. This became the cause to return "IP_PKTINFO's ->ipi_ifindex == 0". To fix it, this does: 1) Add "int rt_route_iif;" to struct rtable 2) For input routes, always set rt_route_iif to same value as rt_iif 3) For output routes, always set rt_route_iif to zero. Set rt_iif as it is done currently. 4) Change rt_is_{output,input}_route() to test rt_route_iif Signed-off-by: OGAWA Hirofumi Signed-off-by: David S. Miller --- net/ipv4/route.c | 8 ++++++-- net/ipv4/xfrm4_policy.c | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4b0c81180804..1628be530314 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1891,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); @@ -2026,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; + rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); @@ -2202,6 +2204,7 @@ local_input: #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif + rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); @@ -2401,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = oldflp4->flowi4_mark; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; - rth->rt_iif = 0; + rth->rt_route_iif = 0; + rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; @@ -2716,6 +2720,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_key_dst = ort->rt_key_dst; rt->rt_key_src = ort->rt_key_src; rt->rt_tos = ort->rt_tos; + rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; @@ -2725,7 +2730,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; - rt->rt_iif = ort->rt_iif; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; rt->peer = ort->peer; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 13e0e7f659ff..d20a05e970d8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl4->daddr; rt->rt_key_src = fl4->saddr; rt->rt_tos = fl4->flowi4_tos; + rt->rt_route_iif = fl4->flowi4_iif; rt->rt_iif = fl4->flowi4_iif; rt->rt_oif = fl4->flowi4_oif; rt->rt_mark = fl4->flowi4_mark; -- cgit v1.2.3 From 0c184ed9032c58b21f0d90de28c796874b73d6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Mon, 11 Apr 2011 10:11:29 +0000 Subject: caif: Bugfix use for_each_safe when removing list nodes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfmuxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 46f34b2e0478..24f1ffa74b06 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -244,9 +244,9 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfmuxl *muxl = container_obj(layr); - struct list_head *node; + struct list_head *node, *next; struct cflayer *layer; - list_for_each(node, &muxl->srvl_list) { + list_for_each_safe(node, next, &muxl->srvl_list) { layer = list_entry(node, struct cflayer, node); if (cfsrvl_phyid_match(layer, phyid)) layer->ctrlcmd(layer, ctrl, phyid); -- cgit v1.2.3 From 4a9f65f6304a00f6473e83b19c1e83caa1e42530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= Date: Mon, 11 Apr 2011 10:11:30 +0000 Subject: caif: performance bugfix - allow radio stack to prioritize packets. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the CAIF Payload message the Packet Type indication must be set to UNCLASSIFIED in order to allow packet prioritization in the modem's network stack. Otherwise TCP-Ack is not prioritized in the modems transmit queue. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfdgml.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 27dab26ad3b8..054fdb5aeb88 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -13,6 +13,7 @@ #include #include + #define container_obj(layr) ((struct cfsrvl *) layr) #define DGM_CMD_BIT 0x80 @@ -83,6 +84,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt) static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) { + u8 packet_type; u32 zero = 0; struct caif_payload_info *info; struct cfsrvl *service = container_obj(layr); @@ -94,7 +96,9 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (cfpkt_getlen(pkt) > DGM_MTU) return -EMSGSIZE; - cfpkt_add_head(pkt, &zero, 4); + cfpkt_add_head(pkt, &zero, 3); + packet_type = 0x08; /* B9 set - UNCLASSIFIED */ + cfpkt_add_head(pkt, &packet_type, 1); /* Add info for MUX-layer to route the packet out. */ info = cfpkt_info(pkt); -- cgit v1.2.3 From aa8673599f1d269b4e4d9b0c0f61fca57bc02699 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Apr 2011 18:59:05 -0700 Subject: llc: Fix length check in llc_fixup_skb(). Fixes bugzilla #32872 The LLC stack pretends to support non-linear skbs but there is a direct use of skb_tail_pointer() in llc_fixup_skb(). Use pskb_may_pull() to see if data_size bytes remain and can be accessed linearly in the packet, instead of direct pointer checks. Signed-off-by: David S. Miller --- net/llc/llc_input.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 058f1e9a9128..903242111317 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -121,8 +121,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) s32 data_size = ntohs(pdulen) - llc_len; if (data_size < 0 || - ((skb_tail_pointer(skb) - - (u8 *)pdu) - llc_len) < data_size) + !pskb_may_pull(skb, data_size)) return 0; if (unlikely(pskb_trim_rcsum(skb, data_size))) return 0; -- cgit v1.2.3 From f8e9881c2aef1e982e5abc25c046820cd0b7cf64 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2011 13:39:14 -0700 Subject: bridge: reset IPCB in br_parse_ip_options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 462fb2af9788a82 (bridge : Sanitize skb before it enters the IP stack), missed one IPCB init before calling ip_options_compile() Thanks to Scot Doyle for his tests and bug reports. Reported-by: Scot Doyle Signed-off-by: Eric Dumazet Cc: Hiroaki SHIMODA Acked-by: Bandan Das Acked-by: Stephen Hemminger Cc: Jan Lübbe Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 008ff6c4eecf..f3bc322c5891 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -249,11 +249,9 @@ static int br_parse_ip_options(struct sk_buff *skb) goto drop; } - /* Zero out the CB buffer if no options present */ - if (iph->ihl == 5) { - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (iph->ihl == 5) return 0; - } opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) -- cgit v1.2.3 From 66944e1c5797562cebe2d1857d46dff60bf9a69e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Apr 2011 22:39:40 +0000 Subject: inetpeer: reduce stack usage On 64bit arches, we use 752 bytes of stack when cleanup_once() is called from inet_getpeer(). Lets share the avl stack to save ~376 bytes. Before patch : # objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl 0x000006c3 unlink_from_pool [inetpeer.o]: 376 0x00000721 unlink_from_pool [inetpeer.o]: 376 0x00000cb1 inet_getpeer [inetpeer.o]: 376 0x00000e6d inet_getpeer [inetpeer.o]: 376 0x0004 inet_initpeers [inetpeer.o]: 112 # size net/ipv4/inetpeer.o text data bss dec hex filename 5320 432 21 5773 168d net/ipv4/inetpeer.o After patch : objdump -d net/ipv4/inetpeer.o | scripts/checkstack.pl 0x00000c11 inet_getpeer [inetpeer.o]: 376 0x00000dcd inet_getpeer [inetpeer.o]: 376 0x00000ab9 peer_check_expire [inetpeer.o]: 328 0x00000b7f peer_check_expire [inetpeer.o]: 328 0x0004 inet_initpeers [inetpeer.o]: 112 # size net/ipv4/inetpeer.o text data bss dec hex filename 5163 432 21 5616 15f0 net/ipv4/inetpeer.o Signed-off-by: Eric Dumazet Cc: Scot Doyle Cc: Stephen Hemminger Cc: Hiroaki SHIMODA Reviewed-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dd1b20eca1a2..9df4e635fb5f 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -354,7 +354,8 @@ static void inetpeer_free_rcu(struct rcu_head *head) } /* May be called with local BH enabled. */ -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) +static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, + struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { int do_free; @@ -368,7 +369,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) * We use refcnt=-1 to alert lockless readers this entry is deleted. */ if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH]; struct inet_peer __rcu ***stackptr, ***delp; if (lookup(&p->daddr, stack, base) != p) BUG(); @@ -422,7 +422,7 @@ static struct inet_peer_base *peer_to_base(struct inet_peer *p) } /* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl) +static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { struct inet_peer *p = NULL; @@ -454,7 +454,7 @@ static int cleanup_once(unsigned long ttl) * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p, peer_to_base(p)); + unlink_from_pool(p, peer_to_base(p), stack); return 0; } @@ -524,7 +524,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ - cleanup_once(0); + cleanup_once(0, stack); return p; } @@ -540,6 +540,7 @@ static void peer_check_expire(unsigned long dummy) { unsigned long now = jiffies; int ttl, total; + struct inet_peer __rcu **stack[PEER_MAXDEPTH]; total = compute_total(); if (total >= inet_peer_threshold) @@ -548,7 +549,7 @@ static void peer_check_expire(unsigned long dummy) ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl)) { + while (!cleanup_once(ttl, stack)) { if (jiffies != now) break; } -- cgit v1.2.3 From 192910a6cca5e50e5bd6cbd1da0e7376c7adfe62 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 12 Apr 2011 13:59:33 -0700 Subject: net: Do not wrap sysctl igmp_max_memberships in IP_MULTICAST controlling igmp_max_membership is useful even when IP_MULTICAST is off. Quagga(an OSPF deamon) uses multicast addresses for all interfaces using a single socket and hits igmp_max_membership limit when there are 20 interfaces or more. Always export sysctl igmp_max_memberships in proc, just like igmp_max_msf Signed-off-by: Joakim Tjernlund Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1a456652086b..321e6e84dbcc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -311,7 +311,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_do_large_bitmap, }, -#ifdef CONFIG_IP_MULTICAST { .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, @@ -319,8 +318,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - -#endif { .procname = "igmp_max_msf", .data = &sysctl_igmp_max_msf, -- cgit v1.2.3 From 020318d0d2af51e0fd59ba654ede9b2171558720 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 12 Apr 2011 15:29:54 -0700 Subject: irda: fix locking unbalance in irda_sendmsg 5b40964eadea40509d353318d2c82e8b7bf5e8a5 ("irda: Remove BKL instances from af_irda.c") introduced a path where we have a locking unbalance. If we pass invalid flags, we unlock a socket we never locked, resulting in this... ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- trinity/20101 is trying to release lock (sk_lock-AF_IRDA) at: [] irda_sendmsg+0x207/0x21d [irda] but there are no more locks to release! other info that might help us debug this: no locks held by trinity/20101. stack backtrace: Pid: 20101, comm: trinity Not tainted 2.6.39-rc3+ #3 Call Trace: [] ? irda_sendmsg+0x207/0x21d [irda] [] print_unlock_inbalance_bug+0xc7/0xd2 [] ? irda_sendmsg+0x207/0x21d [irda] [] lock_release+0xcf/0x18e [] release_sock+0x2d/0x155 [] irda_sendmsg+0x207/0x21d [irda] [] __sock_sendmsg+0x69/0x75 [] sock_sendmsg+0xa1/0xb6 [] ? might_fault+0x5c/0xac [] ? lock_release+0x181/0x18e [] ? might_fault+0xa5/0xac [] ? might_fault+0x5c/0xac [] ? fcheck_files+0xb9/0xf0 [] ? copy_from_user+0x2f/0x31 [] ? verify_iovec+0x52/0xa6 [] sys_sendmsg+0x23a/0x2b8 [] ? lock_release+0x181/0x18e [] ? up_read+0x28/0x2c [] ? do_page_fault+0x360/0x3b4 [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? finish_task_switch+0xb2/0xe3 [] ? finish_task_switch+0x46/0xe3 [] ? trace_hardirqs_off_caller+0x33/0x90 [] ? retint_swapgs+0x13/0x1b [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? audit_syscall_entry+0x11c/0x148 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/irda/af_irda.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index c9890e25cd4c..cc616974a447 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1297,8 +1297,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | MSG_NOSIGNAL)) { - err = -EINVAL; - goto out; + return -EINVAL; } lock_sock(sk); -- cgit v1.2.3 From bfac3693c426d280b026f6a1b77dc2294ea43fea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 12 Apr 2011 15:33:23 -0700 Subject: ieee802154: Remove hacked CFLAGS in net/ieee802154/Makefile It adds -Wall (which the kernel carefully controls already) and of all things -DDEBUG (which should be set by other means if desired, please we have dynamic-debug these days). Kill this noise. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/ieee802154/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index ce2d33582859..5761185f884e 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,3 @@ obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o - -ccflags-y += -Wall -DDEBUG -- cgit v1.2.3 From ea2d36883ca8e6caab23b6d15bfa80b1d1d81d2f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:38:37 +0000 Subject: net: Disable all TSO features when SG is disabled The feature flags NETIF_F_TSO and NETIF_F_TSO6 independently enable TSO for IPv4 and IPv6 respectively. However, the test in netdev_fix_features() and its predecessor functions was never updated to check for NETIF_F_TSO6, possibly because it was originally proposed that TSO for IPv6 would be dependent on both feature flags. Now that these feature flags can be changed independently from user-space and we depend on netdev_fix_features() to fix invalid feature combinations, it's important to disable them both if scatter-gather is disabled. Also disable NETIF_F_TSO_ECN so user-space sees all TSO features as disabled. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 956d3b006e8b..6401fb588145 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5203,9 +5203,9 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } /* TSO requires that SG is present as well. */ - if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); - features &= ~NETIF_F_TSO; + if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + features &= ~NETIF_F_ALL_TSO; } /* Software GSO depends on SG. */ -- cgit v1.2.3 From 31d8b9e099e59f880aa65095951559896d4e20fa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 12 Apr 2011 14:47:15 +0000 Subject: net: Disable NETIF_F_TSO_ECN when TSO is disabled NETIF_F_TSO_ECN has no effect when TSO is disabled; this just means that feature state will be accurately reported to user-space. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6401fb588145..c2ac599fa0f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5208,6 +5208,10 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_ALL_TSO; } + /* TSO ECN requires that TSO is present as well. */ + if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) + features &= ~NETIF_F_TSO_ECN; + /* Software GSO depends on SG. */ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); -- cgit v1.2.3 From 25f7bf7d0dfb460505cbe42676340e33100aca2e Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Apr 2011 15:20:48 +0000 Subject: sctp: fix oops when updating retransmit path with DEBUG on commit fbdf501c9374966a56829ecca3a7f25d2b49a305 sctp: Do no select unconfirmed transports for retransmissions Introduced the initial falt. commit d598b166ced20d9b9281ea3527c0e18405ddb803 sctp: Make sure we always return valid retransmit path Solved the problem, but forgot to change the DEBUG statement. Thus it was still possible to dereference a NULL pointer. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0698cad61763..922fdd7eb573 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1323,6 +1323,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (t) asoc->peer.retran_path = t; + else + t = asoc->peer.retran_path; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", -- cgit v1.2.3 From 9494c7c5774d64a84a269aad38c153c4dbff97e6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Apr 2011 15:22:22 +0000 Subject: sctp: fix oops while removed transport still using as retran path Since we can not update retran path to unconfirmed transports, when we remove a peer, the retran path may not be update if the other transports are all unconfirmed, and we will still using the removed transport as the retran path. This may cause panic if retrasnmit happen. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 922fdd7eb573..1a21c571aa03 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -569,6 +569,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, sctp_assoc_set_primary(asoc, transport); if (asoc->peer.active_path == peer) asoc->peer.active_path = transport; + if (asoc->peer.retran_path == peer) + asoc->peer.retran_path = transport; if (asoc->peer.last_data_from == peer) asoc->peer.last_data_from = transport; -- cgit v1.2.3 From 0e8a835aa59d08d702af0fcfd296e2218b2e344b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 13 Apr 2011 13:43:23 +0200 Subject: netfilter: ipset: bitmap:ip,mac type requires "src" for MAC Enforce that the second "src/dst" parameter of the set match and SET target must be "src", because we have access to the source MAC only in the packet. The previous behaviour, that the type required the second parameter but actually ignored the value was counter-intuitive and confusing. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 00a33242e90c..a274300b6a56 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -343,6 +343,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, ipset_adtfn adtfn = set->variant->adt[adt]; struct ipmac data; + /* MAC can be src only */ + if (!(flags & IPSET_DIM_TWO_SRC)) + return 0; + data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); if (data.id < map->first_ip || data.id > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; -- cgit v1.2.3 From eafbd3fde6fc5ada0d61307367e408813b04928a Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 13 Apr 2011 13:45:57 +0200 Subject: netfilter: ipset: set match and SET target fixes The SET target with --del-set did not work due to using wrongly the internal dimension of --add-set instead of --del-set. Also, the checkentries did not release the set references when returned an error. Bugs reported by Lennert Buytenhek. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/xt_set.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 061d48cec137..b3babaed7719 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,6 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); return -ERANGE; } @@ -135,6 +136,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); return -ENOENT; } } @@ -142,6 +145,10 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); return -ERANGE; } @@ -192,6 +199,7 @@ set_match_checkentry(const struct xt_mtchk_param *par) if (info->match_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); + ip_set_nfnl_put(info->match_set.index); return -ERANGE; } @@ -219,7 +227,7 @@ set_target(struct sk_buff *skb, const struct xt_action_param *par) if (info->del_set.index != IPSET_INVALID_ID) ip_set_del(info->del_set.index, skb, par->family, - info->add_set.dim, + info->del_set.dim, info->del_set.flags); return XT_CONTINUE; @@ -245,13 +253,19 @@ set_target_checkentry(const struct xt_tgchk_param *par) if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); return -ENOENT; } } if (info->add_set.dim > IPSET_DIM_MAX || - info->del_set.flags > IPSET_DIM_MAX) { + info->del_set.dim > IPSET_DIM_MAX) { pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); return -ERANGE; } -- cgit v1.2.3 From 3e8c806a08c7beecd972e7ce15c570b9aba64baa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Apr 2011 12:01:14 -0700 Subject: Revert "tcp: disallow bind() to reuse addr/port" This reverts commit c191a836a908d1dd6b40c503741f91b914de3348. It causes known regressions for programs that expect to be able to use SO_REUSEADDR to shutdown a socket, then successfully rebind another socket to the same ID. Programs such as haproxy and amavisd expect this to work. This should fix kernel bugzilla 32832. Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 5 ++--- net/ipv6/inet6_connection_sock.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6c0b7f4a3d7d..38f23e721b80 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || - ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { + sk2->sk_state == TCP_LISTEN) { const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || sk2_rcv_saddr == sk_rcv_saddr(sk)) @@ -122,8 +122,7 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 && - !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { spin_unlock(&head->lock); snum = smallest_rover; goto have_snum; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 166054650466..f2c5b0fc0f21 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk, !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && (!sk->sk_reuse || !sk2->sk_reuse || - ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) && + sk2->sk_state == TCP_LISTEN) && ipv6_rcv_saddr_equal(sk, sk2)) break; } -- cgit v1.2.3 From d1a8016a2d1e75021ecc8715e3c81442d7218eb6 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Wed, 13 Apr 2011 14:31:28 -0400 Subject: NFS: Fix infinite loop in gss_create_upcall() There can be an infinite loop if gss_create_upcall() is called without the userspace program running. To prevent this, we return -EACCES if we notice that pipe_version hasn't changed (indicating that the pipe has not been opened). Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f3914d0c5079..339ba64cce1e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -520,7 +520,7 @@ gss_refresh_upcall(struct rpc_task *task) warn_gssd(); task->tk_timeout = 15*HZ; rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL); - return 0; + return -EAGAIN; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); @@ -563,10 +563,12 @@ retry: if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, pipe_version >= 0, 15*HZ); + if (pipe_version < 0) { + warn_gssd(); + err = -EACCES; + } if (err) goto out; - if (pipe_version < 0) - warn_gssd(); goto retry; } if (IS_ERR(gss_msg)) { -- cgit v1.2.3 From c65353daf137dd41f3ede3baf62d561fca076228 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Apr 2011 05:55:37 +0000 Subject: ip: ip_options_compile() resilient to NULL skb route Scot Doyle demonstrated ip_options_compile() could be called with an skb without an attached route, using a setup involving a bridge, netfilter, and forged IP packets. Let's make ip_options_compile() and ip_options_rcv_srr() a bit more robust, instead of changing bridge/netfilter code. With help from Hiroaki SHIMODA. Reported-by: Scot Doyle Tested-by: Scot Doyle Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Acked-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 28a736f3442f..2391b24e8251 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -329,7 +329,7 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); opt->is_changed = 1; } @@ -371,7 +371,7 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (skb) { + if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); timeptr = (__be32*)&optptr[optptr[2]+3]; } @@ -603,7 +603,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr) + if (!opt->srr || !rt) return 0; if (skb->pkt_type != PACKET_HOST) -- cgit v1.2.3 From df5d8c80f1871d9e79af4b0f3656a9528a7d4bab Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 20:38:35 +0530 Subject: 9p: revert tsyncfs related changes Now that we use write_inode to flush server cache related to fid, we don't need tsyncfs either fort dotl or dotu protocols. For dotu this helps to do a more efficient server flush. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 48b8e084e710..d72aac7d25cc 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1220,27 +1220,6 @@ error: } EXPORT_SYMBOL(p9_client_fsync); -int p9_client_sync_fs(struct p9_fid *fid) -{ - int err = 0; - struct p9_req_t *req; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid); - - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto error; - } - P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid); - p9_free_req(clnt, req); -error: - return err; -} -EXPORT_SYMBOL(p9_client_sync_fs); - int p9_client_clunk(struct p9_fid *fid) { int err; -- cgit v1.2.3 From bd8c8ade6b6f109bc3dce14a8d12013f27f2a590 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 24 Mar 2011 23:14:46 +0530 Subject: 9p: Fix sparse error Signed-off-by: Aneesh Kumar K.V Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/protocol.c | 3 ++- net/9p/trans_common.c | 2 +- net/9p/trans_virtio.c | 15 +++++++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 8a4084fa8b5a..a7e899740041 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -468,7 +468,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, case 'E':{ int32_t cnt = va_arg(ap, int32_t); const char *k = va_arg(ap, const void *); - const char *u = va_arg(ap, const void *); + const char __user *u = va_arg(ap, + const void __user *); errcode = p9pdu_writef(pdu, proto_version, "d", cnt); if (!errcode && pdu_write_urw(pdu, k, u, cnt)) diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index d47880e971dd..e883172f9aa2 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -66,7 +66,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, uint32_t pdata_mapped_pages; struct trans_rpage_info *rpinfo; - *pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1); + *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); if (*pdata_off) first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e8f046b07182..244e70742183 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -326,8 +326,11 @@ req_retry_pinned: outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, pdata_off, rpinfo->rp_data, pdata_len); } else { - char *pbuf = req->tc->pubuf ? req->tc->pubuf : - req->tc->pkbuf; + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, req->tc->pbuf_size); } @@ -352,8 +355,12 @@ req_retry_pinned: in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, pdata_off, rpinfo->rp_data, pdata_len); } else { - char *pbuf = req->tc->pubuf ? req->tc->pubuf : - req->tc->pkbuf; + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, pbuf, req->tc->pbuf_size); } -- cgit v1.2.3 From b76225e22ac98070325ee2ba89473c1e1360c4cb Mon Sep 17 00:00:00 2001 From: Harsh Prateek Bora Date: Thu, 31 Mar 2011 15:49:39 +0530 Subject: net/9p: nwname should be an unsigned int Signed-off-by: Harsh Prateek Bora Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric VAn Hensbergen --- net/9p/client.c | 8 ++++---- net/9p/protocol.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index d72aac7d25cc..77367745be9b 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -929,15 +929,15 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, - int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, + char **wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; - int16_t nwqids, count; + uint16_t nwqids, count; err = 0; wqids = NULL; @@ -955,7 +955,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, fid = oldfid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, diff --git a/net/9p/protocol.c b/net/9p/protocol.c index a7e899740041..b58a501cf3d1 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -265,7 +265,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t *nwname = va_arg(ap, int16_t *); + uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); errcode = p9pdu_readf(pdu, proto_version, @@ -496,7 +496,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } break; case 'T':{ - int16_t nwname = va_arg(ap, int); + uint16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); errcode = p9pdu_writef(pdu, proto_version, "w", -- cgit v1.2.3 From e3b2854faabd10438f5e7e34e078b099c3375577 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Apr 2011 12:58:56 -0400 Subject: SUNRPC: Fix the SUNRPC Kerberos V RPCSEC_GSS module dependencies Since kernel 2.6.35, the SUNRPC Kerberos support has had an implicit dependency on a number of additional crypto modules. The following patch makes that dependency explicit. Signed-off-by: Trond Myklebust --- net/sunrpc/Kconfig | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 8873fd8ddacd..b2198e65d8bb 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,14 +18,13 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate + tristate "Secure RPC: Kerberos V mechanism" depends on SUNRPC && CRYPTO - prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS + depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES + depends on CRYPTO_ARC4 default y select SUNRPC_GSS - select CRYPTO_MD5 - select CRYPTO_DES - select CRYPTO_CBC help Choose Y here to enable Secure RPC using the Kerberos version 5 GSS-API mechanism (RFC 1964). -- cgit v1.2.3 From 468f86134ee515234afe5c5b3f39f266c50e61a5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 18 Apr 2011 15:57:32 -0400 Subject: NFSv4.1: Don't update sequence number if rpc_task is not sent If we fail to contact the gss upcall program, then no message will be sent to the server. The client still updated the sequence number, however, and this lead to NFS4ERR_SEQ_MISMATCH for the next several RPC calls. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9494c3767356..ce5eb68a9664 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -906,6 +906,7 @@ void xprt_transmit(struct rpc_task *task) } dprintk("RPC: %5u xmit complete\n", task->tk_pid); + task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); xprt->ops->set_retrans_timeout(task); -- cgit v1.2.3 From 7a74aeb022b34a8fa8ef00545e66cf0568b5ddf6 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 7 Apr 2011 14:59:50 +0300 Subject: Bluetooth: Fix refcount balance for hci connection hci_io_capa_reply_evt() holds reference for hciconnection. It's useless since hci_io_capa_request_evt()/hci_simple_pair_complete_evt() already protects the connection. In addition it leaves connection open after failed SSP pairing. Signed-off-by: Ville Tervo Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cebe7588469f..b2570159a044 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2387,8 +2387,6 @@ static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *s if (!conn) goto unlock; - hci_conn_hold(conn); - conn->remote_cap = ev->capability; conn->remote_oob = ev->oob_data; conn->remote_auth = ev->authentication; -- cgit v1.2.3 From b79f44c16a4e2181b1d6423afe746745d5e949ff Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 11 Apr 2011 18:46:55 -0300 Subject: Bluetooth: Fix keeping the command timer running In the teardown path the reset command is sent to the controller, this event causes the command timer to be reactivated. So the timer is removed in two situations, when the adapter isn't marked as UP and when we know that some command has been sent. Reported-by: Keith Packard Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2216620ff296..e7dced9080a5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -587,10 +587,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_req_cancel(hdev, ENODEV); hci_req_lock(hdev); - /* Stop timer, it might be running */ - del_timer_sync(&hdev->cmd_timer); - if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { + del_timer_sync(&hdev->cmd_timer); hci_req_unlock(hdev); return 0; } @@ -629,6 +627,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Drop last sent command */ if (hdev->sent_cmd) { + del_timer_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } -- cgit v1.2.3 From f21ca5fff6e548833fa5ee8867239a8378623150 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 8 Apr 2011 17:10:41 +0300 Subject: Bluetooth: fix shutdown on SCO sockets shutdown should wait for SCO link to be properly disconnected before detroying the socket, otherwise an application using the socket may assume link is properly disconnected before it really happens which can be a problem when e.g synchronizing profile switch. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 42fdffd1d76c..94954c74f6ae 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -369,6 +369,15 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: + if (sco_pi(sk)->conn) { + sk->sk_state = BT_DISCONN; + sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + hci_conn_put(sco_pi(sk)->conn->hcon); + sco_pi(sk)->conn = NULL; + } else + sco_chan_del(sk, ECONNRESET); + break; + case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.2.3 From a429b51930e64dd355840c37251a563000d7c10b Mon Sep 17 00:00:00 2001 From: Ruiyi Zhang Date: Mon, 18 Apr 2011 11:04:30 +0800 Subject: Bluetooth: Only keeping SAR bits when retransmitting one frame. When retrasmitting one frame, only SAR bits in control field should be kept. Signed-off-by: Ruiyi Zhang Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ca27f3a41536..2c8dd4494c63 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1051,6 +1051,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control &= L2CAP_CTRL_SAR; if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { control |= L2CAP_CTRL_FINAL; -- cgit v1.2.3 From a8a8a0937e22a5fd55aeb22586724ba6bb70aadd Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 19 Apr 2011 15:59:15 +0200 Subject: netfilter: ipset: Fix the order of listing of sets A restoreable saving of sets requires that list:set type of sets come last and the code part which should have taken into account the ordering was broken. The patch fixes the listing order. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e88ac3c3ed07..d87e03bc8ef8 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1022,8 +1022,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[1] >= ip_set_max) goto out; - pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; +dump_last: + pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; set = ip_set_list[index]; @@ -1038,8 +1039,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) * so that lists (unions of sets) are dumped last. */ if (cb->args[0] != DUMP_ONE && - !((cb->args[0] == DUMP_ALL) ^ - (set->type->features & IPSET_DUMP_LAST))) + ((cb->args[0] == DUMP_ALL) == + !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); if (!cb->args[2]) { @@ -1083,6 +1084,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) goto release_refcount; } } + /* If we dump all sets, continue with dumping last ones */ + if (cb->args[0] == DUMP_ALL) { + cb->args[0] = DUMP_LAST; + cb->args[1] = 0; + goto dump_last; + } goto out; nla_put_failure: @@ -1093,11 +1100,6 @@ release_refcount: pr_debug("release set %s\n", ip_set_list[index]->name); ip_set_put_byindex(index); } - - /* If we dump all sets, continue with dumping last ones */ - if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2]) - cb->args[0] = DUMP_LAST; - out: if (nlh) { nlmsg_end(skb, nlh); -- cgit v1.2.3 From c6914a6f261aca0c9f715f883a353ae7ff51fe83 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 19 Apr 2011 20:36:59 -0700 Subject: can: Add missing socket check in can/bcm release. We can get here with a NULL socket argument passed from userspace, so we need to handle it accordingly. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/can/bcm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index 57b1aed79014..8a6a05e7c3c8 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1427,9 +1427,14 @@ static int bcm_init(struct sock *sk) static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; - struct bcm_sock *bo = bcm_sk(sk); + struct bcm_sock *bo; struct bcm_op *op, *next; + if (sk == NULL) + return 0; + + bo = bcm_sk(sk); + /* remove bcm_ops, timer, rx_unregister(), etc. */ unregister_netdevice_notifier(&bo->notifier); -- cgit v1.2.3 From 10022a6c66e199d8f61d9044543f38785713cbbd Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 20 Apr 2011 01:57:15 +0000 Subject: can: add missing socket check in can/raw release v2: added space after 'if' according code style. We can get here with a NULL socket argument passed from userspace, so we need to handle it accordingly. Thanks to Dave Jones pointing at this issue in net/can/bcm.c Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/raw.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 649acfa7c70a..0eb39a7fdf64 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -305,7 +305,12 @@ static int raw_init(struct sock *sk) static int raw_release(struct socket *sock) { struct sock *sk = sock->sk; - struct raw_sock *ro = raw_sk(sk); + struct raw_sock *ro; + + if (!sk) + return 0; + + ro = raw_sk(sk); unregister_netdevice_notifier(&ro->notifier); -- cgit v1.2.3 From 243e6df4ed919880d079d717641ad699c6530a03 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Apr 2011 20:44:04 +0200 Subject: mac80211: fix SMPS debugfs locking The locking with SMPS requests means that the debugs file should lock the mgd mutex, not the iflist mutex. Calls to __ieee80211_request_smps() need to hold that mutex, so add an assertion. This has always been wrong, but for some reason never been noticed, probably because the locking error only happens while unassociated. Cc: stable@kernel.org [2.6.34+] Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 2 ++ net/mac80211/debugfs_netdev.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 334213571ad0..44049733c4ea 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1504,6 +1504,8 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode old_req; int err; + lockdep_assert_held(&sdata->u.mgd.mtx); + old_req = sdata->u.mgd.req_smps; sdata->u.mgd.req_smps = smps_mode; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index dacace6b1393..9ea7c0d0103f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -177,9 +177,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - mutex_lock(&local->iflist_mtx); + mutex_lock(&sdata->u.mgd.mtx); err = __ieee80211_request_smps(sdata, smps_mode); - mutex_unlock(&local->iflist_mtx); + mutex_unlock(&sdata->u.mgd.mtx); return err; } -- cgit v1.2.3 From a9cf73ea7ff78f52662c8658d93c226effbbedde Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 19 Apr 2011 22:52:49 +0000 Subject: ipv6: udp: fix the wrong headroom check At this point, skb->data points to skb_transport_header. So, headroom check is wrong. For some case:bridge(UFO is on) + eth device(UFO is off), there is no enough headroom for IPv6 frag head. But headroom check is always false. This will bring about data be moved to there prior to skb->head, when adding IPv6 frag header to skb. Signed-off-by: Shan Wei Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 15c37746845e..9e305d74b3d4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1335,7 +1335,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) skb->ip_summed = CHECKSUM_NONE; /* Check if there is enough headroom to insert fragment header. */ - if ((skb_headroom(skb) < frag_hdr_sz) && + if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) && pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) goto out; -- cgit v1.2.3 From e965c05dabdabb85af0187952ccd75e43995c4b3 Mon Sep 17 00:00:00 2001 From: Thomas Egerer Date: Wed, 20 Apr 2011 22:56:02 +0000 Subject: ipv6: Remove hoplimit initialization to -1 The changes introduced with git-commit a02e4b7d ("ipv6: Demark default hoplimit as zero.") missed to remove the hoplimit initialization. As a result, ipv6_get_mtu interprets the return value of dst_metric_raw (-1) as 255 and answers ping6 with this hoplimit. This patche removes the line such that ping6 is answered with the hoplimit value configured via sysctl. Signed-off-by: Thomas Egerer Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 843406f14d7b..0a5d02ae5ceb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2012,7 +2012,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; -- cgit v1.2.3 From f01cb5fbea1c1613621f9f32f385e12c1a29dde0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 21 Apr 2011 21:17:25 -0700 Subject: Revert "bridge: Forward reserved group addresses if !STP" This reverts commit 1e253c3b8a1aeed51eef6fc366812f219b97de65. It breaks 802.3ad bonding inside of a bridge. The commit was meant to support transport bridging, and specifically virtual machines bridged to an ethernet interface connected to a switch port wiht 802.1x enabled. But this isn't the way to do it, it breaks too many other things. Signed-off-by: David S. Miller --- net/bridge/br_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e2160792e1bc..0c7badad62af 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -164,7 +164,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto drop; /* If STP is turned off, then forward */ - if (p->br->stp_enabled == BR_NO_STP) + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) goto forward; if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, -- cgit v1.2.3 From 7494d00c7b826b6ceb79ec33892bd0ef59be5614 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:45 -0400 Subject: SUNRPC: Allow RPC calls to return ETIMEDOUT instead of EIO On occasion, it is useful for the NFS layer to distinguish between soft timeouts and other EIO errors due to (say) encoding errors, or authentication errors. The following patch ensures that the default behaviour of the RPC layer remains to return EIO on soft timeouts (until we have audited all the callers). Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e7a96e478f63..8d83f9d48713 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); - rpc_exit(task, -EIO); + if (task->tk_flags & RPC_TASK_TIMEOUT) + rpc_exit(task, -ETIMEDOUT); + else + rpc_exit(task, -EIO); return; } -- cgit v1.2.3 From 0972ddb2373d5e127aabdcabd8305eff0242cd0b Mon Sep 17 00:00:00 2001 From: Held Bernhard Date: Sun, 24 Apr 2011 22:07:32 +0000 Subject: net: provide cow_metrics() methods to blackhole dst_ops Since commit 62fa8a846d7d (net: Implement read-only protection and COW'ing of metrics.) the kernel throws an oops. [ 101.620985] BUG: unable to handle kernel NULL pointer dereference at (null) [ 101.621050] IP: [< (null)>] (null) [ 101.621084] PGD 6e53c067 PUD 3dd6a067 PMD 0 [ 101.621122] Oops: 0010 [#1] SMP [ 101.621153] last sysfs file: /sys/devices/virtual/ppp/ppp/uevent [ 101.621192] CPU 2 [ 101.621206] Modules linked in: l2tp_ppp pppox ppp_generic slhc l2tp_netlink l2tp_core deflate zlib_deflate twofish_x86_64 twofish_common des_generic cbc ecb sha1_generic hmac af_key iptable_filter snd_pcm_oss snd_mixer_oss snd_seq snd_seq_device loop snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_pcm snd_timer snd i2c_i801 iTCO_wdt psmouse soundcore snd_page_alloc evdev uhci_hcd ehci_hcd thermal [ 101.621552] [ 101.621567] Pid: 5129, comm: openl2tpd Not tainted 2.6.39-rc4-Quad #3 Gigabyte Technology Co., Ltd. G33-DS3R/G33-DS3R [ 101.621637] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 101.621684] RSP: 0018:ffff88003ddeba60 EFLAGS: 00010202 [ 101.621716] RAX: ffff88003ddb5600 RBX: ffff88003ddb5600 RCX: 0000000000000020 [ 101.621758] RDX: ffffffff81a69a00 RSI: ffffffff81b7ee61 RDI: ffff88003ddb5600 [ 101.621800] RBP: ffff8800537cd900 R08: 0000000000000000 R09: ffff88003ddb5600 [ 101.621840] R10: 0000000000000005 R11: 0000000000014b38 R12: ffff88003ddb5600 [ 101.621881] R13: ffffffff81b7e480 R14: ffffffff81b7e8b8 R15: ffff88003ddebad8 [ 101.621924] FS: 00007f06e4182700(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 [ 101.621971] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 101.622005] CR2: 0000000000000000 CR3: 0000000045274000 CR4: 00000000000006e0 [ 101.622046] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 101.622087] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 101.622129] Process openl2tpd (pid: 5129, threadinfo ffff88003ddea000, task ffff88003de9a280) [ 101.622177] Stack: [ 101.622191] ffffffff81447efa ffff88007d3ded80 ffff88003de9a280 ffff88007d3ded80 [ 101.622245] 0000000000000001 ffff88003ddebbb8 ffffffff8148d5a7 0000000000000212 [ 101.622299] ffff88003dcea000 ffff88003dcea188 ffffffff00000001 ffffffff81b7e480 [ 101.622353] Call Trace: [ 101.622374] [] ? ipv4_blackhole_route+0x1ba/0x210 [ 101.622415] [] ? xfrm_lookup+0x417/0x510 [ 101.622450] [] ? extract_buf+0x9a/0x140 [ 101.622485] [] ? __ip_flush_pending_frames+0x70/0x70 [ 101.622526] [] ? udp_sendmsg+0x62f/0x810 [ 101.622562] [] ? sock_sendmsg+0x116/0x130 [ 101.622599] [] ? find_get_page+0x18/0x90 [ 101.622633] [] ? filemap_fault+0x12a/0x4b0 [ 101.622668] [] ? move_addr_to_kernel+0x64/0x90 [ 101.622706] [] ? verify_iovec+0x7a/0xf0 [ 101.622739] [] ? sys_sendmsg+0x292/0x420 [ 101.622774] [] ? handle_pte_fault+0x8a/0x7c0 [ 101.622810] [] ? __pte_alloc+0xae/0x130 [ 101.622844] [] ? handle_mm_fault+0x138/0x380 [ 101.622880] [] ? do_page_fault+0x189/0x410 [ 101.622915] [] ? sys_getsockname+0xf3/0x110 [ 101.622952] [] ? ip_setsockopt+0x4d/0xa0 [ 101.622986] [] ? sockfd_lookup_light+0x22/0x90 [ 101.623024] [] ? system_call_fastpath+0x16/0x1b [ 101.623060] Code: Bad RIP value. [ 101.623090] RIP [< (null)>] (null) [ 101.623125] RSP [ 101.623146] CR2: 0000000000000000 [ 101.650871] ---[ end trace ca3856a7d8e8dad4 ]--- [ 101.651011] __sk_free: optmem leakage (160 bytes) detected. The oops happens in dst_metrics_write_ptr() include/net/dst.h:124: return dst->ops->cow_metrics(dst, p); dst->ops->cow_metrics is NULL and causes the oops. Provide cow_metrics() methods, like we did in commit 214f45c91bb (net: provide default_advmss() methods to blackhole dst_ops) Signed-off-by: Held Bernhard Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 7 +++++++ net/ipv6/route.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c1acf69858fd..99e6e4bb1c72 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2690,6 +2690,12 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, + unsigned long old) +{ + return NULL; +} + static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -2698,6 +2704,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .default_mtu = ipv4_blackhole_default_mtu, .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, + .cow_metrics = ipv4_rt_blackhole_cow_metrics, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0a5d02ae5ceb..fd0eec6f88c6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -153,6 +153,12 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, + unsigned long old) +{ + return NULL; +} + static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -161,6 +167,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .default_mtu = ip6_blackhole_default_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, + .cow_metrics = ip6_rt_blackhole_cow_metrics, }; static const u32 ip6_template_metrics[RTAX_MAX] = { -- cgit v1.2.3 From 3f602b08dec32c418fc391fc838db357aab84f8a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 25 Apr 2011 19:39:24 +0000 Subject: xfrm: Fix replay window size calculation on initialization On replay initialization, we compute the size of the replay buffer to see if the replay window fits into the buffer. This computation lacks a mutliplication by 8 because we need the size in bit, not in byte. So we might return an error even though the replay window would fit into the buffer. This patch fixes this issue. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_replay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index f218385950ca..e8a781422feb 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -532,7 +532,7 @@ int xfrm_init_replay(struct xfrm_state *x) if (replay_esn) { if (replay_esn->replay_window > - replay_esn->bmp_len * sizeof(__u32)) + replay_esn->bmp_len * sizeof(__u32) * 8) return -EINVAL; if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) -- cgit v1.2.3 From c0a56e64aec331f33ead29ba493ee184d9bdc840 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 25 Apr 2011 19:40:23 +0000 Subject: esp6: Fix scatterlist initialization When we use IPsec extended sequence numbers, we may overwrite the last scatterlist of the associated data by the scatterlist for the skb. This patch fixes this by placing the scatterlist for the skb right behind the last scatterlist of the associated data. esp4 does it already like that. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 5aa8ec88f194..59dccfbb5b11 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -371,7 +371,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; skb->ip_summed = CHECKSUM_NONE; -- cgit v1.2.3 From 7833aa05b8db63484b43b4b4c389cd4533140afb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 25 Apr 2011 19:41:21 +0000 Subject: xfrm: Check for the new replay implementation if an esn state is inserted IPsec extended sequence numbers can be used only with the new anti-replay window implementation. So check if the new implementation is used if an esn state is inserted and return an error if it is not. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5d1d60d3ca83..c658cb3bc7c3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -124,6 +124,9 @@ static inline int verify_replay(struct xfrm_usersa_info *p, { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + if ((p->flags & XFRM_STATE_ESN) && !rt) + return -EINVAL; + if (!rt) return 0; -- cgit v1.2.3 From e9c549998dc24209847007e1f209f3b6c88d21ba Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 26 Apr 2011 23:28:26 -0700 Subject: Revert wrong fixes for common misspellings These changes were incorrectly fixed by codespell. They were now manually corrected. Signed-off-by: Lucas De Marchi --- net/l2tp/l2tp_ip.c | 2 +- net/sctp/ulpevent.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index fce9bd3bd3fe..5c04f3e42704 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -667,7 +667,7 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify does't like +/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like * enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index dff27d5e22fd..61b1f5ada96a 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -554,7 +554,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( memcpy(&ssf->ssf_info, &chunk->sinfo, sizeof(struct sctp_sndrcvinfo)); /* Per TSVWG discussion with Randy. Allow the application to - * resemble a fragmented message. + * reassemble a fragmented message. */ ssf->ssf_info.sinfo_flags = chunk->chunk_hdr->flags; -- cgit v1.2.3 From b3b270054b80e6195b1d2b2ce082239911261839 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Tue, 26 Apr 2011 01:45:41 +0000 Subject: dsa/mv88e6131: fix unknown multicast/broadcast forwarding on mv88e6085 The 88e6085 has a few differences from the other devices in the port control registers, causing unknown multicast/broadcast packets to get dropped when using the standard port setup. At the same time update kconfig to clarify that the mv88e6085 is now supported. Signed-off-by: Peter Korsgaard Acked-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/dsa/Kconfig | 4 ++-- net/dsa/mv88e6131.c | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 87bb5f4de0e8..c53ded2a98df 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -41,12 +41,12 @@ config NET_DSA_MV88E6XXX_NEED_PPU default n config NET_DSA_MV88E6131 - bool "Marvell 88E6095/6095F/6131 ethernet switch chip support" + bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support" select NET_DSA_MV88E6XXX select NET_DSA_MV88E6XXX_NEED_PPU select NET_DSA_TAG_DSA ---help--- - This enables support for the Marvell 88E6095/6095F/6131 + This enables support for the Marvell 88E6085/6095/6095F/6131 ethernet switch chips. config NET_DSA_MV88E6123_61_65 diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 3da418894efc..45f7411e90ba 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -207,8 +207,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * mode, but do not enable forwarding of unknown unicasts. */ val = 0x0433; - if (p == dsa_upstream_port(ds)) + if (p == dsa_upstream_port(ds)) { val |= 0x0104; + /* + * On 6085, unknown multicast forward is controlled + * here rather than in Port Control 2 register. + */ + if (ps->id == ID_6085) + val |= 0x0008; + } if (ds->dsa_port_mask & (1 << p)) val |= 0x0100; REG_WRITE(addr, 0x04, val); @@ -251,10 +258,19 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * If this is the upstream port for this switch, enable * forwarding of unknown multicast addresses. */ - val = 0x0080 | dsa_upstream_port(ds); - if (p == dsa_upstream_port(ds)) - val |= 0x0040; - REG_WRITE(addr, 0x08, val); + if (ps->id == ID_6085) + /* + * on 6085, bits 3:0 are reserved, bit 6 control ARP + * mirroring, and multicast forward is handled in + * Port Control register. + */ + REG_WRITE(addr, 0x08, 0x0080); + else { + val = 0x0080 | dsa_upstream_port(ds); + if (p == dsa_upstream_port(ds)) + val |= 0x0040; + REG_WRITE(addr, 0x08, val); + } /* * Rate Control: disable ingress rate limiting. -- cgit v1.2.3 From a05d2ad1c1f391c7f514a1d1e09b5417968a7d07 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 Apr 2011 01:54:57 +0000 Subject: af_unix: Only allow recv on connected seqpacket sockets. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following oops discovered by Dan Aloni: > Anyway, the following is the output of the Oops that I got on the > Ubuntu kernel on which I first detected the problem > (2.6.37-12-generic). The Oops that followed will be more useful, I > guess. >[ 5594.669852] BUG: unable to handle kernel NULL pointer dereference > at           (null) > [ 5594.681606] IP: [] unix_dgram_recvmsg+0x1fb/0x420 > [ 5594.687576] PGD 2a05d067 PUD 2b951067 PMD 0 > [ 5594.693720] Oops: 0002 [#1] SMP > [ 5594.699888] last sysfs file: The bug was that unix domain sockets use a pseduo packet for connecting and accept uses that psudo packet to get the socket. In the buggy seqpacket case we were allowing unconnected sockets to call recvmsg and try to receive the pseudo packet. That is always wrong and as of commit 7361c36c5 the pseudo packet had become enough different from a normal packet that the kernel started oopsing. Do for seqpacket_recv what was done for seqpacket_send in 2.5 and only allow it on connected seqpacket sockets. Cc: stable@kernel.org Tested-by: Dan Aloni Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/unix/af_unix.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3a43a8304768..b1d75beb7e20 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -524,6 +524,8 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, + struct msghdr *, size_t, int); static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, @@ -583,7 +585,7 @@ static const struct proto_ops unix_seqpacket_ops = { .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_seqpacket_sendmsg, - .recvmsg = unix_dgram_recvmsg, + .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -1699,6 +1701,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, return unix_dgram_sendmsg(kiocb, sock, msg, len); } +static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + + if (sk->sk_state != TCP_ESTABLISHED) + return -ENOTCONN; + + return unix_dgram_recvmsg(iocb, sock, msg, size, flags); +} + static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); -- cgit v1.2.3 From 7cfd260910b881250cde76ba92ebe3cbf8493a8f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 1 May 2011 02:04:11 +0000 Subject: ipv4: don't spam dmesg with "Using LC-trie" messages fib_trie_table() is called during netns creation and Chromium uses clone(CLONE_NEWNET) to sandbox renderer process. Don't print anything. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e9013d6c1f51..5fe9b8b41df3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1978,9 +1978,6 @@ struct fib_table *fib_trie_table(u32 id) t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); - if (id == RT_TABLE_LOCAL) - pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION); - return tb; } -- cgit v1.2.3 From 41c31f318a5209922d051e293c61e4724daad11c Mon Sep 17 00:00:00 2001 From: Lifeng Sun Date: Wed, 27 Apr 2011 22:04:51 +0000 Subject: networking: inappropriate ioctl operation should return ENOTTY ioctl() calls against a socket with an inappropriate ioctl operation are incorrectly returning EINVAL rather than ENOTTY: [ENOTTY] Inappropriate I/O control operation. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=33992 Signed-off-by: Lifeng Sun Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index c2ac599fa0f6..856b6ee9a1d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4773,7 +4773,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm * is never reached */ WARN_ON(1); - err = -EINVAL; + err = -ENOTTY; break; } @@ -5041,7 +5041,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* Set the per device memory buffer space. * Not applicable in our case */ case SIOCSIFLINK: - return -EINVAL; + return -ENOTTY; /* * Unknown or private ioctl. @@ -5062,7 +5062,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) return wext_handle_ioctl(net, &ifr, cmd, arg); - return -EINVAL; + return -ENOTTY; } } -- cgit v1.2.3 From ff538818f4a82c4cf02d2d6bd6ac5c7360b9d41d Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: Sun, 1 May 2011 01:44:01 +0000 Subject: sysctl: net: call unregister_net_sysctl_table where needed ctl_table_headers registered with register_net_sysctl_table should have been unregistered with the equivalent unregister_net_sysctl_table Signed-off-by: Lucian Adrian Grijincu Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5345b0bee6df..cd9ca0811cfa 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1680,7 +1680,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) return; cnf->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->dev_name); kfree(t); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1493534116df..a7bda0757053 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4537,7 +4537,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) t = p->sysctl; p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->dev_name); kfree(t); } -- cgit v1.2.3 From ca20892db7567c40e8ed0668f46cf0d085d7db6d Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Tue, 3 May 2011 02:29:56 +0000 Subject: libceph: fix ceph_msg_new error path If memory allocation failed, calling ceph_msg_put() will cause GPF since some of ceph_msg variables are not initialized first. Fix Bug #970. Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- net/ceph/messenger.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 05f357828a2f..e15a82ccc05f 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2267,6 +2267,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) m->more_to_follow = false; m->pool = NULL; + /* middle */ + m->middle = NULL; + + /* data */ + m->nr_pages = 0; + m->page_alignment = 0; + m->pages = NULL; + m->pagelist = NULL; + m->bio = NULL; + m->bio_iter = NULL; + m->bio_seg = 0; + m->trail = NULL; + /* front */ if (front_len) { if (front_len > PAGE_CACHE_SIZE) { @@ -2286,19 +2299,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) } m->front.iov_len = front_len; - /* middle */ - m->middle = NULL; - - /* data */ - m->nr_pages = 0; - m->page_alignment = 0; - m->pages = NULL; - m->pagelist = NULL; - m->bio = NULL; - m->bio_iter = NULL; - m->bio_seg = 0; - m->trail = NULL; - dout("ceph_msg_new %p front %d\n", m, front_len); return m; -- cgit v1.2.3 From 4ad12621e442b7a072e81270808f617cb65c5672 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 3 May 2011 09:23:36 -0700 Subject: libceph: fix ceph_osdc_alloc_request error checks ceph_osdc_alloc_request returns NULL on failure. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 5a80f41c0cba..6b5dda1cb5df 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -470,8 +470,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, snapc, ops, use_mempool, GFP_NOFS, NULL, NULL); - if (IS_ERR(req)) - return req; + if (!req) + return NULL; /* calculate max write size */ calc_layout(osdc, vino, layout, off, plen, req, ops); -- cgit v1.2.3 From 64f3b9e203bd06855072e295557dca1485a2ecba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 May 2011 10:02:26 +0000 Subject: net: ip_expire() must revalidate route Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path) added a bug in IP defragmentation handling, in case timeout is fired. When a frame is defragmented, we use last skb dst field when building final skb. Its dst is valid, since we are in rcu read section. But if a timeout occurs, we take first queued fragment to build one ICMP TIME EXCEEDED message. Problem is all queued skb have weak dst pointers, since we escaped RCU critical section after their queueing. icmp_send() might dereference a now freed (and possibly reused) part of memory. Calling skb_dst_drop() and ip_route_input_noref() to revalidate route is the only possible choice. Reported-by: Denys Fedoryshchenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a1151b8adf3c..b1d282f11be7 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -223,31 +223,30 @@ static void ip_expire(unsigned long arg) if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; + const struct iphdr *iph; + int err; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; + /* skb dst is stale, drop it, and perform route lookup again */ + skb_dst_drop(head); + iph = ip_hdr(head); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); + if (err) + goto out_rcu_unlock; + /* - * Only search router table for the head fragment, - * when defraging timeout at PRE_ROUTING HOOK. + * Only an end host needs to send an ICMP + * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { - const struct iphdr *iph = ip_hdr(head); - int err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); - if (unlikely(err)) - goto out_rcu_unlock; - - /* - * Only an end host needs to send an ICMP - * "Fragment Reassembly Timeout" message, per RFC792. - */ - if (skb_rtable(head)->rt_type != RTN_LOCAL) - goto out_rcu_unlock; + if (qp->user == IP_DEFRAG_CONNTRACK_IN && + skb_rtable(head)->rt_type != RTN_LOCAL) + goto out_rcu_unlock; - } /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -- cgit v1.2.3 From a294865978b701e4d0d90135672749531b9a900d Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Fri, 6 May 2011 03:27:18 +0000 Subject: dccp: handle invalid feature options length A length of zero (after subtracting two for the type and len fields) for the DCCPO_{CHANGE,CONFIRM}_{L,R} options will cause an underflow due to the subtraction. The subsequent code may read past the end of the options value buffer when parsing. I'm unsure of what the consequences of this might be, but it's probably not good. Signed-off-by: Dan Rosenberg Cc: stable@kernel.org Acked-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/options.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/dccp/options.c b/net/dccp/options.c index f06ffcfc8d71..4b2ab657ac8e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; + if (len == 0) + goto out_invalid_option; rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, *value, value + 1, len - 1); if (rc) -- cgit v1.2.3 From b9f47a3aaeabdce3b42829bbb27765fa340f76ba Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 4 May 2011 10:04:56 +0000 Subject: tcp_cubic: limit delayed_ack ratio to prevent divide error TCP Cubic keeps a metric that estimates the amount of delayed acknowledgements to use in adjusting the window. If an abnormally large number of packets are acknowledged at once, then the update could wrap and reach zero. This kind of ACK could only happen when there was a large window and huge number of ACK's were lost. This patch limits the value of delayed ack ratio. The choice of 32 is just a conservative value since normally it should be range of 1 to 4 packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 34340c9c95fa..f376b05cca81 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -93,6 +93,7 @@ struct bictcp { u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ #define ACK_RATIO_SHIFT 4 +#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ @@ -398,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) u32 delay; if (icsk->icsk_ca_state == TCP_CA_Open) { - cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ca->delayed_ack += cnt; + u32 ratio = ca->delayed_ack; + + ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; + ratio += cnt; + + ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ -- cgit v1.2.3 From 3cd7967825a2b3926dc96ae566d986c4420919f7 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Fri, 15 Apr 2011 13:59:33 +0530 Subject: net/9p: Handle get_user_pages_fast return properly Use proper data type to handle get_user_pages_fast error condition. Also do not treat EFAULT error as fatal. Signed-off-by: M. Mohan Kumar Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- net/9p/client.c | 2 +- net/9p/trans_common.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 77367745be9b..a9aa2dd66482 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -614,7 +614,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) err = c->trans_mod->request(c, req); if (err < 0) { - if (err != -ERESTARTSYS) + if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto reterr; } diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index e883172f9aa2..9a70ebdec56e 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -63,7 +63,7 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, int nr_pages, u8 rw) { uint32_t first_page_bytes = 0; - uint32_t pdata_mapped_pages; + int32_t pdata_mapped_pages; struct trans_rpage_info *rpinfo; *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); @@ -75,14 +75,9 @@ p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, rpinfo = req->tc->private; pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, nr_pages, rw, &rpinfo->rp_data[0]); + if (pdata_mapped_pages <= 0) + return pdata_mapped_pages; - if (pdata_mapped_pages < 0) { - printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p" - "nr_pages:%d\n", pdata_mapped_pages, - req->tc->pubuf, nr_pages); - pdata_mapped_pages = 0; - return -EIO; - } rpinfo->rp_nr_pages = pdata_mapped_pages; if (*pdata_off) { *pdata_len = first_page_bytes; -- cgit v1.2.3 From 315c34dae0069d0c67abd714bb846cd466289c7f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 21 Apr 2011 10:55:07 +0200 Subject: netfilter: ctnetlink: fix timestamp support for new conntracks This patch fixes the missing initialization of the start time if the timestamp support is enabled. libnetfilter_conntrack/utils# conntrack -E & libnetfilter_conntrack/utils# ./conntrack_create tcp 6 109 ESTABLISHED src=1.1.1.1 dst=2.2.2.2 sport=1025 dport=21 packets=0 bytes=0 [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=21 dport=1025 packets=0 bytes=0 mark=0 delta-time=1303296401 use=2 Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 30bf8a167fc8..482e90c61850 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1334,6 +1334,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conn *ct; int err = -EINVAL; struct nf_conntrack_helper *helper; + struct nf_conn_tstamp *tstamp; ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) @@ -1451,6 +1452,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) + tstamp->start = ktime_to_ns(ktime_get_real()); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); -- cgit v1.2.3 From 5a6351eecf8c87afed9c883bb6341d09406d74ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Apr 2011 10:57:21 +0200 Subject: netfilter: fix ebtables compat support commit 255d0dc34068a976 (netfilter: x_table: speedup compat operations) made ebtables not working anymore. 1) xt_compat_calc_jump() is not an exact match lookup 2) compat_table_info() has a typo in xt_compat_init_offsets() call 3) compat_do_replace() misses a xt_compat_init_offsets() call Reported-by: dann frazier Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 3 ++- net/netfilter/x_tables.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 893669caa8de..9707079bc40a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1766,7 +1766,7 @@ static int compat_table_info(const struct ebt_table_info *info, newinfo->entries_size = size; - xt_compat_init_offsets(AF_INET, info->nentries); + xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } @@ -2240,6 +2240,7 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); + xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a9adf4c6b299..8a025a585d2f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -455,6 +455,7 @@ void xt_compat_flush_offsets(u_int8_t af) vfree(xt[af].compat_tab); xt[af].compat_tab = NULL; xt[af].number = 0; + xt[af].cur = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); @@ -473,8 +474,7 @@ int xt_compat_calc_jump(u_int8_t af, unsigned int offset) else return mid ? tmp[mid - 1].delta : 0; } - WARN_ON_ONCE(1); - return 0; + return left ? tmp[left - 1].delta : 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); -- cgit v1.2.3 From 103a9778e07bcc0cd34b5c35a87281454eec719e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Apr 2011 10:58:25 +0200 Subject: netfilter: ebtables: only call xt_compat_add_offset once per rule The optimizations in commit 255d0dc34068a976 (netfilter: x_table: speedup compat operations) assume that xt_compat_add_offset is called once per rule. ebtables however called it for each match/target found in a rule. The match/watcher/target parser already returns the needed delta, so it is sufficient to move the xt_compat_add_offset call to a more reasonable location. While at it, also get rid of the unused COMPAT iterator macros. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 61 ++++++----------------------------------- 1 file changed, 9 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9707079bc40a..1a92b369c820 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1882,7 +1882,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, struct xt_match *match; struct xt_target *wt; void *dst = NULL; - int off, pad = 0, ret = 0; + int off, pad = 0; unsigned int size_kern, entry_offset, match_size = mwt->match_size; strlcpy(name, mwt->u.name, sizeof(name)); @@ -1935,13 +1935,6 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, break; } - if (!dst) { - ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, - off + ebt_compat_entry_padsize()); - if (ret < 0) - return ret; - } - state->buf_kern_offset += match_size + off; state->buf_user_offset += match_size; pad = XT_ALIGN(size_kern) - size_kern; @@ -2016,50 +2009,6 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, return growth; } -#define EBT_COMPAT_WATCHER_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__watcher; \ - \ - for (__i = e->watchers_offset; \ - __i < (e)->target_offset; \ - __i += __watcher->watcher_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __watcher = (void *)(e) + __i; \ - __ret = fn(__watcher , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->target_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - -#define EBT_COMPAT_MATCH_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct compat_ebt_entry_mwt *__match; \ - \ - for (__i = sizeof(struct ebt_entry); \ - __i < (e)->watchers_offset; \ - __i += __match->match_size + \ - sizeof(struct compat_ebt_entry_mwt)) { \ - __match = (void *)(e) + __i; \ - __ret = fn(__match , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->watchers_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - /* called for all ebt_entry structures. */ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, unsigned int *total, @@ -2132,6 +2081,14 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, } } + if (state->buf_kern_start == NULL) { + unsigned int offset = buf_start - (char *) base; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); + if (ret < 0) + return ret; + } + startoff = state->buf_user_offset - startoff; BUG_ON(*total < startoff); -- cgit v1.2.3 From 1ae132b0347907ac95b8bc9dba37934f59d2a508 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 3 May 2011 22:09:30 +0200 Subject: IPVS: Change of socket usage to enable name space exit. If the sync daemons run in a name space while it crashes or get killed, there is no way to stop them except for a reboot. When all patches are there, ip_vs_core will handle register_pernet_(), i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed. Kernel threads should not increment the use count of a socket. By calling sk_change_net() after creating a socket this is avoided. sock_release cant be used intead sk_release_kernel() should be used. Thanks Eric W Biederman for your advices. Signed-off-by: Hans Schillstrom [horms@verge.net.au: minor edit to changelog] Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 58 ++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 07accf6b2401..a0791dc05a27 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net) static void __net_exit __ip_vs_cleanup(struct net *net) { - IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); + IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); } static struct pernet_operations ipvs_core_ops = { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3e7961e85e9c..0cce95310820 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) struct socket *sock; int result; - /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + /* First create a socket move it to right name space later */ + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); @@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) int result; /* First create a socket */ - result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - + /* + * Kernel sockets that are a part of a namespace, should not + * hold a reference to a namespace in order to allow to stop it. + * After sk_change_net should be released using sk_release_kernel. + */ + sk_change_net(sock->sk, net); /* it is equivalent to the REUSEADDR option in user-space */ sock->sk->sk_reuse = 1; @@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) return sock; - error: - sock_release(sock); +error: + sk_release_kernel(sock->sk); return ERR_PTR(result); } @@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) ip_vs_sync_buff_release(sb); /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo); return 0; @@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) } /* release the sending multicast socket */ - sock_release(tinfo->sock); + sk_release_kernel(tinfo->sock->sk); kfree(tinfo->buf); kfree(tinfo); @@ -1601,7 +1611,7 @@ outtinfo: outbuf: kfree(buf); outsocket: - sock_release(sock); + sk_release_kernel(sock->sk); out: return result; } @@ -1610,6 +1620,7 @@ out: int stop_sync_thread(struct net *net, int state) { struct netns_ipvs *ipvs = net_ipvs(net); + int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); @@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) spin_lock_bh(&ipvs->sync_lock); ipvs->sync_state &= ~IP_VS_STATE_MASTER; spin_unlock_bh(&ipvs->sync_lock); - kthread_stop(ipvs->master_thread); + retc = kthread_stop(ipvs->master_thread); ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { if (!ipvs->backup_thread) @@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state) task_pid_nr(ipvs->backup_thread)); ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(ipvs->backup_thread); + retc = kthread_stop(ipvs->backup_thread); ipvs->backup_thread = NULL; - } else { - return -EINVAL; } /* decrease the module use count */ ip_vs_use_count_dec(); - return 0; + return retc; } /* @@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net) static void __ip_vs_sync_cleanup(struct net *net) { - stop_sync_thread(net, IP_VS_STATE_MASTER); - stop_sync_thread(net, IP_VS_STATE_BACKUP); + int retc; + + retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Master Daemon\n"); + + retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + if (retc && retc != -ESRCH) + pr_err("Failed to stop Backup Daemon\n"); } static struct pernet_operations ipvs_sync_ops = { @@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = { int __init ip_vs_sync_init(void) { - return register_pernet_subsys(&ipvs_sync_ops); + return register_pernet_device(&ipvs_sync_ops); } void ip_vs_sync_cleanup(void) { - unregister_pernet_subsys(&ipvs_sync_ops); + unregister_pernet_device(&ipvs_sync_ops); } -- cgit v1.2.3 From 7a4f0761fce32ff4918a7c23b08db564ad33092d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 3 May 2011 22:09:31 +0200 Subject: IPVS: init and cleanup restructuring DESCRIPTION This patch tries to restore the initial init and cleanup sequences that was before namspace patch. Netns also requires action when net devices unregister which has never been implemented. I.e this patch also covers when a device moves into a network namespace, and has to be released. IMPLEMENTATION The number of calls to register_pernet_device have been reduced to one for the ip_vs.ko Schedulers still have their own calls. This patch adds a function __ip_vs_service_cleanup() and an enable flag for the netfilter hooks. The nf hooks will be enabled when the first service is loaded and never disabled again, except when a namespace exit starts. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov [horms@verge.net.au: minor edit to changelog] Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 15 +---- net/netfilter/ipvs/ip_vs_conn.c | 12 +--- net/netfilter/ipvs/ip_vs_core.c | 101 +++++++++++++++++++++++++++++--- net/netfilter/ipvs/ip_vs_ctl.c | 120 ++++++++++++++++++++++++++++++++------- net/netfilter/ipvs/ip_vs_est.c | 14 +---- net/netfilter/ipvs/ip_vs_proto.c | 11 +--- net/netfilter/ipvs/ip_vs_sync.c | 13 +---- 7 files changed, 206 insertions(+), 80 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 2dc6de13ac18..51f3af7c4743 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -static int __net_init __ip_vs_app_init(struct net *net) +int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net) return 0; } -static void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit __ip_vs_app_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_app_init, - .exit = __ip_vs_app_cleanup, -}; - int __init ip_vs_app_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_app_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c97bd45975be..d3fd91bbba49 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -static void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit __ip_vs_conn_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } -static struct pernet_operations ipvs_conn_ops = { - .init = __ip_vs_conn_init, - .exit = __ip_vs_conn_cleanup, -}; int __init ip_vs_conn_init(void) { int idx; - int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - retc = register_pernet_subsys(&ipvs_conn_ops); - /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return retc; + return 0; } void ip_vs_conn_cleanup(void) { - unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a0791dc05a27..a74dae6c5dbc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return NF_ACCEPT; /* The packet looks wrong, ignore */ net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* Bad... Do not break raw sockets */ @@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - net = skb_net(skb); /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); if (unlikely(!pd)) @@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - net = skb_net(skb); ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp(skb, &r, hooknum); } @@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp_v6(skb, &r, hooknum); } #endif @@ -1884,21 +1903,72 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + /* Hold the beast until a service is registerd */ + ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; + + if (__ip_vs_estimator_init(net) < 0) + goto estimator_fail; + + if (__ip_vs_control_init(net) < 0) + goto control_fail; + + if (__ip_vs_protocol_init(net) < 0) + goto protocol_fail; + + if (__ip_vs_app_init(net) < 0) + goto app_fail; + + if (__ip_vs_conn_init(net) < 0) + goto conn_fail; + + if (__ip_vs_sync_init(net) < 0) + goto sync_fail; + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; +/* + * Error handling + */ + +sync_fail: + __ip_vs_conn_cleanup(net); +conn_fail: + __ip_vs_app_cleanup(net); +app_fail: + __ip_vs_protocol_cleanup(net); +protocol_fail: + __ip_vs_control_cleanup(net); +control_fail: + __ip_vs_estimator_cleanup(net); +estimator_fail: + return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { + __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ + __ip_vs_conn_cleanup(net); + __ip_vs_app_cleanup(net); + __ip_vs_protocol_cleanup(net); + __ip_vs_control_cleanup(net); + __ip_vs_estimator_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); } +static void __net_exit __ip_vs_dev_cleanup(struct net *net) +{ + EnterFunction(2); + net_ipvs(net)->enable = 0; /* Disable packet reception */ + __ip_vs_sync_cleanup(net); + LeaveFunction(2); +} + static struct pernet_operations ipvs_core_ops = { .init = __ip_vs_init, .exit = __ip_vs_cleanup, @@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = { .size = sizeof(struct netns_ipvs), }; +static struct pernet_operations ipvs_core_dev_ops = { + .exit = __ip_vs_dev_cleanup, +}; + /* * Initialize IP Virtual Server */ @@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void) { int ret; - ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ - if (ret < 0) - return ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { @@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void) goto cleanup_conn; } + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + goto cleanup_sync; + + ret = register_pernet_device(&ipvs_core_dev_ops); + if (ret < 0) + goto cleanup_sub; + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_sync; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); + return ret; +cleanup_dev: + unregister_pernet_device(&ipvs_core_dev_ops); +cleanup_sub: + unregister_pernet_subsys(&ipvs_core_ops); cleanup_sync: ip_vs_sync_cleanup(); cleanup_conn: @@ -1964,20 +2047,20 @@ cleanup_sync: ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + unregister_pernet_device(&ipvs_core_dev_ops); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ae47090bf45f..ea722810faf3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void) } #endif + +/* Protos */ +static void __ip_vs_del_service(struct ip_vs_service *svc); + + #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ static int __ip_vs_addr_is_local_v6(struct net *net, @@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; + /* Now there is a service - full throttle */ + ipvs->enable = 1; return 0; @@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net) return 0; } +/* + * Delete service by {netns} in the service table. + * Called by __ip_vs_cleanup() + */ +void __ip_vs_service_cleanup(struct net *net) +{ + EnterFunction(2); + /* Check for "full" addressed entries */ + mutex_lock(&__ip_vs_mutex); + ip_vs_flush(net); + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); +} +/* + * Release dst hold by dst_cache + */ +static inline void +__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +{ + spin_lock_bh(&dest->dst_lock); + if (dest->dst_cache && dest->dst_cache->dev == dev) { + IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", + dev->name, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + ip_vs_dst_reset(dest); + } + spin_unlock_bh(&dest->dst_lock); + +} +/* + * Netdev event receiver + * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to + * a device that is "unregister" it must be released. + */ +static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net *net = dev_net(dev); + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + unsigned int idx; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + EnterFunction(2); + mutex_lock(&__ip_vs_mutex); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + } + + list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + + } + } + + list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + __ip_vs_dev_reset(dest, dev); + } + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); + return NOTIFY_DONE; +} /* * Zero counters in a service or all services @@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } #endif +static struct notifier_block ip_vs_dst_notifier = { + .notifier_call = ip_vs_dst_event, +}; + int __net_init __ip_vs_control_init(struct net *net) { int idx; @@ -3626,7 +3715,7 @@ err: return -ENOMEM; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -static struct pernet_operations ipvs_control_ops = { - .init = __ip_vs_control_init, - .exit = __ip_vs_control_cleanup, -}; - int __init ip_vs_control_init(void) { int idx; @@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) { - pr_err("cannot register namespace.\n"); - goto err; - } - smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - goto err_net; + goto err_sock; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); - nf_unregister_sockopt(&ip_vs_sockopts); - goto err_net; + goto err_genl; } + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + goto err_notf; + LeaveFunction(2); return 0; -err_net: - unregister_pernet_subsys(&ipvs_control_ops); -err: +err_notf: + ip_vs_genl_unregister(); +err_genl: + nf_unregister_sockopt(&ip_vs_sockopts); +err_sock: return ret; } @@ -3691,7 +3774,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 8c8766ca56ad..508cce98777c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -static int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -static void __net_exit __ip_vs_estimator_exit(struct net *net) +void __net_exit __ip_vs_estimator_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_estimator_init, - .exit = __ip_vs_estimator_exit, -}; int __init ip_vs_estimator_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_estimator_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 17484a4416ef..eb86028536fc 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -static int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit __ip_vs_protocol_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net) } } -static struct pernet_operations ipvs_proto_ops = { - .init = __ip_vs_protocol_init, - .exit = __ip_vs_protocol_cleanup, -}; - int __init ip_vs_protocol_init(void) { char protocols[64]; @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); - return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; - unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 0cce95310820..e292e5bddc70 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -static int __net_init __ip_vs_sync_init(struct net *net) +int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1677,7 +1677,7 @@ static int __net_init __ip_vs_sync_init(struct net *net) return 0; } -static void __ip_vs_sync_cleanup(struct net *net) +void __ip_vs_sync_cleanup(struct net *net) { int retc; @@ -1690,18 +1690,11 @@ static void __ip_vs_sync_cleanup(struct net *net) pr_err("Failed to stop Backup Daemon\n"); } -static struct pernet_operations ipvs_sync_ops = { - .init = __ip_vs_sync_init, - .exit = __ip_vs_sync_cleanup, -}; - - int __init ip_vs_sync_init(void) { - return register_pernet_device(&ipvs_sync_ops); + return 0; } void ip_vs_sync_cleanup(void) { - unregister_pernet_device(&ipvs_sync_ops); } -- cgit v1.2.3 From 4319cc0cf5bb894b7368008cdf6dd20eb8868018 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Tue, 10 May 2011 09:55:44 +0200 Subject: netfilter: IPv6: initialize TOS field in REJECT target module The IPv6 header is not zeroed out in alloc_skb so we must initialize it properly unless we want to see IPv6 packets with random TOS fields floating around. The current implementation resets the flow label but this could be changed if deemed necessary. We stumbled upon this issue when trying to apply a mangle rule to the RST packet generated by the REJECT target module. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_REJECT.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 28e74488a329..a5a4c5dd5396 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -45,6 +45,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; struct dst_entry *dst = NULL; u8 proto; struct flowi6 fl6; @@ -124,7 +126,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); - ip6h->version = 6; + *(__be32 *)ip6h = htonl(0x60000000 | (tclass << 20)); ip6h->hop_limit = ip6_dst_hoplimit(dst); ip6h->nexthdr = IPPROTO_TCP; ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); -- cgit v1.2.3 From 1ed2f73d90fb49bcf5704aee7e9084adb882bfc5 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Tue, 10 May 2011 10:00:21 +0200 Subject: netfilter: IPv6: fix DSCP mangle code The mask indicates the bits one wants to zero out, so it needs to be inverted before applying to the original TOS field. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_DSCP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 0a229191e55b..ae8271652efa 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -99,7 +99,7 @@ tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) u_int8_t orig, nv; orig = ipv6_get_dsfield(iph); - nv = (orig & info->tos_mask) ^ info->tos_value; + nv = (orig & ~info->tos_mask) ^ info->tos_value; if (orig != nv) { if (!skb_make_writable(skb, sizeof(struct iphdr))) -- cgit v1.2.3 From 93bbce1ad0cd788190dd7d6c17d289f771fe3d0d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 May 2011 12:13:36 +0200 Subject: netfilter: revert a2361c8735e07322023aedc36e4938b35af31eb0 This patch reverts a2361c8735e07322023aedc36e4938b35af31eb0: "[PATCH] netfilter: xt_conntrack: warn about use in raw table" Florian Wesphal says: "... when the packet was sent from the local machine the skb already has ->nfct attached, and -m conntrack seems to do the right thing." Acked-by: Jan Engelhardt Reported-by: Florian Wesphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_conntrack.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 481a86fdc409..61805d7b38aa 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,11 +272,6 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - if (strcmp(par->table, "raw") == 0) { - pr_info("state is undetermined at the time of raw table\n"); - return -EINVAL; - } - ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", -- cgit v1.2.3 From 5db1c07ced19b2eec3a149a3c624d88e02e246ae Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 3 May 2011 21:40:08 +0300 Subject: mac80211: don't start the dynamic ps timer if not associated When we are disconnecting, we set PS off, but this happens before we send the deauth/disassoc request. When the deauth/disassoc frames are sent, we trigger the dynamic ps timer, which then times out and turns PS back on. Thus, PS remains on after disconnecting, causing problems when associating again. This can be fixed by preventing the timer to start when we're not associated anymore. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ce4596ed1268..bd1224fd216a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -237,6 +237,10 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) &local->dynamic_ps_disable_work); } + /* Don't restart the timer if we're not disassociated */ + if (!ifmgd->associated) + return TX_CONTINUE; + mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); -- cgit v1.2.3 From 55aee10dec477254241e4f72968f92e0543b33ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 May 2011 12:22:54 -0700 Subject: vlan: fix GVRP at dismantle time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ip link add link eth2 eth2.103 type vlan id 103 gvrp on loose_binding on ip link set eth2.103 up rmmod tg3 # driver providing eth2 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] garp_request_leave+0x3e/0xc0 [garp] PGD 11d251067 PUD 11b9e0067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/virtual/net/eth2.104/ifindex CPU 0 Modules linked in: tg3(-) 8021q garp nfsd lockd auth_rpcgss sunrpc libphy sg [last unloaded: x_tables] Pid: 11494, comm: rmmod Tainted: G W 2.6.39-rc6-00261-gfd71257-dirty #580 HP ProLiant BL460c G6 RIP: 0010:[] [] garp_request_leave+0x3e/0xc0 [garp] RSP: 0018:ffff88007a19bae8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88011b5e2000 RCX: 0000000000000002 RDX: 0000000000000000 RSI: 0000000000000175 RDI: ffffffffa0030d5b RBP: ffff88007a19bb18 R08: 0000000000000001 R09: ffff88011bd64a00 R10: ffff88011d34ec00 R11: 0000000000000000 R12: 0000000000000002 R13: ffff88007a19bc48 R14: ffff88007a19bb88 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88011fc00000(0063) knlGS:00000000f77d76c0 CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000011a675000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process rmmod (pid: 11494, threadinfo ffff88007a19a000, task ffff8800798595c0) Stack: ffff88007a19bb36 ffff88011c84b800 ffff88011b5e2000 ffff88007a19bc48 ffff88007a19bb88 0000000000000006 ffff88007a19bb38 ffffffffa003a5f6 ffff88007a19bb38 670088007a19bba8 ffff88007a19bb58 ffffffffa00397e7 Call Trace: [] vlan_gvrp_request_leave+0x46/0x50 [8021q] [] vlan_dev_stop+0xb7/0xc0 [8021q] [] __dev_close_many+0x87/0xe0 [] dev_close_many+0x87/0x110 [] rollback_registered_many+0xa0/0x240 [] unregister_netdevice_many+0x19/0x60 [] vlan_device_event+0x53b/0x550 [8021q] [] ? ip6mr_device_event+0xa8/0xd0 [] notifier_call_chain+0x53/0x80 [] __raw_notifier_call_chain+0x9/0x10 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] rollback_registered_many+0x10f/0x240 [] rollback_registered+0x2f/0x40 [] unregister_netdevice_queue+0x58/0x90 [] unregister_netdev+0x1b/0x30 [] tg3_remove_one+0x6f/0x10b [tg3] We should call vlan_gvrp_request_leave() from unregister_vlan_dev(), not from vlan_dev_stop(), because vlan_gvrp_uninit_applicant() is called right after unregister_netdevice_queue(). In batch mode, unregister_netdevice_queue() doesn’t immediately call vlan_dev_stop(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 +++ net/8021q/vlan_dev.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 7850412f52b7..0eb1a886b370 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -124,6 +124,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp->nr_vlans--; + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); + vlan_group_set_device(grp, vlan_id, NULL); if (!grp->killall) synchronize_net(); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index e34ea9e5e28b..b2ff6c8d3603 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -487,9 +487,6 @@ static int vlan_dev_stop(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (vlan->flags & VLAN_FLAG_GVRP) - vlan_gvrp_request_leave(dev); - dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) -- cgit v1.2.3 From e14a599335427f81bbb0008963e59aa9c6449dce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 May 2011 12:26:06 -0700 Subject: net: dev_close() should check IFF_UP Commit 443457242beb (factorize sync-rcu call in unregister_netdevice_many) mistakenly removed one test from dev_close() Following actions trigger a BUG : modprobe bonding modprobe dummy ifconfig bond0 up ifenslave bond0 dummy0 rmmod dummy dev_close() must not close a non IFF_UP device. With help from Frank Blaschka and Einar EL Lueck Reported-by: Frank Blaschka Reported-by: Einar EL Lueck Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 856b6ee9a1d5..92009440d28b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1284,11 +1284,13 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - LIST_HEAD(single); + if (dev->flags & IFF_UP) { + LIST_HEAD(single); - list_add(&dev->unreg_list, &single); - dev_close_many(&single); - list_del(&single); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); + list_del(&single); + } return 0; } EXPORT_SYMBOL(dev_close); -- cgit v1.2.3 From 43a4dea4c9d44baae38ddc14b9b6d86fde4c8b88 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 May 2011 19:36:38 +0000 Subject: xfrm: Assign the inner mode output function to the dst entry As it is, we assign the outer modes output function to the dst entry when we create the xfrm bundle. This leads to two problems on interfamily scenarios. We might insert ipv4 packets into ip6_fragment when called from xfrm6_output. The system crashes if we try to fragment an ipv4 packet with ip6_fragment. This issue was introduced with git commit ad0081e4 (ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed). The second issue is, that we might insert ipv4 packets in netfilter6 and vice versa on interfamily scenarios. With this patch we assign the inner mode output function to the dst entry when we create the xfrm bundle. So xfrm4_output/xfrm6_output from the inner mode is used and the right fragmentation and netfilter functions are called. We switch then to outer mode with the output_finish functions. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 8 ++++++-- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_output.c | 6 +++--- net/ipv6/xfrm6_state.c | 1 + net/xfrm/xfrm_policy.c | 14 +++++++++++++- 5 files changed, 24 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 571aa96a175c..2d51840e53a1 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -static int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER if (!skb_dst(skb)->xfrm) { @@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, skb_dst(skb)->dev, xfrm4_output_finish, + NULL, dst->dev, + x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1717c64628d1..805d63ef4340 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_tempsel = __xfrm4_init_tempsel, .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, + .output_finish = xfrm4_output_finish, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8e688b3de9ab..49a91c5f5623 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -static int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; @@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb) if ((x && x->props.mode == XFRM_MODE_TUNNEL) && ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, xfrm6_output_finish); + return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } - return xfrm6_output_finish(skb); + return x->outer_mode->afinfo->output_finish(skb); } int xfrm6_output(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index afe941e9415c..248f0b2a7ee9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -178,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .output_finish = xfrm6_output_finish, .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 15792d8b6272..b4d745ea8ee1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1406,6 +1406,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; + struct xfrm_mode *inner_mode; struct dst_entry *dst_prev = NULL; struct dst_entry *dst0 = NULL; int i = 0; @@ -1436,6 +1437,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + if (!dst_prev) dst0 = dst1; else { @@ -1464,7 +1476,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = xfrm[i]->outer_mode->afinfo->output; + dst1->output = inner_mode->afinfo->output; dst1->next = dst_prev; dst_prev = dst1; -- cgit v1.2.3 From 6fa5ddcc675b937f94d05628e8997c07a80c6cb9 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 May 2011 19:43:05 +0000 Subject: xfrm: Don't allow esn with disabled anti replay detection Unlike the standard case, disabled anti replay detection needs some nontrivial extra treatment on ESN. RFC 4303 states: Note: If a receiver chooses to not enable anti-replay for an SA, then the receiver SHOULD NOT negotiate ESN in an SA management protocol. Use of ESN creates a need for the receiver to manage the anti-replay window (in order to determine the correct value for the high-order bits of the ESN, which are employed in the ICV computation), which is generally contrary to the notion of disabling anti-replay for an SA. So return an error if an ESN state with disabled anti replay detection is inserted for now and add the extra treatment later if we need it. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_replay.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index e8a781422feb..47f1b8638df9 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -535,6 +535,9 @@ int xfrm_init_replay(struct xfrm_state *x) replay_esn->bmp_len * sizeof(__u32) * 8) return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && replay_esn->replay_window == 0) + return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) x->repl = &xfrm_replay_esn; else -- cgit v1.2.3 From ce8453776d68982cfe93bcb28191af8ccad01f45 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 18:58:16 -0700 Subject: Revert "Bluetooth: fix shutdown on SCO sockets" This reverts commit f21ca5fff6e548833fa5ee8867239a8378623150. Quoth Gustavo F. Padovan: "Commit f21ca5fff6e548833fa5ee8867239a8378623150 can cause a NULL dereference if we call shutdown in a bluetooth SCO socket and doesn't wait the shutdown completion to call close(). Please revert it. I may have a fix for it soon, but we don't have time anymore, so revert is the way to go. ;)" Requested-by: Gustavo F. Padovan Signed-off-by: Linus Torvalds --- net/bluetooth/sco.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 94954c74f6ae..42fdffd1d76c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -369,15 +369,6 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: - if (sco_pi(sk)->conn) { - sk->sk_state = BT_DISCONN; - sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); - hci_conn_put(sco_pi(sk)->conn->hcon); - sco_pi(sk)->conn = NULL; - } else - sco_chan_del(sk, ECONNRESET); - break; - case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.2.3 From 1b0bcbcf62884959fa7214eb16c44cff445691c6 Mon Sep 17 00:00:00 2001 From: Pedro Scarapicchia Junior Date: Mon, 9 May 2011 14:10:49 +0000 Subject: net/9p/protocol.c: Fix a memory leak When p9pdu_readf() is called with "s" attribute, it allocates a pointer that will store a string. In p9dirent_read(), this pointer is not being released, leading to out of memory errors. This patch releases this pointer after string is copyed to dirent->d_name. Signed-off-by: Pedro Scarapicchia Junior Signed-off-by: Eric Van Hensbergen --- net/9p/protocol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/9p/protocol.c b/net/9p/protocol.c index b58a501cf3d1..a873277cb996 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -674,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, } strcpy(dirent->d_name, nameptr); + kfree(nameptr); out: return fake_pdu.offset; -- cgit v1.2.3 From cb68552858c64db302771469b1202ea09e696329 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 May 2011 16:03:24 -0400 Subject: bridge: fix forwarding of IPv6 The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e bridge: Reset IPCB when entering IP stack on NF_FORWARD broke forwarding of IPV6 packets in bridge because it would call bp_parse_ip_options with an IPV6 packet. Reported-by: Noah Meyerhans Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3bc322c5891..74ef4d4846a4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (br_parse_ip_options(skb)) + if (pf == PF_INET && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ -- cgit v1.2.3 From d8083deb4f1aa0977980dfb834fcc336ef38318f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 May 2011 16:03:24 -0400 Subject: bridge: fix forwarding of IPv6 The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e bridge: Reset IPCB when entering IP stack on NF_FORWARD broke forwarding of IPV6 packets in bridge because it would call bp_parse_ip_options with an IPV6 packet. Reported-by: Noah Meyerhans Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3bc322c5891..74ef4d4846a4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (br_parse_ip_options(skb)) + if (pf == PF_INET && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ -- cgit v1.2.3 From 0f08190fe8af3cdb6ba19690eb0fa253ecef4bde Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Sun, 15 May 2011 17:20:29 +0200 Subject: IPVS: fix netns if reading ip_vs_* procfs entries Without this patch every access to ip_vs in procfs will increase the netns count i.e. an unbalanced get_net()/put_net(). (ipvsadm commands also use procfs.) The result is you can't exit a netns if reading ip_vs_* procfs entries. Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_app.c | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 51f3af7c4743..059af3120be7 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -572,7 +572,7 @@ static const struct file_operations ip_vs_app_fops = { .open = ip_vs_app_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d3fd91bbba49..bf28ac2fc99b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1046,7 +1046,7 @@ static const struct file_operations ip_vs_conn_fops = { .open = ip_vs_conn_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; static const char *ip_vs_origin_name(unsigned flags) @@ -1114,7 +1114,7 @@ static const struct file_operations ip_vs_conn_sync_fops = { .open = ip_vs_conn_sync_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ea722810faf3..37890f228b19 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2066,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = { .open = ip_vs_info_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif @@ -2109,7 +2109,7 @@ static const struct file_operations ip_vs_stats_fops = { .open = ip_vs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) @@ -2178,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { .open = ip_vs_stats_percpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; #endif -- cgit v1.2.3 From 6f404e441d169afc90929ef5e451ec9779c1f11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 16 May 2011 15:14:21 -0400 Subject: net: Change netdev_fix_features messages loglevel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those reduced to DEBUG can possibly be triggered by unprivileged processes and are nothing exceptional. Illegal checksum combinations can only be caused by driver bug, so promote those messages to WARN. Since GSO without SG will now only cause DEBUG message from netdev_fix_features(), remove the workaround from register_netdevice(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 92009440d28b..b624fe4d9bd7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5186,27 +5186,27 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed HW and IP checksum settings.\n"); + netdev_warn(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } if ((features & NETIF_F_NO_CSUM) && (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed no checksumming and other settings.\n"); + netdev_warn(dev, "mixed no checksumming and other settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - netdev_info(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); + netdev_dbg(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } @@ -5216,7 +5216,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Software GSO depends on SG. */ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); features &= ~NETIF_F_GSO; } @@ -5226,13 +5226,13 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } @@ -5414,12 +5414,6 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) { - dev->wanted_features &= ~NETIF_F_GSO; - dev->features &= ~NETIF_F_GSO; - } - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. -- cgit v1.2.3