From 59e7e7078d6c2c6294caf454c6e3695f9d3e46a2 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 2 Jun 2011 17:28:37 -0300 Subject: mac80211: call dev_alloc_name before copying name to sdata This partially reverts 1c5cae815d19ffe02bdfda1260949ef2b1806171, because the netdev name is copied into sdata->name, which is used for debugging messages, for example. Otherwise, we get messages like this: wlan%d: authenticated Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Jiri Pirko Cc: David S. Miller Cc: Johannes Berg Cc: "John W. Linville" Signed-off-by: John W. Linville --- net/mac80211/iface.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 49d4f869e0bc..dee30aea9ab3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1145,6 +1145,10 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, + IEEE80211_ENCRYPT_HEADROOM; ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + ret = dev_alloc_name(ndev, ndev->name); + if (ret < 0) + goto fail; + ieee80211_assign_perm_addr(local, ndev, type); memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); -- cgit v1.2.3 From c316e6a3084cef1a5857cd66bb5429c969f06c93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2011 00:37:35 +0000 Subject: get_net_ns_by_fd() oopses if proc_ns_fget() returns an error BTW, looking through the code related to struct net lifetime rules has caught something else: struct net *get_net_ns_by_fd(int fd) { ... file = proc_ns_fget(fd); if (!file) goto out; ei = PROC_I(file->f_dentry->d_inode); while in proc_ns_fget() we have two return ERR_PTR(...) and not a single path that would return NULL. The other caller of proc_ns_fget() treats ERR_PTR() correctly... Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/core/net_namespace.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..e41e5110c65c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -310,19 +310,17 @@ struct net *get_net_ns_by_fd(int fd) struct file *file; struct net *net; - net = ERR_PTR(-EINVAL); file = proc_ns_fget(fd); - if (!file) - goto out; + if (IS_ERR(file)) + return ERR_CAST(file); ei = PROC_I(file->f_dentry->d_inode); - if (ei->ns_ops != &netns_operations) - goto out; + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); - net = get_net(ei->ns); -out: - if (file) - fput(file); + fput(file); return net; } -- cgit v1.2.3 From b8f07a063163f8216cd891c5b007e839a56b6d93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Jun 2011 00:54:03 +0000 Subject: fix return values of l2tp_dfs_seq_open() More fallout from struct net lifetime rules review: PTR_ERR() is *already* negative and failing ->open() should return negatives on failure. Signed-off-by: Al Viro Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8dbae82fab8..76130134bfa6 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file) */ pd->net = get_net_ns_by_pid(current->pid); if (IS_ERR(pd->net)) { - rc = -PTR_ERR(pd->net); + rc = PTR_ERR(pd->net); goto err_free_pd; } -- cgit v1.2.3 From fb04883371f2cb7867d24783e7d590036dc9b548 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 15:44:27 +0200 Subject: netfilter: add more values to enum ip_conntrack_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following error is raised (and other similar ones) : net/ipv4/netfilter/nf_nat_standalone.c: In function ‘nf_nat_fn’: net/ipv4/netfilter/nf_nat_standalone.c:119:2: warning: case value ‘4’ not in enumerated type ‘enum ip_conntrack_info’ gcc barfs on adding two enum values and getting a not enumerated result : case IP_CT_RELATED+IP_CT_IS_REPLY: Add missing enum values Signed-off-by: Eric Dumazet CC: David Miller Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++--- net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 10 ++++------ net/netfilter/nf_conntrack_irc.c | 3 +-- net/netfilter/nf_conntrack_pptp.c | 3 +-- net/netfilter/nf_conntrack_sane.c | 2 +- net/netfilter/nf_conntrack_sip.c | 2 +- net/netfilter/xt_socket.c | 4 ++-- 15 files changed, 23 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3cb9a4..5c9e97c79017 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -321,12 +321,12 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) ct->mark = hash; break; case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: /* FIXME: we don't handle expectations at the * moment. they can arrive on a different node than * the master connection (e.g. FTP passive mode) */ case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: + case IP_CT_ESTABLISHED_REPLY: break; default: break; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc74ebc..9931152a78b5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02af999..db10075dd88e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b2755ce3..3346de5d94d0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c30426480b..733c9abc1cbd 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf1d457..483b76d042da 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b22562..4111050a9fc5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f78419..0bd568929403 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -1143,7 +1143,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c7dd8f..6f5801eac999 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5c8ced..f03c2d4539f6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa318776..4f9390b98697 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 088944824e13..2fd4565144de 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e27734b2a2..8501823b3f9b 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a28581782..93faf6a3a637 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc46356b577..fe39f7e913df 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; -- cgit v1.2.3 From afb523c54718da57ff661950bd3287ec9eeb66bd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 2 Jun 2011 09:09:54 +0900 Subject: ipvs: restore support for iptables SNAT Fix the IPVS priority in LOCAL_IN hook, so that SNAT target in POSTROUTING is supported for IPVS traffic as in 2.6.36 where it worked depending on module load order. Before 2.6.37 we used priority 100 in LOCAL_IN to process remote requests. We used the same priority as iptables SNAT and if IPVS handlers are installed before SNAT handlers we supported SNAT in POSTROUTING for the IPVS traffic. If SNAT is installed before IPVS, the netfilter handlers are before IPVS and netfilter checks the NAT table twice for the IPVS requests: once in LOCAL_IN where IPS_SRC_NAT_DONE is set and second time in POSTROUTING where the SNAT rules are ignored because IPS_SRC_NAT_DONE was already set in LOCAL_IN. But in 2.6.37 we changed the IPVS priority for LOCAL_IN with the goal to be unique (101) forgetting the fact that for IPVS traffic we should not walk both LOCAL_IN and POSTROUTING nat tables. So, change the priority for processing remote IPVS requests from 101 to 99, i.e. before NAT_SRC (100) because we prefer to support SNAT in POSTROUTING instead of LOCAL_IN. It also moves the priority for IPVS replies from 99 to 98. Use constants instead of magic numbers at these places. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bfa808f4da13..55af2242bccd 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1772,7 +1772,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1782,7 +1782,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1790,7 +1790,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1798,7 +1798,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1824,7 +1824,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP6_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1834,7 +1834,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1842,7 +1842,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1850,7 +1850,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP6_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ -- cgit v1.2.3 From fcbf12817100d23890832801507107718a1fa448 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 Jun 2011 23:35:48 +0200 Subject: netfilter: ipset: Fix return code for destroy when sets are in use Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8041befc6555..42aa64b6b0b1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -767,7 +767,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { - ret = IPSET_ERR_BUSY; + ret = -IPSET_ERR_BUSY; goto out; } } -- cgit v1.2.3 From b48e3c5c323fea08c12a340cbb8dcc8ca2431d5b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 1 Jun 2011 23:35:49 +0200 Subject: netfilter: ipset: Use the stored first cidr value instead of '1' The stored cidr values are tried one after anoter. The boolean condition evaluated to '1' instead of the first stored cidr or the default host cidr. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 10 ++++++---- net/netfilter/ipset/ip_set_hash_net.c | 8 ++++++-- net/netfilter/ipset/ip_set_hash_netport.c | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 4743e5402522..565a7c5b8818 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -146,8 +146,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet4_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -394,8 +395,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c4db202b7da4..2aeeabcd5a21 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -131,7 +131,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -296,7 +298,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d2a40362dd3a..e50d9bb8820b 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -144,7 +144,8 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; @@ -357,7 +358,8 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; -- cgit v1.2.3 From d9be76f38526dccf84062e3ac3ed3a6a97698565 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 29 May 2011 23:42:29 +0300 Subject: netfilter: nf_nat: fix crash in nf_nat_csum Fix crash in nf_nat_csum when mangling packets in OUTPUT hook where skb->dev is not defined, it is set later before POSTROUTING. Problem happens for CHECKSUM_NONE. We can check device from rt but using CHECKSUM_PARTIAL should be safe (skb_checksum_help). Signed-off-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 99cfa28b6d38..ebc5f8894f99 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + -- cgit v1.2.3 From 88ed01d17b44bc2bed4ad4835d3b1099bff3dd71 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 2 Jun 2011 15:08:45 +0200 Subject: netfilter: nf_conntrack: fix ct refcount leak in l4proto->error() This patch fixes a refcount leak of ct objects that may occur if l4proto->error() assigns one conntrack object to one skbuff. In that case, we have to skip further processing in nf_conntrack_in(). With this patch, we can also fix wrong return values (-NF_ACCEPT) for special cases in ICMP[v6] that should not bump the invalid/error statistic counters. Reported-by: Zoltan Menyhart Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7404bde95994..ab5b27a2916f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } /* Small and modified version of icmp_rcv */ diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1df3c8b6bf47..7c05e7eacbc6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -177,7 +177,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } static int diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0bd568929403..f7af8b866017 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -922,6 +922,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = -ret; goto out; } + /* ICMP[v6] protocol trackers may assign one conntrack. */ + if (skb->nfct) + goto out; } ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, -- cgit v1.2.3 From d232b8dded624af3e346b13807a591c63b601c44 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 27 May 2011 20:36:51 -0400 Subject: netfilter: use unsigned variables for packet lengths in ip[6]_queue. Netlink message lengths can't be negative, so use unsigned variables. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d2c1311cb28d..f7f9bd7ba12d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -402,7 +402,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 413ab0754e1f..065fe405fb58 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -403,7 +403,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; -- cgit v1.2.3 From b084f598df36b62dfae83c10ed17f0b66b50f442 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 31 May 2011 12:24:58 -0400 Subject: nfsd: fix dependency of nfsd on auth_rpcgss Commit b0b0c0a26e84 "nfsd: add proc file listing kernel's gss_krb5 enctypes" added an nunnecessary dependency of nfsd on the auth_rpcgss module. It's a little ad hoc, but since the only piece of information nfsd needs from rpcsec_gss_krb5 is a single static string, one solution is just to share it with an include file. Cc: stable@kernel.org Reported-by: Michael Guntsche Cc: Kevin Coffman Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_mech.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e469..c3b75333b821 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -750,7 +751,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) -- cgit v1.2.3 From ab6a44ce1da48d35fe7ec95fa068aa617bd7e8dd Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 6 Jun 2011 14:35:27 -0400 Subject: Revert "mac80211: Skip tailroom reservation for full HW-crypto devices" This reverts commit aac6af5534fade2b18682a0b9efad1a6c04c34c6. Conflicts: net/mac80211/key.c That commit has a race that causes a warning, as documented in the thread here: http://marc.info/?l=linux-wireless&m=130717684914101&w=2 Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 --- net/mac80211/key.c | 21 ++------------------- net/mac80211/tx.c | 7 ++++++- 3 files changed, 8 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2025af52b195..090b0ec1e056 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -775,9 +775,6 @@ struct ieee80211_local { int tx_headroom; /* required headroom for hardware/radiotap */ - /* count for keys needing tailroom space allocation */ - int crypto_tx_tailroom_needed_cnt; - /* Tasklet and skb queue to process calls from IRQ mode. All frames * added to skb_queue will be processed, but frames in * skb_queue_unreliable may be dropped if the total length of these diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 31afd712930d..f825e2f0a57e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -101,11 +101,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - - if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) - key->local->crypto_tx_tailroom_needed_cnt--; - return 0; } @@ -161,10 +156,6 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - - if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) - key->local->crypto_tx_tailroom_needed_cnt++; } void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) @@ -403,10 +394,8 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) ieee80211_aes_key_free(key->u.ccmp.tfm); if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - if (key->local) { + if (key->local) ieee80211_debugfs_key_remove(key); - key->local->crypto_tx_tailroom_needed_cnt--; - } kfree(key); } @@ -468,8 +457,6 @@ int ieee80211_key_link(struct ieee80211_key *key, ieee80211_debugfs_key_add(key); - key->local->crypto_tx_tailroom_needed_cnt++; - ret = ieee80211_key_enable_hw_accel(key); mutex_unlock(&sdata->local->key_mtx); @@ -511,12 +498,8 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->local->key_mtx); - sdata->local->crypto_tx_tailroom_needed_cnt = 0; - - list_for_each_entry(key, &sdata->key_list, list) { - sdata->local->crypto_tx_tailroom_needed_cnt++; + list_for_each_entry(key, &sdata->key_list, list) ieee80211_key_enable_hw_accel(key); - } mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 64e0f7587e6d..3104c844b544 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1480,7 +1480,12 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, { int tail_need = 0; - if (may_encrypt && local->crypto_tx_tailroom_needed_cnt) { + /* + * This could be optimised, devices that do full hardware + * crypto (including TKIP MMIC) need no tailroom... But we + * have no drivers for such devices currently. + */ + if (may_encrypt) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); -- cgit v1.2.3 From 5a079c305ad4dda9708b7a29db4a8bd38e21c3a6 Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Mon, 6 Jun 2011 06:00:07 +0000 Subject: net/ipv6: check for mistakenly passed in non-AF_INET6 sockaddrs Same check as for IPv4, also do for IPv6. (If you passed in a IPv4 sockaddr_in here, the sizeof check in the line before would have triggered already though.) Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b7919f901fbf..d450a2f9fc06 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -272,6 +272,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) return -EINVAL; -- cgit v1.2.3 From 3019de124b9f5b1526cb3668b74af14371e21795 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 16:41:33 -0700 Subject: net: Rework netdev_drivername() to avoid warning. This interface uses a temporary buffer, but for no real reason. And now can generate warnings like: net/sched/sch_generic.c: In function dev_watchdog net/sched/sch_generic.c:254:10: warning: unused variable drivername Just return driver->name directly or "". Reported-by: Connor Hansen Signed-off-by: David S. Miller --- net/core/dev.c | 16 +++++----------- net/sched/sch_generic.c | 3 +-- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 939307891e71..1af6cb27f67a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,29 +6264,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d71c27c..b4c680900d7a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3 From 79b3891587741dfac72cdfead1f2764b56a567b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Jun 2011 17:00:35 -0700 Subject: irda: iriap: Use seperate lockdep class for irias_objects->hb_spinlock The SEQ output functions grab the obj->attrib->hb_spinlock lock of sub-objects found in the hash traversal. These locks are in a different realm than the one used for the irias_objects hash table itself. So put the latter into it's own lockdep class. Reported-by: Dave Jones Signed-off-by: David S. Miller --- net/irda/iriap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 36477538cea8..f876eed7d4aa 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -87,6 +87,8 @@ static inline void iriap_start_watchdog_timer(struct iriap_cb *self, iriap_watchdog_timer_expired); } +static struct lock_class_key irias_objects_key; + /* * Function iriap_init (void) * @@ -114,6 +116,9 @@ int __init iriap_init(void) return -ENOMEM; } + lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key, + "irias_objects"); + /* * Register some default services for IrLMP */ -- cgit v1.2.3 From 13fcb7bd322164c67926ffe272846d4860196dc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2011 22:42:06 -0700 Subject: af_packet: prevent information leak In 2.6.27, commit 393e52e33c6c2 (packet: deliver VLAN TCI to userspace) added a small information leak. Add padding field and make sure its zeroed before copy to user. Signed-off-by: Eric Dumazet CC: Patrick McHardy Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba248d93399a..c0c3cda19712 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -804,6 +804,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } else { h.h2->tp_vlan_tci = 0; } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -1736,6 +1737,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, } else { aux.tp_vlan_tci = 0; } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v1.2.3 From 6407d74c5106bb362b4087693688afd34942b094 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Tue, 7 Jun 2011 00:51:35 -0700 Subject: bridge: provide a cow_metrics method for fake_ops Like in commit 0972ddb237 (provide cow_metrics() methods to blackhole dst_ops), we must provide a cow_metrics for bridges fake_dst_ops as well. This fixes a regression coming from commits 62fa8a846d7d (net: Implement read-only protection and COW'ing of metrics.) and 33eb9873a28 (bridge: initialize fake_rtable metrics) ip link set mybridge mtu 1234 --> [ 136.546243] Pid: 8415, comm: ip Tainted: P 2.6.39.1-00006-g40545b7 #103 ASUSTeK Computer Inc. V1Sn /V1Sn [ 136.546256] EIP: 0060:[<00000000>] EFLAGS: 00010202 CPU: 0 [ 136.546268] EIP is at 0x0 [ 136.546273] EAX: f14a389c EBX: 000005d4 ECX: f80d32c0 EDX: f80d1da1 [ 136.546279] ESI: f14a3000 EDI: f255bf10 EBP: f15c3b54 ESP: f15c3b48 [ 136.546285] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 136.546293] Process ip (pid: 8415, ti=f15c2000 task=f4741f80 task.ti=f15c2000) [ 136.546297] Stack: [ 136.546301] f80c658f f14a3000 ffffffed f15c3b64 c12cb9c8 f80d1b80 ffffffa1 f15c3bbc [ 136.546315] c12da347 c12d9c7d 00000000 f7670b00 00000000 f80d1b80 ffffffa6 f15c3be4 [ 136.546329] 00000004 f14a3000 f255bf20 00000008 f15c3bbc c11d6cae 00000000 00000000 [ 136.546343] Call Trace: [ 136.546359] [] ? br_change_mtu+0x5f/0x80 [bridge] [ 136.546372] [] dev_set_mtu+0x38/0x80 [ 136.546381] [] do_setlink+0x1a7/0x860 [ 136.546390] [] ? rtnl_fill_ifinfo+0x9bd/0xc70 [ 136.546400] [] ? nla_parse+0x6e/0xb0 [ 136.546409] [] rtnl_newlink+0x361/0x510 [ 136.546420] [] ? vmalloc_sync_all+0x100/0x100 [ 136.546429] [] ? error_code+0x5a/0x60 [ 136.546438] [] ? rtnl_configure_link+0x80/0x80 [ 136.546446] [] rtnetlink_rcv_msg+0xfa/0x210 [ 136.546454] [] ? __rtnl_unlock+0x20/0x20 [ 136.546463] [] netlink_rcv_skb+0x8e/0xb0 [ 136.546471] [] rtnetlink_rcv+0x1c/0x30 [ 136.546479] [] netlink_unicast+0x23a/0x280 [ 136.546487] [] netlink_sendmsg+0x26b/0x2f0 [ 136.546497] [] sock_sendmsg+0xc8/0x100 [ 136.546508] [] ? __alloc_pages_nodemask+0xe1/0x750 [ 136.546517] [] ? _copy_from_user+0x42/0x60 [ 136.546525] [] ? verify_iovec+0x4c/0xc0 [ 136.546534] [] sys_sendmsg+0x1c5/0x200 [ 136.546542] [] ? __do_fault+0x310/0x410 [ 136.546549] [] ? do_wp_page+0x1d6/0x6b0 [ 136.546557] [] ? handle_pte_fault+0xe1/0x720 [ 136.546565] [] ? sys_getsockname+0x7f/0x90 [ 136.546574] [] ? handle_mm_fault+0xb1/0x180 [ 136.546582] [] ? vmalloc_sync_all+0x100/0x100 [ 136.546589] [] ? do_page_fault+0x173/0x3d0 [ 136.546596] [] ? sys_recvmsg+0x3b/0x60 [ 136.546605] [] sys_socketcall+0x293/0x2d0 [ 136.546614] [] sysenter_do_call+0x12/0x26 [ 136.546619] Code: Bad EIP value. [ 136.546627] EIP: [<00000000>] 0x0 SS:ESP 0068:f15c3b48 [ 136.546645] CR2: 0000000000000000 [ 136.546652] ---[ end trace 6909b560e78934fa ]--- Signed-off-by: Alexander Holler Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3fa123185e89..56149ec36d7f 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -104,10 +104,16 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, + .cow_metrics = fake_cow_metrics, }; /* -- cgit v1.2.3 From 264524d5e5195f6e0f099bee20253a22b651e272 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Jun 2011 20:50:03 +0000 Subject: net: cpu offline cause napi stall Frank Blaschka reported : During heavy network load we turn off/on cpus. Sometimes this causes a stall on the network device. Digging into the dump I found out following: napi is scheduled but does not run. From the I/O buffers and the napi state I see napi/rx_softirq processing has stopped because the budget was reached. napi stays in the softnet_data poll_list and the rx_softirq was raised again. I assume at this time the cpu offline comes in, the rx softirq is raised/moved to another cpu but napi stays in the poll_list of the softnet_data of the now offline cpu. Reviewing dev_cpu_callback (net/core/dev.c) I did not find the poll_list is transfered to the new cpu. This patch is a straightforward implementation of Frank suggestion : Transfert poll_list and trigger NET_RX_SOFTIRQ on new cpu. Reported-by: Frank Blaschka Signed-off-by: Heiko Carstens Signed-off-by: Eric Dumazet Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1af6cb27f67a..a54c9f87ddbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6178,6 +6178,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); -- cgit v1.2.3 From bb77f6341728624314f488ebd8b4c69f2caa33ea Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 7 Jun 2011 14:03:08 -0400 Subject: Revert "mac80211: stop queues before rate control updation" This reverts commit 1d38c16ce4156f63b45abbd09dd28ca2ef5172b4. The mac80211 maintainer raised complaints about abuse of the CSA stop reason, and about whether this patch actually serves its intended purpose at all. Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 456cccf26b51..d595265d6c22 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -232,9 +232,6 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } - ieee80211_stop_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -248,9 +245,6 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } - ieee80211_wake_queues_by_reason(&sdata->local->hw, - IEEE80211_QUEUE_STOP_REASON_CSA); - ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ -- cgit v1.2.3 From 57a27e1d6a3bb9ad4efeebd3a8c71156d6207536 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 7 Jun 2011 20:42:26 +0300 Subject: nl80211: fix overflow in ssid_len When one of the SSID's length passed in a scan or sched_scan request is larger than 255, there will be an overflow in the u8 that is used to store the length before checking. This causes the check to fail and we overrun the buffer when copying the SSID. Fix this by checking the nl80211 attribute length before copying it to the struct. This is a follow up for the previous commit 208c72f4fe44fe09577e7975ba0e7fa0278f3d03, which didn't fix the problem entirely. Reported-by: Ido Yariv Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88a565f130a5..98fa8eb6cc4b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3406,11 +3406,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i = 0; if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { - request->ssids[i].ssid_len = nla_len(attr); - if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { + if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } + request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; } @@ -3572,12 +3572,11 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { - request->ssids[i].ssid_len = nla_len(attr); - if (request->ssids[i].ssid_len > - IEEE80211_MAX_SSID_LEN) { + if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } + request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; -- cgit v1.2.3 From e756682c8baa47da1648c0c016e9f48ed66bc32d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 5 Jun 2011 20:46:03 +0000 Subject: xfrm: Fix off by one in the replay advance functions We may write 4 byte too much when we reinitialize the anti replay window in the replay advance functions. This patch fixes this by adjusting the last index of the initialization loop. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_replay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 47f1b8638df9..b11ea692bd7d 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -265,7 +265,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; @@ -471,7 +471,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; -- cgit v1.2.3 From 2584547230ae49b8de91ab3bb5e0a81898905b45 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 3 Jun 2011 09:37:09 -0700 Subject: ceph: fix sync vs canceled write If we cancel a write, trigger the safe completions to prevent a sync from blocking indefinitely in ceph_osdc_sync(). Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6ea2b892f44b..9cb627a4073a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1144,6 +1144,13 @@ static void handle_osds_timeout(struct work_struct *work) round_jiffies_relative(delay)); } +static void complete_request(struct ceph_osd_request *req) +{ + if (req->r_safe_callback) + req->r_safe_callback(req, NULL); + complete_all(&req->r_safe_completion); /* fsync waiter */ +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1226,11 +1233,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, else complete_all(&req->r_completion); - if (flags & CEPH_OSD_FLAG_ONDISK) { - if (req->r_safe_callback) - req->r_safe_callback(req, msg); - complete_all(&req->r_safe_completion); /* fsync waiter */ - } + if (flags & CEPH_OSD_FLAG_ONDISK) + complete_request(req); done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1732,6 +1736,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, __cancel_request(req); __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + complete_request(req); dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } -- cgit v1.2.3 From f3209bea110cade12e2b133da8b8499689cb0e2e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jun 2011 13:27:29 +0200 Subject: mac80211: fix IBSS teardown race Ignacy reports that sometimes after leaving an IBSS joining a new one didn't work because there still were stations on the list. He fixed it by flushing stations when attempting to join a new IBSS, but this shouldn't be happening in the first case. When I looked into it I saw a race condition in teardown that could cause stations to be added after flush, and thus cause this situation. Ignacy confirms that after applying my patch he hasn't seen this happen again. Reported-by: Ignacy Gawedzki Debugged-by: Ignacy Gawedzki Tested-by: Ignacy Gawedzki Cc: stable@kernel.org Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 421eaa6b0c2b..56c24cabf26d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -965,6 +965,10 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->u.ibss.mtx); + sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; + memset(sdata->u.ibss.bssid, 0, ETH_ALEN); + sdata->u.ibss.ssid_len = 0; + active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -999,8 +1003,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree_skb(skb); skb_queue_purge(&sdata->skb_queue); - memset(sdata->u.ibss.bssid, 0, ETH_ALEN); - sdata->u.ibss.ssid_len = 0; del_timer_sync(&sdata->u.ibss.timer); -- cgit v1.2.3 From fe6fe792faec3fc2d2db39b69651682b8c4e7fcb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2011 06:07:07 +0000 Subject: net: pmtu_expires fixes commit 2c8cec5c10bc (ipv4: Cache learned PMTU information in inetpeer) added some racy peer->pmtu_expires accesses. As its value can be changed by another cpu/thread, we should be more careful, reading its value once. Add peer_pmtu_expired() and peer_pmtu_cleaned() helpers Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 78 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b0b956508b..045f0ec6a4a0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1316,6 +1316,23 @@ reject_redirect: ; } +static bool peer_pmtu_expired(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + time_after_eq(jiffies, orig) && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + +static bool peer_pmtu_cleaned(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1331,14 +1348,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && - rt->peer->pmtu_expires && - time_after_eq(jiffies, rt->peer->pmtu_expires)) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(dst, RTAX_MTU, - rt->peer->pmtu_orig); + } else if (rt->peer && peer_pmtu_expired(rt->peer)) { + dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); } } return ret; @@ -1531,8 +1542,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { - unsigned long expires = peer->pmtu_expires; + unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); + if (!expires) + return; if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { @@ -1555,10 +1568,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt_bind_peer(rt, rt->rt_dst, 1); peer = rt->peer; if (peer) { + unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; + if (!pmtu_expires || mtu < peer->pmtu_learned) { pmtu_expires = jiffies + ip_rt_mtu_expires; if (!pmtu_expires) @@ -1612,13 +1626,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) rt_bind_peer(rt, rt->rt_dst, 0); peer = rt->peer; - if (peer && peer->pmtu_expires) + if (peer) { check_peer_pmtu(dst, peer); - if (peer && peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } } rt->rt_peer_genid = rt_peer_genid(); @@ -1649,14 +1664,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && - rt->peer && - rt->peer->pmtu_expires) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); - } + if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); } static int ip_rt_bug(struct sk_buff *skb) @@ -1770,8 +1779,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - if (peer->pmtu_expires) - check_peer_pmtu(&rt->dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -2775,7 +2783,8 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires; + long expires = 0; + const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2823,15 +2832,16 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = (rt->peer && rt->peer->pmtu_expires) ? - rt->peer->pmtu_expires - jiffies : 0; - if (rt->peer) { + if (peer) { inet_peer_refcheck(rt->peer); - id = atomic_read(&rt->peer->ip_id_count) & 0xffff; - if (rt->peer->tcp_ts_stamp) { - ts = rt->peer->tcp_ts; - tsage = get_seconds() - rt->peer->tcp_ts_stamp; + id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; } + expires = ACCESS_ONCE(peer->pmtu_expires); + if (expires) + expires -= jiffies; } if (rt_is_input_route(rt)) { -- cgit v1.2.3 From 0c1ad04aecb975f2a2014e1bc5a2fa23923ecbd9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 9 Jun 2011 00:28:13 -0700 Subject: netpoll: prevent netpoll setup on slave devices In commit 8d8fc29d02a33e4bd5f4fa47823c1fd386346093 (netpoll: disable netpoll when enslave a device), we automatically disable netpoll when the underlying device is being enslaved, we also need to prevent people from setuping netpoll on devices that are already enslaved. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2d7d6d473781..42ea4b0e59f1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -792,6 +792,12 @@ int netpoll_setup(struct netpoll *np) return -ENODEV; } + if (ndev->master) { + printk(KERN_ERR "%s: %s is a slave device, aborting.\n", + np->name, np->dev_name); + return -EBUSY; + } + if (!netif_running(ndev)) { unsigned long atmost, atleast; -- cgit v1.2.3 From 8d03e971cf403305217b8e62db3a2e5ad2d6263f Mon Sep 17 00:00:00 2001 From: Filip Palian Date: Thu, 12 May 2011 19:32:46 +0200 Subject: Bluetooth: l2cap and rfcomm: fix 1 byte infoleak to userspace. Structures "l2cap_conninfo" and "rfcomm_conninfo" have one padding byte each. This byte in "cinfo" is copied to userspace uninitialized. Signed-off-by: Filip Palian Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 18dc9888d8c2..8248303f44e8 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -413,6 +413,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; } + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = chan->conn->hcon->handle; memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 386cfaffd4b7..1b10727ce523 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -788,6 +788,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); -- cgit v1.2.3 From 96d7303e9cfb6a9bc664174a4dfdb6fa689284fe Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 5 Jun 2011 20:48:47 +0000 Subject: ipv4: Fix packet size calculation for raw IPsec packets in __ip_append_data We assume that transhdrlen is positive on the first fragment which is wrong for raw packets. So we don't add exthdrlen to the packet size for raw packets. This leads to a reallocation on IPsec because we have not enough headroom on the skb to place the IPsec headers. This patch fixes this by adding exthdrlen to the packet size whenever the send queue of the socket is empty. This issue was introduced with git commit 1470ddf7 (inet: Remove explicit write references to sk/inet in ip_append_data) Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 98af3697c718..a8024eaa0e87 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -799,7 +799,9 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; - exthdrlen = transhdrlen ? rt->dst.header_len : 0; + skb = skb_peek_tail(queue); + + exthdrlen = !skb ? rt->dst.header_len : 0; length += exthdrlen; transhdrlen += exthdrlen; mtu = cork->fragsize; @@ -825,8 +827,6 @@ static int __ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(queue); - cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && -- cgit v1.2.3 From 0da67bed835fdde68ca0e924d2a2d6ac82c70833 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 May 2011 11:13:15 +0300 Subject: Bluetooth: fix shutdown on SCO sockets shutdown should wait for SCO link to be properly disconnected before detroying the socket, otherwise an application using the socket may assume link is properly disconnected before it really happens which can be a problem when e.g synchronizing profile switch. Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 42fdffd1d76c..cb4fb7837e5c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -369,6 +369,15 @@ static void __sco_sock_close(struct sock *sk) case BT_CONNECTED: case BT_CONFIG: + if (sco_pi(sk)->conn) { + sk->sk_state = BT_DISCONN; + sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + hci_conn_put(sco_pi(sk)->conn->hcon); + sco_pi(sk)->conn->hcon = NULL; + } else + sco_chan_del(sk, ECONNRESET); + break; + case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); @@ -819,7 +828,9 @@ static void sco_chan_del(struct sock *sk, int err) conn->sk = NULL; sco_pi(sk)->conn = NULL; sco_conn_unlock(conn); - hci_conn_put(conn->hcon); + + if (conn->hcon) + hci_conn_put(conn->hcon); } sk->sk_state = BT_CLOSED; -- cgit v1.2.3 From 7f4f0572df6c8eaa6a587bc212b0806ff37380dd Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Fri, 27 May 2011 11:16:21 +0300 Subject: Bluetooth: Do not send SET_EVENT_MASK for 1.1 and earlier devices Some old hci controllers do not accept any mask so leave the default mask on for these devices. < HCI Command: Set Event Mask (0x03|0x0001) plen 8 Mask: 0xfffffbff00000000 > HCI Event: Command Complete (0x0e) plen 4 Set Event Mask (0x03|0x0001) ncmd 1 status 0x12 Error: Invalid HCI Command Parameters Signed-off-by: Ville Tervo Tested-by: Corey Boyle Tested-by: Ed Tomlinson Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f13ddbf858ba..77930aa522e3 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -477,14 +477,16 @@ static void hci_setup_event_mask(struct hci_dev *hdev) * command otherwise */ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - /* Events for 1.2 and newer controllers */ - if (hdev->lmp_ver > 1) { - events[4] |= 0x01; /* Flow Specification Complete */ - events[4] |= 0x02; /* Inquiry Result with RSSI */ - events[4] |= 0x04; /* Read Remote Extended Features Complete */ - events[5] |= 0x08; /* Synchronous Connection Complete */ - events[5] |= 0x10; /* Synchronous Connection Changed */ - } + /* CSR 1.1 dongles does not accept any bitfield so don't try to set + * any event mask for pre 1.2 devices */ + if (hdev->lmp_ver <= 1) + return; + + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ if (hdev->features[3] & LMP_RSSI_INQ) events[4] |= 0x04; /* Inquiry Result with RSSI */ -- cgit v1.2.3 From 0b5c9db1b11d3175bb42b80663a9f072f801edf5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 10 Jun 2011 06:56:58 +0000 Subject: vlan: Fix the ingress VLAN_FLAG_REORDER_HDR check Testing of VLAN_FLAG_REORDER_HDR does not belong in vlan_untag but rather in vlan_do_receive. Otherwise the vlan header will not be properly put on the packet in the case of vlan header accelleration. As we remove the check from vlan_check_reorder_header rename it vlan_reorder_header to keep the naming clean. Fix up the skb->pkt_type early so we don't look at the packet after adding the vlan tag, which guarantees we don't goof and look at the wrong field. Use a simple if statement instead of a complicated switch statement to decided that we need to increment rx_stats for a multicast packet. Hopefully at somepoint we will just declare the case where VLAN_FLAG_REORDER_HDR is cleared as unsupported and remove the code. Until then this keeps it working correctly. Signed-off-by: Eric W. Biederman Signed-off-by: Jiri Pirko Acked-by: Changli Gao Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 60 ++++++++++++++++++++++++++++----------------------- net/core/dev.c | 2 +- 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c9..fcc684678af6 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,22 +56,8 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; @@ -89,18 +100,13 @@ gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_frags); -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +167,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/core/dev.c b/net/core/dev.c index a54c9f87ddbb..9c58c1ec41a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3114,7 +3114,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; -- cgit v1.2.3 From 83fe32de63e60af34fa8dae83716cb13b8677abd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 11 Jun 2011 18:55:22 -0700 Subject: netpoll: call dev_put() on error in netpoll_setup() There is a dev_put(ndev) missing on an error path. This was introduced in 0c1ad04aecb "netpoll: prevent netpoll setup on slave devices". Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 42ea4b0e59f1..18d9cbda3a39 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -795,7 +795,8 @@ int netpoll_setup(struct netpoll *np) if (ndev->master) { printk(KERN_ERR "%s: %s is a slave device, aborting.\n", np->name, np->dev_name); - return -EBUSY; + err = -EBUSY; + goto put; } if (!netif_running(ndev)) { -- cgit v1.2.3 From a685e08987d1edf1995b76511d4c98ea0e905377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jun 2011 21:13:01 -0400 Subject: Delay struct net freeing while there's a sysfs instance refering to it * new refcount in struct net, controlling actual freeing of the memory * new method in kobj_ns_type_operations (->drop_ns()) * ->current_ns() semantics change - it's supposed to be followed by corresponding ->drop_ns(). For struct net in case of CONFIG_NET_NS it bumps the new refcount; net_drop_ns() decrements it and calls net_free() if the last reference has been dropped. Method renamed to ->grab_current_ns(). * old net_free() callers call net_drop_ns() instead. * sysfs_exit_ns() is gone, along with a large part of callchain leading to it; now that the references stored in ->ns[...] stay valid we do not need to hunt them down and replace them with NULL. That fixes problems in sysfs_lookup() and sysfs_readdir(), along with getting rid of sb->s_instances abuse. Note that struct net *shutdown* logics has not changed - net_cleanup() is called exactly when it used to be called. The only thing postponed by having a sysfs instance refering to that struct net is actual freeing of memory occupied by struct net. Signed-off-by: Al Viro --- net/core/net-sysfs.c | 23 +++++++++-------------- net/core/net_namespace.c | 12 ++++++++++-- 2 files changed, 19 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..33d2a1fba131 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6c6b86d0da15..cdcbc3cb00a9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -128,6 +128,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -210,6 +211,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -230,7 +238,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -286,7 +294,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); -- cgit v1.2.3 From 8f4e0a18682d91abfad72ede3d3cb5f3ebdf54b4 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 13 Jun 2011 09:06:57 +0200 Subject: IPVS netns exit causes crash in conntrack Quote from Patric Mc Hardy "This looks like nfnetlink.c excited and destroyed the nfnl socket, but ip_vs was still holding a reference to a conntrack. When the conntrack got destroyed it created a ctnetlink event, causing an oops in netlink_has_listeners when trying to use the destroyed nfnetlink socket." If nf_conntrack_netlink is loaded before ip_vs this is not a problem. This patch simply avoids calling ip_vs_conn_drop_conntrack() when netns is dying as suggested by Julian. Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++++- net/netfilter/ipvs/ip_vs_core.c | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index bf28ac2fc99b..782db275ac53 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) + if (cp->flags & IP_VS_CONN_F_NFCT) { ip_vs_conn_drop_conntrack(cp); + /* Do not access conntracks during subsys cleanup + * because nf_conntrack_find_get can not be used after + * conntrack cleanup for the net. + */ + smp_rmb(); + if (ipvs->enable) + ip_vs_conn_drop_conntrack(cp); + } ip_vs_pe_put(cp->pe); kfree(cp->pe_data); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 55af2242bccd..24c28d238dcb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1945,6 +1945,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); net_ipvs(net)->enable = 0; /* Disable packet reception */ + smp_wmb(); __ip_vs_sync_cleanup(net); LeaveFunction(2); } -- cgit v1.2.3 From 6fdf658c9a0e51e6663f2769f6d310c2843a862b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 13 Jun 2011 15:37:35 +0300 Subject: Bluetooth: Fix L2CAP security check With older userspace versions (using hciops) it might not have the key type to check if the key has sufficient security for any security level so it is necessary to check the return of hci_conn_auth to make sure the connection is authenticated Signed-off-by: Luiz Augusto von Dentz Acked-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 3163330cd4f1..b9aa9862d14b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -611,8 +611,8 @@ auth: if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; - hci_conn_auth(conn, sec_level, auth_type); - return 0; + if (!hci_conn_auth(conn, sec_level, auth_type)) + return 0; encrypt: if (conn->link_mode & HCI_LM_ENCRYPT) -- cgit v1.2.3 From b9cabe52c27cf834137f3aaa46da23bcf32284e8 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 12 Jun 2011 04:28:16 +0000 Subject: ieee802154: Don't leak memory in ieee802154_nl_fill_phy In net/ieee802154/nl-phy.c::ieee802154_nl_fill_phy() I see two small issues. 1) If the allocation of 'buf' fails we may just as well return -EMSGSIZE directly rather than jumping to 'out:' and do a pointless kfree(0). 2) We do not free 'buf' unless we jump to one of the error labels and this leaks memory. This patch should address both. Signed-off-by: Jesper Juhl Acked-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/nl-phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ed0eab39f531..02548b292b53 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -44,7 +44,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pr_debug("%s\n", __func__); if (!buf) - goto out; + return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); @@ -65,6 +65,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pages * sizeof(uint32_t), buf); mutex_unlock(&phy->pib_lock); + kfree(buf); return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3 From 9bb0ce2b0b734f3325ea5cd6b351856eeac94f78 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Jun 2011 16:20:18 -0700 Subject: libceph: fix page calculation for non-page-aligned io Set the page count correctly for non-page-aligned IO. We were already doing this correctly for alignment, but not the page count. Fixes DIRECT_IO writes from unaligned pages. Signed-off-by: Sage Weil --- net/ceph/osd_client.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 9cb627a4073a..7330c2757c0c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -477,8 +477,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ - /* in case it differs from natural alignment that calc_layout - filled in for us */ + /* in case it differs from natural (file) alignment that + calc_layout filled in for us */ + req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; ceph_osdc_build_request(req, off, plen, ops, @@ -2027,8 +2028,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int want = calc_pages_for(req->r_page_alignment, data_len); if (unlikely(req->r_num_pages < want)) { - pr_warning("tid %lld reply %d > expected %d pages\n", - tid, want, m->nr_pages); + pr_warning("tid %lld reply has %d bytes %d pages, we" + " had only %d pages ready\n", tid, data_len, + want, req->r_num_pages); *skip = 1; ceph_msg_put(m); m = NULL; -- cgit v1.2.3 From df3c3931ec58cca3409c71b18ad6da0cd71fe163 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 14 Jun 2011 12:48:19 +0300 Subject: Bluetooth: Fix accepting connect requests for defer_setup When authentication completes we shouldn't blindly accept any pending L2CAP connect requests. If the socket has the defer_setup feature enabled it should still wait for user space acceptance of the connect request. The issue only happens for non-SSP connections since with SSP the L2CAP Connect request may not be sent for non-SDP PSMs before authentication has completed successfully. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e64a1c2df238..56fdd9162da9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4002,21 +4002,30 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } } else if (sk->sk_state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; - __u16 result; + __u16 res, stat; if (!status) { - sk->sk_state = BT_CONFIG; - result = L2CAP_CR_SUCCESS; + if (bt_sk(sk)->defer_setup) { + struct sock *parent = bt_sk(sk)->parent; + res = L2CAP_CR_PEND; + stat = L2CAP_CS_AUTHOR_PEND; + parent->sk_data_ready(parent, 0); + } else { + sk->sk_state = BT_CONFIG; + res = L2CAP_CR_SUCCESS; + stat = L2CAP_CS_NO_INFO; + } } else { sk->sk_state = BT_DISCONN; l2cap_sock_set_timer(sk, HZ / 10); - result = L2CAP_CR_SEC_BLOCK; + res = L2CAP_CR_SEC_BLOCK; + stat = L2CAP_CS_NO_INFO; } rsp.scid = cpu_to_le16(chan->dcid); rsp.dcid = cpu_to_le16(chan->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + rsp.result = cpu_to_le16(res); + rsp.status = cpu_to_le16(stat); l2cap_send_cmd(conn, chan->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } -- cgit v1.2.3 From 0b760113a3a155269a3fba93a409c640031dd68f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 May 2011 15:15:34 -0400 Subject: NLM: Don't hang forever on NLM unlock requests If the NLM daemon is killed on the NFS server, we can currently end up hanging forever on an 'unlock' request, instead of aborting. Basically, if the rpcbind request fails, or the server keeps returning garbage, we really want to quit instead of retrying. Tested-by: Vasily Averin Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- net/sunrpc/clnt.c | 3 +++ net/sunrpc/sched.c | 1 + 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b84d7395535e..566bcfd067f6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1175,6 +1175,9 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } + if (task->tk_rebind_retry == 0) + break; + task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6b43ee7221d5..a27406b1654f 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -792,6 +792,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; -- cgit v1.2.3 From 330605423ca6eafafb8dcc27502bce1c585d1b06 Mon Sep 17 00:00:00 2001 From: Ilia Kolomisnky Date: Wed, 15 Jun 2011 06:52:26 +0300 Subject: Bluetooth: Fix L2CAP connection establishment In hci_conn_security ( which is used during L2CAP connection establishment ) test for HCI_CONN_ENCRYPT_PEND state also sets this state, which is bogus and leads to connection time-out on L2CAP sockets in certain situations (especially when using non-ssp devices ) Signed-off-by: Ilia Kolomisnky Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b9aa9862d14b..d3a05b9ade7a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -608,7 +608,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) goto encrypt; auth: - if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; if (!hci_conn_auth(conn, sec_level, auth_type)) -- cgit v1.2.3 From 63f6fe92c6b3cbf4c0bbbea4b31fdd3d68e21e4d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 16 Jun 2011 17:16:37 +0200 Subject: netfilter: ip_tables: fix compile with debug Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 764743843503..24e556e83a3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct xt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, par->match->name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } -- cgit v1.2.3 From 58d5a0257d2fd89fbe4451f704193cc95b0a9c97 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:17 +0200 Subject: netfilter: ipt_ecn: fix protocol check in ecn_mt_check() Check for protocol inversion in ecn_mt_check() and remove the unnecessary runtime check for IPPROTO_TCP in ecn_mt(). Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index af6e9c778345..aaa85be1b2d8 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -76,8 +76,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return false; if (!match_tcp(skb, info, &par->hotdrop)) return false; } @@ -97,7 +95,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - ip->proto != IPPROTO_TCP) { + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return -EINVAL; } -- cgit v1.2.3 From db898aa2ef6529fa80891e754d353063611c77de Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Jun 2011 17:24:55 +0200 Subject: netfilter: ipt_ecn: fix inversion for IP header ECN match Userspace allows to specify inversion for IP header ECN matches, the kernel silently accepts it, but doesn't invert the match result. Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_ecn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index aaa85be1b2d8..2b57e52c746c 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); static inline bool match_ip(const struct sk_buff *skb, const struct ipt_ecn_info *einfo) { - return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; + return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & IPT_ECN_OP_MATCH_IP); } static inline bool match_tcp(const struct sk_buff *skb, -- cgit v1.2.3 From 2c38de4c1f8da799bdca0e4bb40ca13f2174d3e8 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 16 Jun 2011 17:27:04 +0200 Subject: netfilter: fix looped (broad|multi)cast's MAC handling By default, when broadcast or multicast packet are sent from a local application, they are sent to the interface then looped by the kernel to other local applications, going throught netfilter hooks in the process. These looped packet have their MAC header removed from the skb by the kernel looping code. This confuse various netfilter's netlink queue, netlink log and the legacy ip_queue, because they try to extract a hardware address from these packets, but extracts a part of the IP header instead. This patch prevent NFQUEUE, NFLOG and ip_QUEUE to include a MAC header if there is none in the packet. Signed-off-by: Nicolas Cavallari Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/nfnetlink_queue.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index f7f9bd7ba12d..5c9b9d963918 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 065fe405fb58..249394863284 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e0ee010935e7..2e7ccbb43ddb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); - if (indev && skb->dev) { + if (indev && skb->dev && + skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b83123f12b42..fdd2fafe0a14 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -335,7 +335,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev) { + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; int len = dev_parse_header(entskb, phw.hw_addr); if (len) { -- cgit v1.2.3 From 42c1edd345c8412d96e7a362ee06feb7be73bb6c Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 16 Jun 2011 17:29:22 +0200 Subject: netfilter: nf_nat: avoid double seq_adjust for loopback Avoid double seq adjustment for loopback traffic because it causes silent repetition of TCP data. One example is passive FTP with DNAT rule and difference in the length of IP addresses. This patch adds check if packet is sent and received via loopback device. As the same conntrack is used both for outgoing and incoming direction, we restrict seq adjustment to happen only in POSTROUTING. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index db10075dd88e..de9da21113a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); -- cgit v1.2.3 From 62b2bcb49cca72f6d3f39f831127a6ab315a475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Mon, 13 Jun 2011 15:04:43 +0000 Subject: IGMP snooping: set mrouters_only flag for IPv4 traffic properly Upon reception of a IGMP/IGMPv2 membership report the kernel sets the mrouters_only flag in a skb that may be a clone of the original skb, which means that sometimes the bridge loses track of membership report packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Tested-by: Hayato Kakuta Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2f14eafdeeab..a6d87c17bb03 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1424,7 +1424,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: -- cgit v1.2.3 From fc2af6c73fc9449cd5894a36bb76b8f8c0e49fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Mon, 13 Jun 2011 15:06:58 +0000 Subject: IGMP snooping: set mrouters_only flag for IPv6 traffic properly Upon reception of a MGM report packet the kernel sets the mrouters_only flag in a skb that is a clone of the original skb, which means that the bridge loses track of MGM packets (cb buffers are tied to a specific skb and not shared) and it ends up forwading join requests to the bridge interface. This can cause unexpected membership timeouts and intermitent/permanent loss of connectivity as described in RFC 4541 [2.1.1. IGMP Forwarding Rules]: A snooping switch should forward IGMP Membership Reports only to those ports where multicast routers are attached. [...] Sending membership reports to other hosts can result, for IGMPv1 and IGMPv2, in unintentionally preventing a host from joining a specific multicast group. Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a6d87c17bb03..29b9812c8da0 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1543,7 +1543,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } -- cgit v1.2.3 From a1b7f85e4f632f9cc342d8a34a3903feaf47a261 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Wed, 15 Jun 2011 12:38:25 +0000 Subject: caif: Bugfix - XOFF removed channel from caif-mux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XOFF was mixed up with DOWN indication, causing causing CAIF channel to be removed from mux and all incoming traffic to be lost after receiving flow-off. Fix this by replacing FLOW_OFF with DOWN notification. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- net/caif/cfmuxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 3a66b8c10e09..c23979e79dfa 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || + if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && layer->id != 0) { -- cgit v1.2.3 From e3cb78c772de593afa720687ce3abbed8d93b0c3 Mon Sep 17 00:00:00 2001 From: Antoine Reversat Date: Thu, 16 Jun 2011 10:47:13 +0000 Subject: vlan: don't call ndo_vlan_rx_register on hardware that doesn't have vlan support This patch removes the call to ndo_vlan_rx_register if the underlying device doesn't have hardware support for VLAN. Signed-off-by: Antoine Reversat Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c7a581a96894..917ecb93ea28 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -205,7 +205,7 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register) + if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX)) ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } -- cgit v1.2.3 From 5afa9133cfe67f1bfead6049a9640c9262a7101c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Jun 2011 10:14:59 -0400 Subject: SUNRPC: Ensure the RPC client only quits on fatal signals Fix a couple of instances where we were exiting the RPC client on arbitrary signals. We should only do so on fatal signals. Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 339ba64cce1e..5daf6cc4faea 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -577,13 +577,13 @@ retry: } inode = &gss_msg->inode->vfs_inode; for (;;) { - prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&inode->i_lock); - if (signalled()) { + if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 566bcfd067f6..8c9141583d6f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1061,7 +1061,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !signalled()) { + if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; -- cgit v1.2.3 From 1eddceadb0d6441cd39b2c38705a8f5fec86e770 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 03:45:15 +0000 Subject: net: rfs: enable RFS before first data packet is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le jeudi 16 juin 2011 à 23:38 -0400, David Miller a écrit : > From: Ben Hutchings > Date: Fri, 17 Jun 2011 00:50:46 +0100 > > > On Wed, 2011-06-15 at 04:15 +0200, Eric Dumazet wrote: > >> @@ -1594,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) > >> goto discard; > >> > >> if (nsk != sk) { > >> + sock_rps_save_rxhash(nsk, skb->rxhash); > >> if (tcp_child_process(sk, nsk, skb)) { > >> rsk = nsk; > >> goto reset; > >> > > > > I haven't tried this, but it looks reasonable to me. > > > > What about IPv6? The logic in tcp_v6_do_rcv() looks very similar. > > Indeed ipv6 side needs the same fix. > > Eric please add that part and resubmit. And in fact I might stick > this into net-2.6 instead of net-next-2.6 > OK, here is the net-2.6 based one then, thanks ! [PATCH v2] net: rfs: enable RFS before first data packet is received First packet received on a passive tcp flow is not correctly RFS steered. One sock_rps_record_flow() call is missing in inet_accept() But before that, we also must record rxhash when child socket is setup. Signed-off-by: Eric Dumazet CC: Tom Herbert CC: Ben Hutchings CC: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9c1926027a26..eae1f676f870 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -676,6 +676,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..708dc203b034 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1589,6 +1589,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fd28711ba5..87551ca568cd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1644,6 +1644,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) -- cgit v1.2.3 From eeb1497277d6b1a0a34ed36b97e18f2bd7d6de0d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jun 2011 16:25:39 -0400 Subject: inet_diag: fix inet_diag_bc_audit() A malicious user or buggy application can inject code and trigger an infinite loop in inet_diag_bc_audit() Also make sure each instruction is aligned on 4 bytes boundary, to avoid unaligned accesses. Reported-by: Dan Rosenberg Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_diag.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6ffe94ca5bc9..3267d3898437 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } -- cgit v1.2.3 From 9aa3c94ce59066f545521033007abb6441706068 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jun 2011 11:59:18 -0700 Subject: ipv4: fix multicast losses Knut Tidemann found that first packet of a multicast flow was not correctly received, and bisected the regression to commit b23dd4fe42b4 (Make output route lookup return rtable directly.) Special thanks to Knut, who provided a very nice bug report, including sample programs to demonstrate the bug. Reported-and-bisectedby: Knut Tidemann Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 045f0ec6a4a0..aa13ef105110 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1902,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; -- cgit v1.2.3 From cefa9993f161c1c2b6b91b7ea2e84a9bfbd43d2e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 19 Jun 2011 16:13:01 -0700 Subject: netpoll: copy dev name of slaves to struct netpoll Otherwise we will not see the name of the slave dev in error message: [ 388.469446] (null): doesn't support polling, aborting. Signed-off-by: WANG Cong Signed-off-by: David S. Miller --- net/bridge/br_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6b2f86378c7..c188c803c09c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -243,6 +243,7 @@ int br_netpoll_enable(struct net_bridge_port *p) goto out; np->dev = p->dev; + strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { -- cgit v1.2.3 From 8ad2475e3555346fbd738e77da12578b97d10505 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 19 Jun 2011 22:31:20 +0000 Subject: ipv4, ping: Remove duplicate icmp.h include Remove the duplicate inclusion of net/icmp.h from net/ipv4/ping.c Signed-off-by: Jesper Juhl Signed-off-by: David S. Miller --- net/ipv4/ping.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9aaa67165f42..39b403f854c6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 32c90254ed4a0c698caa0794ebb4de63fcc69631 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:39 +0000 Subject: ipv6/udp: Use the correct variable to determine non-blocking condition udpv6_recvmsg() function is not using the correct variable to determine whether or not the socket is in non-blocking operation, this will lead to unexpected behavior when a UDP checksum error occurs. Consider a non-blocking udp receive scenario: when udpv6_recvmsg() is called by sock_common_recvmsg(), MSG_DONTWAIT bit of flags variable in udpv6_recvmsg() is cleared by "flags & ~MSG_DONTWAIT" in this call: err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); i.e. with udpv6_recvmsg() getting these values: int noblock = flags & MSG_DONTWAIT int flags = flags & ~MSG_DONTWAIT So, when udp checksum error occurs, the execution will go to csum_copy_err, and then the problem happens: csum_copy_err: ............... if (flags & MSG_DONTWAIT) return -EAGAIN; goto try_again; ............... But it will always go to try_again as MSG_DONTWAIT has been cleared from flags at call time -- only noblock contains the original value of MSG_DONTWAIT, so the test should be: if (noblock) return -EAGAIN; This is also consistent with what the ipv4/udp code does. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 41f8c9c08dba..1e7a43f500ab 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -453,7 +453,7 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; goto try_again; } -- cgit v1.2.3 From 9cfaa8def1c795a512bc04f2aec333b03724ca2e Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Tue, 21 Jun 2011 10:43:40 +0000 Subject: udp/recvmsg: Clear MSG_TRUNC flag when starting over for a new packet Consider this scenario: When the size of the first received udp packet is bigger than the receive buffer, MSG_TRUNC bit is set in msg->msg_flags. However, if checksum error happens and this is a blocking socket, it will goto try_again loop to receive the next packet. But if the size of the next udp packet is smaller than receive buffer, MSG_TRUNC flag should not be set, but because MSG_TRUNC bit is not cleared in msg->msg_flags before receive the next packet, MSG_TRUNC is still set, which is wrong. Fix this problem by clearing MSG_TRUNC flag when starting over for a new packet. Signed-off-by: Xufeng Zhang Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +++ net/ipv6/udp.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870d8ff6..48cd88e62553 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1249,6 +1249,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e7a43f500ab..328985c40883 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -455,6 +455,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } -- cgit v1.2.3 From bd4265fe365c0f3945dd5ff1527e52bbe2bedfa2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 23 Jun 2011 02:39:12 +0000 Subject: bridge: Only flood unregistered groups to routers The bridge currently floods packets to groups that we have never seen before to all ports. This is not required by RFC4541 and in fact it is not desirable in environment where traffic to unregistered group is always present. This patch changes the behaviour so that we only send traffic to unregistered groups to ports marked as routers. The user can always force flooding behaviour to any given port by marking it as a router. Note that this change does not apply to traffic to 224.0.0.X as traffic to those groups must always be flooded to all ports. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 29b9812c8da0..2d85ca7111d3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1379,8 +1379,11 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) return -EINVAL; - if (iph->protocol != IPPROTO_IGMP) + if (iph->protocol != IPPROTO_IGMP) { + if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } len = ntohs(iph->tot_len); if (skb->len < len || len < ip_hdrlen(skb)) -- cgit v1.2.3 From a66b98db570a638afd909459e1e6bfa272344bd3 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 23 Jun 2011 00:00:24 +0300 Subject: mac80211: fix rx->key NULL dereference during mic failure Sometimes when reporting a MIC failure rx->key may be unset. This code path is hit when receiving a packet meant for a multicast address, and decryption is performed in HW. Fortunately, the failing key_idx is not used for anything up to (and including) usermode, so we allow ourselves to drop it on the way up when a key cannot be retrieved. Signed-off-by: Arik Nemtsov Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/wpa.c | 8 +++++++- net/wireless/nl80211.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 9dc3b5f26e80..d91c1a26630d 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -154,7 +154,13 @@ update_iv: return RX_CONTINUE; mic_fail: - mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, + /* + * In some cases the key can be unset - e.g. a multicast packet, in + * a driver that supports HW encryption. Send up the key idx only if + * the key is set. + */ + mac80211_ev_michael_mic_failure(rx->sdata, + rx->key ? rx->key->conf.keyidx : -1, (void *) skb->data, NULL, GFP_ATOMIC); return RX_DROP_UNUSABLE; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 98fa8eb6cc4b..f07602d7bf68 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6463,7 +6463,8 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, if (addr) NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr); NLA_PUT_U32(msg, NL80211_ATTR_KEY_TYPE, key_type); - NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id); + if (key_id != -1) + NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_id); if (tsc) NLA_PUT(msg, NL80211_ATTR_KEY_SEQ, 6, tsc); -- cgit v1.2.3 From 33f99dc7fd948bbc808a24a0989c167f8973b643 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:04:37 +0000 Subject: ipv4: Fix packet size calculation in __ip_append_data Git commit 59104f06 (ip: take care of last fragment in ip_append_data) added a check to see if we exceed the mtu when we add trailer_len. However, the mtu is already subtracted by the trailer length when the xfrm transfomation bundles are set up. So IPsec packets with mtu size get fragmented, or if the DF bit is set the packets will not be send even though they match the mtu perfectly fine. This patch actually reverts commit 59104f06. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a8024eaa0e87..6b894d430e61 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,12 +888,9 @@ alloc_new_skb: * because we have no idea what fragment will be * the last. */ - if (datalen == length + fraggap) { + if (datalen == length + fraggap) alloclen += rt->dst.trailer_len; - /* make sure mtu is not reached */ - if (datalen > mtu - fragheaderlen - rt->dst.trailer_len) - datalen -= ALIGN(rt->dst.trailer_len, 8); - } + if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, -- cgit v1.2.3 From 353e5c9abd900de3d1a40925386ffe4abf76111e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 22 Jun 2011 01:05:37 +0000 Subject: ipv4: Fix IPsec slowpath fragmentation problem ip_append_data() builds packets based on the mtu from dst_mtu(rt->dst.path). On IPsec the effective mtu is lower because we need to add the protocol headers and trailers later when we do the IPsec transformations. So after the IPsec transformations the packet might be too big, which leads to a slowpath fragmentation then. This patch fixes this by building the packets based on the lower IPsec mtu from dst_mtu(&rt->dst) and adapts the exthdr handling to this. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6b894d430e61..4a7e16b5d3f3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -802,8 +802,6 @@ static int __ip_append_data(struct sock *sk, skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - length += exthdrlen; - transhdrlen += exthdrlen; mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -883,6 +881,8 @@ alloc_new_skb: else alloclen = fraglen; + alloclen += exthdrlen; + /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be @@ -923,11 +923,11 @@ alloc_new_skb: /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen + exthdrlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); - data += fragheaderlen; + data += fragheaderlen + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( @@ -1061,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ *rtp = NULL; cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; -- cgit v1.2.3 From ed6e4ef836d425bc35e33bf20fcec95e68203afa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 18 Jun 2011 07:53:59 +0000 Subject: netfilter: Fix ip_route_me_harder triggering ip_rt_bug Avoid creating input routes with ip_route_me_harder. It does not work for locally generated packets. Instead, restrict sockets to provide valid saddr for output route (or unicast saddr for transparent proxy). For other traffic allow saddr to be unicast or local but if callers forget to check saddr type use 0 for the output route. The resulting handling should be: - REJECT TCP: - in INPUT we can provide addr_type = RTN_LOCAL but better allow rejecting traffic delivered with local route (no IP address => use RTN_UNSPEC to allow also RTN_UNICAST). - FORWARD: RTN_UNSPEC => allow RTN_LOCAL/RTN_UNICAST saddr, add fix to ignore RTN_BROADCAST and RTN_MULTICAST - OUTPUT: RTN_UNSPEC - NAT, mangle, ip_queue, nf_ip_reroute: RTN_UNSPEC in LOCAL_OUT - IPVS: - use RTN_LOCAL in LOCAL_OUT and FORWARD after SNAT to restrict saddr to be local Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/netfilter.c | 60 +++++++++++++++-------------------------- net/ipv4/netfilter/ipt_REJECT.c | 14 +++------- 2 files changed, 26 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4614babdc45f..2e97e3ec1eb7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; - unsigned long orefdst; + __be32 saddr = iph->saddr; + __u8 flags = 0; unsigned int hh_len; - unsigned int type; - type = inet_addr_type(net, iph->saddr); - if (skb->sk && inet_sk(skb->sk)->transparent) - type = RTN_LOCAL; - if (addr_type == RTN_UNSPEC) - addr_type = type; + if (!skb->sk && addr_type != RTN_LOCAL) { + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; + } /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ - if (addr_type == RTN_LOCAL) { - fl4.daddr = iph->daddr; - if (type == RTN_LOCAL) - fl4.saddr = iph->saddr; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; - - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl4.daddr = iph->saddr; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; + fl4.daddr = iph->daddr; + fl4.saddr = saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return -1; - orefdst = skb->_skb_refdst; - if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->dst.dev) != 0) { - dst_release(&rt->dst); - return -1; - } - dst_release(&rt->dst); - refdst_drop(orefdst); - } + /* Drop old route. */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) return -1; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1ff79e557f96..51f13f8ec724 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _otcph, *tcph; - unsigned int addr_type; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) @@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (oth->rst) return; + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) return; @@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); - addr_type = RTN_UNSPEC; - if (hook != NF_INET_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) - addr_type = RTN_LOCAL; - /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, addr_type)) + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); -- cgit v1.2.3 From 11d53b4990226247a950e2b1ccfa4cf93bfbc822 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2011 15:23:34 -0700 Subject: ipv6: Don't change dst->flags using assignments. This blows away any flags already set in the entry. Signed-off-by: David S. Miller --- net/ipv6/route.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index de2b1decd786..c2af4da074b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1062,14 +1062,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; -#if 0 /* there's no chance to use these for ndisc */ - rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST - ? DST_HOST - : 0; - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); - rt->rt6i_dst.plen = 128; -#endif - spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; icmp6_dst_gc_list = &rt->dst; @@ -1244,7 +1236,7 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -2025,7 +2017,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, in6_dev_hold(idev); - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; -- cgit v1.2.3 From 957c665f37007de93ccbe45902a23143724170d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 24 Jun 2011 15:25:00 -0700 Subject: ipv6: Don't put artificial limit on routing table size. IPV6, unlike IPV4, doesn't have a routing cache. Routing table entries, as well as clones made in response to route lookup requests, all live in the same table. And all of these things are together collected in the destination cache table for ipv6. This means that routing table entries count against the garbage collection limits, even though such entries cannot ever be reclaimed and are added explicitly by the administrator (rather than being created in response to lookups). Therefore it makes no sense to count ipv6 routing table entries against the GC limits. Add a DST_NOCOUNT destination cache entry flag, and skip the counting if it is set. Use this flag bit in ipv6 when adding routing table entries. Signed-off-by: David S. Miller --- net/core/dst.c | 6 ++++-- net/ipv6/route.c | 13 +++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca038444f..6135f3671692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -243,7 +244,8 @@ again: neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c2af4da074b0..0ef1f086feb8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,9 +228,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, - struct net_device *dev) + struct net_device *dev, + int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); @@ -1042,7 +1043,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1206,7 +1207,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); if (rt == NULL) { err = -ENOMEM; @@ -1726,7 +1727,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev); + ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2005,7 +2006,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev); + net->loopback_dev, 0); struct neighbour *neigh; if (rt == NULL) { -- cgit v1.2.3 From 12fdb4d3babcde43834c54dee22a69bb73adbae7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:18:20 +0000 Subject: xfrm: Remove family arg from xfrm_bundle_ok The family arg is not used any more, so remove it. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9bec2e8a838c..5ce74a385525 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,7 +50,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); +static int xfrm_bundle_ok(struct xfrm_dst *xdst); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -2241,7 +2241,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2313,7 +2313,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -static int xfrm_bundle_ok(struct xfrm_dst *first, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; -- cgit v1.2.3 From c146066ab80267c3305de5dda6a4083f06df9265 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:19:32 +0000 Subject: ipv4: Don't use ufo handling on later transformed packets We might call ip_ufo_append_data() for packets that will be IPsec transformed later. This function should be used just for real udp packets. So we check for rt->dst.header_len which is only nonzero on IPsec handling and call ip_ufo_append_data() just if rt->dst.header_len is zero. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4a7e16b5d3f3..84f26e8e6c60 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -828,7 +828,7 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); -- cgit v1.2.3 From b00897b881f775040653955fda99dcf7c167b382 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Jun 2011 23:20:41 +0000 Subject: xfrm4: Don't call icmp_send on local error Calling icmp_send() on a local message size error leads to an incorrect update of the path mtu. So use ip_local_error() instead to notify the socket about the error. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2d51840e53a1..327a617d594c 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) dst = skb_dst(skb); mtu = dst_mtu(dst); if (skb->len > mtu) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + if (skb->sk) + ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, + inet_sk(skb->sk)->inet_dport, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); ret = -EMSGSIZE; } out: -- cgit v1.2.3 From c349a528cd47e2272ded0ea358363855e86180da Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Mon, 4 Jul 2011 01:30:29 +0000 Subject: net: bind() fix error return on wrong address family Hi, Reinhard Max also pointed out that the error should EAFNOSUPPORT according to POSIX. The Linux manpages have it as EINVAL, some other OSes (Minix, HPUX, perhaps BSD) use EAFNOSUPPORT. Windows uses WSAEFAULT according to MSDN. Other protocols error values in their af bind() methods in current mainline git as far as a brief look shows: EAFNOSUPPORT: atm, appletalk, l2tp, llc, phonet, rxrpc EINVAL: ax25, bluetooth, decnet, econet, ieee802154, iucv, netlink, netrom, packet, rds, rose, unix, x25, No check?: can/raw, ipv6/raw, irda, l2tp/l2tp_ip Ciao, Marcus Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 +++- net/ipv6/af_inet6.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eae1f676f870..ef1528af7abf 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,8 +465,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - if (addr->sin_family != AF_INET) + if (addr->sin_family != AF_INET) { + err = -EAFNOSUPPORT; goto out; + } chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d450a2f9fc06..3b5669a2582d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -274,7 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) -- cgit v1.2.3 From 44661462ee1ee3c922754fc1f246867f0d01e7ea Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 5 Jul 2011 13:58:33 +0000 Subject: bridge: Always flood broadcast packets As is_multicast_ether_addr returns true on broadcast packets as well, we need to explicitly exclude broadcast packets so that they're always flooded. This wasn't an issue before as broadcast packets were considered to be an unregistered multicast group, which were always flooded. However, as we now only flood such packets to router ports, this is no longer acceptable. Reported-by: Michael Guntsche Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_device.c | 4 +++- net/bridge/br_input.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index c188c803c09c..32b8f9f7f79e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,7 +49,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); rcu_read_lock(); - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + br_flood_deliver(br, skb); + else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { br_flood_deliver(br, skb); goto out; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f3ac1e858ee1..f06ee39c73fd 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); - if (is_multicast_ether_addr(dest) && + if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) goto drop; @@ -77,7 +77,9 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + skb2 = skb; + else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || -- cgit v1.2.3 From 712ae51afd55b20c04c5383d02ba5d10233313b1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 5 Jul 2011 20:43:12 -0700 Subject: net: vlan: enable soft features regardless of underlying device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If gso/gro feature of underlying device is turned off, then new created vlan device never can turn gso/gro on. Although underlying device don't support TSO, we still should use software segments for vlan device. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 7ea5cf9ea08a..86bff9b1ac47 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -586,9 +586,14 @@ static void vlan_dev_uninit(struct net_device *dev) static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + u32 old_features = features; features &= real_dev->features; features &= real_dev->vlan_features; + + if (old_features & NETIF_F_SOFT_FEATURES) + features |= old_features & NETIF_F_SOFT_FEATURES; + if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM; features |= NETIF_F_LLTX; -- cgit v1.2.3 From f03d78db65085609938fdb686238867e65003181 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Jul 2011 00:27:05 -0700 Subject: net: refine {udp|tcp|sctp}_mem limits Current tcp/udp/sctp global memory limits are not taking into account hugepages allocations, and allow 50% of ram to be used by buffers of a single protocol [ not counting space used by sockets / inodes ...] Lets use nr_free_buffer_pages() and allow a default of 1/8 of kernel ram per protocol, and a minimum of 128 pages. Heavy duty machines sysadmins probably need to tweak limits anyway. References: https://bugzilla.stlinux.com/show_bug.cgi?id=38032 Reported-by: starlight Suggested-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 10 ++-------- net/ipv4/udp.c | 10 ++-------- net/sctp/protocol.c | 11 +---------- 3 files changed, 5 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 054a59d21eb0..46febcacb729 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3220,7 +3220,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long nr_pages, limit; + unsigned long limit; int i, max_share, cnt; unsigned long jiffy = jiffies; @@ -3277,13 +3277,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - /* Set the pressure threshold to be a fraction of global memory that - * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 48cd88e62553..198f75b7bdd3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2209,16 +2209,10 @@ void __init udp_table_init(struct udp_table *table, const char *name) void __init udp_init(void) { - unsigned long nr_pages, limit; + unsigned long limit; udp_table_init(&udp_table, "UDP"); - /* Set the pressure threshold up by the same strategy of TCP. It is a - * fraction of global memory that is up to 1/2 at 256 MB, decreasing - * toward zero with the amount of memory, with a floor of 128 pages. - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 67380a29e2e9..207175b2f40a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1058,7 +1058,6 @@ SCTP_STATIC __init int sctp_init(void) int status = -EINVAL; unsigned long goal; unsigned long limit; - unsigned long nr_pages; int max_share; int order; @@ -1148,15 +1147,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); - /* Set the pressure threshold to be a fraction of global memory that - * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. - * Note this initializes the data in sctpv6_prot too - * Unabashedly stolen from tcp_init - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_sctp_mem[0] = limit / 4 * 3; sysctl_sctp_mem[1] = limit; -- cgit v1.2.3 From 949123016a2ef578009b6aa3e98d45d1a154ebfb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 2 Jul 2011 09:28:04 +0000 Subject: sctp: fix missing send up SCTP_SENDER_DRY_EVENT when subscribe it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We forgot to send up SCTP_SENDER_DRY_EVENT notification when user app subscribes to this event, and there is no data to be sent or retransmit. This is required by the Socket API and used by the DTLS/SCTP implementation. Reported-by: Michael Tüxen Signed-off-by: Wei Yongjun Tested-by: Robin Seggelmann Signed-off-by: David S. Miller --- net/sctp/socket.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6766913a53e6..08c6238802de 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2073,10 +2073,33 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk, static int sctp_setsockopt_events(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_association *asoc; + struct sctp_ulpevent *event; + if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; + + /* + * At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, + * if there is no data to be sent or retransmit, the stack will + * immediately send up this notification. + */ + if (sctp_ulpevent_type_enabled(SCTP_SENDER_DRY_EVENT, + &sctp_sk(sk)->subscribe)) { + asoc = sctp_id2assoc(sk, 0); + + if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_ATOMIC); + if (!event) + return -ENOMEM; + + sctp_ulpq_tail_event(&asoc->ulpq, event); + } + } + return 0; } -- cgit v1.2.3