From 78541c1dc60b65ecfce5a6a096fc260219d6784e Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Wed, 16 Apr 2014 21:41:34 -0700 Subject: net: Fix ns_capable check in sock_diag_put_filterinfo The caller needs capabilities on the namespace being queried, not on their own namespace. This is a security bug, although it likely has only a minor impact. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/sock_diag.c | 4 ++-- net/packet/diag.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index d7af18859322..9deb6abd6cf6 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(struct sock *sk, struct sk_buff *skb, int attrtype) { struct sock_fprog_kern *fprog; @@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, unsigned int flen; int err = 0; - if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { nla_reserve(skb, attrtype, 0); return 0; } diff --git a/net/packet/diag.c b/net/packet/diag.c index 533ce4ff108a..435ff99ba8c7 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -172,7 +172,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(sk, skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); -- cgit v1.2.3 From 83d5b7ef99c9f05e87333b334a638de1264ab8e4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 22 Apr 2014 20:18:57 -0700 Subject: net: filter: initialize A and X registers exisiting BPF verifier allows uninitialized access to registers, 'ret A' is considered to be a valid filter. So initialize A and X to zero to prevent leaking kernel memory In the future BPF verifier will be rejecting such filters Signed-off-by: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index cd58614660cf..9d79ca0a6e8e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -122,6 +122,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +/* Register mappings for user programs. */ +#define A_REG 0 +#define X_REG 7 +#define TMP_REG 8 +#define ARG2_REG 2 +#define ARG3_REG 3 + /** * __sk_run_filter - run a filter on a given context * @ctx: buffer to run the filter on @@ -242,6 +249,8 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; regs[ARG1_REG] = (u64) (unsigned long) ctx; + regs[A_REG] = 0; + regs[X_REG] = 0; select_insn: goto *jumptable[insn->code]; @@ -643,13 +652,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* Register mappings for user programs. */ -#define A_REG 0 -#define X_REG 7 -#define TMP_REG 8 -#define ARG2_REG 2 -#define ARG3_REG 3 - static bool convert_bpf_extensions(struct sock_filter *fp, struct sock_filter_int **insnp) { -- cgit v1.2.3 From 5187cd055b6e81fc6526109456f8b20623148d5f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:25:48 -0700 Subject: netlink: Rename netlink_capable netlink_allowed netlink_capable is a static internal function in af_netlink.c and we have better uses for the name netlink_capable. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 894cda0206bb..7f931fe4d187 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1360,7 +1360,7 @@ retry: return err; } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1428,7 +1428,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, /* Only superuser is allowed to listen multicasts */ if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -1490,7 +1490,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if ((nladdr->nl_groups || nladdr->nl_pid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -2096,7 +2096,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -2247,7 +2247,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { dst_portid = nlk->dst_portid; -- cgit v1.2.3 From a53b72c83a4216f2eb883ed45a0cbce014b8e62d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:26:25 -0700 Subject: net: Move the permission check in sock_diag_put_filterinfo to packet_diag_dump The permission check in sock_diag_put_filterinfo is wrong, and it is so removed from it's sources it is not clear why it is wrong. Move the computation into packet_diag_dump and pass a bool of the result into sock_diag_filterinfo. This does not yet correct the capability check but instead simply moves it to make it clear what is going on. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/sock_diag.c | 4 ++-- net/packet/diag.c | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 9deb6abd6cf6..a4216a4c9572 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct sock_fprog_kern *fprog; @@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct sock *sk, unsigned int flen; int err = 0; - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; } diff --git a/net/packet/diag.c b/net/packet/diag.c index 435ff99ba8c7..b34d0de24091 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + bool may_report_filterinfo, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { @@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(may_report_filterinfo, sk, skb, + PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); @@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; + bool may_report_filterinfo; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); + may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { @@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (sk_diag_fill(sk, skb, req, + may_report_filterinfo, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, -- cgit v1.2.3 From a3b299da869d6e78cf42ae0b1b41797bcb8c5e4b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:26:56 -0700 Subject: net: Add variants of capable for use on on sockets sk_net_capable - The common case, operations that are safe in a network namespace. sk_capable - Operations that are not known to be safe in a network namespace sk_ns_capable - The general case for special cases. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/sock.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index b4fff008136f..664ee4295b6f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in the user + * namespace @user_ns. + */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capbility to use + * + * Test to see if the opener of the socket had when the socket was + * created and the current process has the capability @cap in all user + * namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if the opener of the socket had when the socke was created + * and the current process has the capability @cap over the network namespace + * the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { -- cgit v1.2.3 From aa4cf9452f469f16cea8c96283b641b4576d4a7b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:28:03 -0700 Subject: net: Add variants of capable for use on netlink messages netlink_net_capable - The common case use, for operations that are safe on a network namespace netlink_capable - For operations that are only known to be safe for the global root netlink_ns_capable - The general case of capable used to handle special cases __netlink_ns_capable - Same as netlink_ns_capable except taking a netlink_skb_parms instead of the skbuff of a netlink message. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7f931fe4d187..81dca96d2be6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1360,6 +1360,71 @@ retry: return err; } +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return sk_ns_capable(nsp->sk, user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. + */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || -- cgit v1.2.3 From 90f62cf30a78721641e08737bda787552428061e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 23 Apr 2014 14:29:27 -0700 Subject: net: Use netlink_ns_capable to verify the permisions of netlink messages It is possible by passing a netlink socket to a more privileged executable and then to fool that executable into writing to the socket data that happens to be valid netlink message to do something that privileged executable did not intend to do. To keep this from happening replace bare capable and ns_capable calls with netlink_capable, netlink_net_calls and netlink_ns_capable calls. Which act the same as the previous calls except they verify that the opener of the socket had the desired permissions as well. Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/can/gw.c | 4 ++-- net/core/rtnetlink.c | 20 +++++++++++--------- net/dcb/dcbnl.c | 2 +- net/decnet/dn_dev.c | 4 ++-- net/decnet/dn_fib.c | 4 ++-- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/netfilter/nfnetlink.c | 2 +- net/netlink/genetlink.c | 2 +- net/packet/diag.c | 2 +- net/phonet/pn_netlink.c | 8 ++++---- net/sched/act_api.c | 2 +- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 6 +++--- net/tipc/netlink.c | 2 +- net/xfrm/xfrm_user.c | 2 +- 15 files changed, 33 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/can/gw.c b/net/can/gw.c index ac31891967da..050a2110d43f 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) @@ -893,7 +893,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d4ff41739b0f..64ad17d077ed 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1395,7 +1395,8 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, + struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1407,7 +1408,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { err = -EPERM; goto errout; } @@ -1661,7 +1662,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, tb, ifname, 0); errout: return err; } @@ -1778,7 +1779,8 @@ err: } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(struct net *net, int group, +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, int group, struct ifinfomsg *ifm, struct nlattr **tb) { @@ -1787,7 +1789,7 @@ static int rtnl_group_changelink(struct net *net, int group, for_each_netdev(net, dev) { if (dev->group == group) { - err = do_setlink(dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) return err; } @@ -1929,12 +1931,12 @@ replay: modified = 1; } - return do_setlink(dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, modified); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(net, + return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, tb); return -ENODEV; @@ -2321,7 +2323,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) int err = -EINVAL; __u8 *addr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); @@ -2773,7 +2775,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 553644402670..f8b98d89c285 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1669,7 +1669,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlmsghdr *reply_nlh = NULL; const struct reply_func *fn; - if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN)) + if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index a603823a3e27..3b726f31c64c 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -574,7 +574,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr __rcu **ifap; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -618,7 +618,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr *ifa; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 57dc159245ec..d332aefb0846 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index e83015cecfa7..e4d9560a910b 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e8138da4c14f..84392f3237c1 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -375,7 +375,7 @@ static void nfnetlink_rcv(struct sk_buff *skb) skb->len < nlh->nlmsg_len) return; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { netlink_ack(skb, nlh, -EPERM); return; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b1dcdb932a86..a3ba3ca0ff92 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -561,7 +561,7 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EOPNOTSUPP; if ((ops->flags & GENL_ADMIN_PERM) && - !capable(CAP_NET_ADMIN)) + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { diff --git a/net/packet/diag.c b/net/packet/diag.c index b34d0de24091..92f2c7107eec 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -194,7 +194,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); - may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN); + may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index dc15f4300808..b64151ade6b3 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 pnaddr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); @@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 dst; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8a5ba5add4bc..648778aef1a2 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -948,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; - if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 29a30a14c315..bdbdb1a7920a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -134,7 +134,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) int err; int tp_created = 0; - if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a0b84e0e22de..400769014bbd 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1084,7 +1084,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1151,7 +1151,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q, *p; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1490,7 +1490,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3aaf73de9e2d..ad844d365340 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8f131c10a6f3..51398ae6cda8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2377,7 +2377,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || -- cgit v1.2.3 From a64d90fd962c2956da7505f98a302408450365e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Apr 2014 13:51:29 -0400 Subject: netfilter: Fix warning in nfnetlink_receive(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netfilter/nfnetlink.c: In function ‘nfnetlink_rcv’: net/netfilter/nfnetlink.c:371:14: warning: unused variable ‘net’ [-Wunused-variable] Signed-off-by: David S. Miller --- net/netfilter/nfnetlink.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 84392f3237c1..e009087620e3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -368,7 +368,6 @@ done: static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - struct net *net = sock_net(skb->sk); int msglen; if (nlh->nlmsg_len < NLMSG_HDRLEN || -- cgit v1.2.3 From 973462bbde79bb827824c73b59027a0aed5c9ca6 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 24 Apr 2014 10:22:35 +1000 Subject: rtnetlink: Warn when interface's information won't fit in our packet Without IFLA_EXT_MASK specified, the information reported for a single interface in response to RTM_GETLINK is expected to fit within a netlink packet of NLMSG_GOODSIZE. If it doesn't, however, things will go badly wrong, When listing all interfaces, netlink_dump() will incorrectly treat -EMSGSIZE on the first message in a packet as the end of the listing and omit information for that interface and all subsequent ones. This can cause getifaddrs(3) to enter an infinite loop. This patch won't fix the problem, but it will WARN_ON() making it easier to track down what's going wrong. Signed-off-by: David Gibson Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 64ad17d077ed..8db72ac88feb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1198,6 +1198,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + int err; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1218,11 +1219,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -- cgit v1.2.3 From c53864fd60227de025cb79e05493b13f69843971 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 24 Apr 2014 10:22:36 +1000 Subject: rtnetlink: Only supply IFLA_VF_PORTS information when RTEXT_FILTER_VF is set Since 115c9b81928360d769a76c632bae62d15206a94a (rtnetlink: Fix problem with buffer allocation), RTM_NEWLINK messages only contain the IFLA_VFINFO_LIST attribute if they were solicited by a GETLINK message containing an IFLA_EXT_MASK attribute with the RTEXT_FILTER_VF flag. That was done because some user programs broke when they received more data than expected - because IFLA_VFINFO_LIST contains information for each VF it can become large if there are many VFs. However, the IFLA_VF_PORTS attribute, supplied for devices which implement ndo_get_vf_port (currently the 'enic' driver only), has the same problem. It supplies per-VF information and can therefore become large, but it is not currently conditional on the IFLA_EXT_MASK value. Worse, it interacts badly with the existing EXT_MASK handling. When IFLA_EXT_MASK is not supplied, the buffer for netlink replies is fixed at NLMSG_GOODSIZE. If the information for IFLA_VF_PORTS exceeds this, then rtnl_fill_ifinfo() returns -EMSGSIZE on the first message in a packet. netlink_dump() will misinterpret this as having finished the listing and omit data for this interface and all subsequent ones. That can cause getifaddrs(3) to enter an infinite loop. This patch addresses the problem by only supplying IFLA_VF_PORTS when IFLA_EXT_MASK is supplied with the RTEXT_FILTER_VF flag set. Signed-off-by: David Gibson Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8db72ac88feb..9837bebf93ce 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -774,7 +774,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, return 0; } -static size_t rtnl_port_size(const struct net_device *dev) +static size_t rtnl_port_size(const struct net_device *dev, + u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ @@ -790,7 +791,8 @@ static size_t rtnl_port_size(const struct net_device *dev) size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + @@ -826,7 +828,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ - + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */ @@ -888,11 +890,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, + u32 ext_filter_mask) { int err; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); @@ -1079,7 +1083,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, vfinfo); } - if (rtnl_port_fill(skb, dev)) + if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { -- cgit v1.2.3 From 1c2658545816088477e91860c3a645053719cb54 Mon Sep 17 00:00:00 2001 From: Kumar Sundararajan Date: Thu, 24 Apr 2014 09:48:53 -0400 Subject: ipv6: fib: fix fib dump restart When the ipv6 fib changes during a table dump, the walk is restarted and the number of nodes dumped are skipped. But the existing code doesn't advance to the next node after a node is skipped. This can cause the dump to loop or produce lots of duplicates when the fib is modified during the dump. This change advances the walk to the next node if the current node is skipped after a restart. Signed-off-by: Kumar Sundararajan Signed-off-by: Chris Mason Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 34e0ded5c14b..87891f5f57b5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1459,7 +1459,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->skip) { w->skip--; - continue; + goto skip; } err = w->func(w); @@ -1469,6 +1469,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) -- cgit v1.2.3 From 9eb1fbfa0a737fd4d3a6d12d71c5ea9af622b887 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 11 Apr 2014 12:02:31 -0700 Subject: Bluetooth: Fix triggering BR/EDR L2CAP Connect too early Commit 1c2e004183178 introduced an event handler for the encryption key refresh complete event with the intent of fixing some LE/SMP cases. However, this event is shared with BR/EDR and there we actually want to act only on the auth_complete event (which comes after the key refresh). If we do not do this we may trigger an L2CAP Connect Request too early and cause the remote side to return a security block error. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- net/bluetooth/hci_event.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 49774912cb01..15010a230b6d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3330,6 +3330,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!conn) goto unlock; + /* For BR/EDR the necessary steps are taken through the + * auth_complete event. + */ + if (conn->type != LE_LINK) + goto unlock; + if (!ev->status) conn->sec_level = conn->pending_sec_level; -- cgit v1.2.3 From 09da1f3463eb81d59685df723b1c5950b7570340 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 11 Apr 2014 12:02:32 -0700 Subject: Bluetooth: Fix redundant encryption request for reauthentication When we're performing reauthentication (in order to elevate the security level from an unauthenticated key to an authenticated one) we do not need to issue any encryption command once authentication completes. Since the trigger for the encryption HCI command is the ENCRYPT_PEND flag this flag should not be set in this scenario. Instead, the REAUTH_PEND flag takes care of all necessary steps for reauthentication. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Cc: stable@vger.kernel.org --- net/bluetooth/hci_conn.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d958e2dca52f..521fd4f3985e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -819,14 +819,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; - /* encrypt must be pending if auth is also pending */ - set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); - cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); + + /* If we're already encrypted set the REAUTH_PEND flag, + * otherwise set the ENCRYPT_PEND. + */ if (conn->key_type != 0xff) set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); + else + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } return 0; -- cgit v1.2.3 From 30313a3d5794472c3548d7288e306a5492030370 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 25 Apr 2014 17:01:18 +0900 Subject: bridge: Handle IFLA_ADDRESS correctly when creating bridge device When bridge device is created with IFLA_ADDRESS, we are not calling br_stp_change_bridge_id(), which leads to incorrect local fdb management and bridge id calculation, and prevents us from receiving frames on the bridge device. Reported-by: Tom Gundersen Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74b6d530cb6..e8844d975b32 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -445,6 +445,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + return register_netdevice(dev); +} + static size_t br_get_link_af_size(const struct net_device *dev) { struct net_port_vlans *pv; @@ -473,6 +487,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .newlink = br_dev_newlink, .dellink = br_dev_delete, }; -- cgit v1.2.3 From 85350871317a5adb35519d9dc6fc9e80809d42ad Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Fri, 25 Apr 2014 16:55:41 +0800 Subject: sctp: reset flowi4_oif parameter on route lookup commit 813b3b5db83 (ipv4: Use caller's on-stack flowi as-is in output route lookups.) introduces another regression which is very similar to the problem of commit e6b45241c (ipv4: reset flowi parameters on route connect) wants to fix: Before we call ip_route_output_key() in sctp_v4_get_dst() to get a dst that matches a bind address as the source address, we have already called this function previously and the flowi parameters have been initialized including flowi4_oif, so when we call this function again, the process in __ip_route_output_key() will be different because of the setting of flowi4_oif, and we'll get a networking device which corresponds to the inputted flowi4_oif as the output device, this is wrong because we'll never hit this place if the previously returned source address of dst match one of the bound addresses. To reproduce this problem, a vlan setting is enough: # ifconfig eth0 up # route del default # vconfig add eth0 2 # vconfig add eth0 3 # ifconfig eth0.2 10.0.1.14 netmask 255.255.255.0 # route add default gw 10.0.1.254 dev eth0.2 # ifconfig eth0.3 10.0.0.14 netmask 255.255.255.0 # ip rule add from 10.0.0.14 table 4 # ip route add table 4 default via 10.0.0.254 src 10.0.0.14 dev eth0.3 # sctp_darn -H 10.0.0.14 -P 36422 -h 10.1.4.134 -p 36422 -s -I You'll detect that all the flow are routed to eth0.2(10.0.1.254). Signed-off-by: Xufeng Zhang Signed-off-by: Julian Anastasov Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c09757fbf803..44cbb54c8574 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -491,8 +491,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; + flowi4_update_output(fl4, + asoc->base.sk->sk_bound_dev_if, + RT_CONN_FLAGS(asoc->base.sk), + daddr->v4.sin_addr.s_addr, + laddr->a.v4.sin_addr.s_addr); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; -- cgit v1.2.3 From 8c2eab9097dba50bcd73ed4632baccc3f34857f9 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Fri, 25 Apr 2014 14:26:30 -0400 Subject: net: sctp: Don't transition to PF state when transport has exhausted 'Path.Max.Retrans'. Don't transition to the PF state on every strike after 'Path.Max.Retrans'. Per draft-ietf-tsvwg-sctp-failover-03 Section 5.1.6: Additional (PMR - PFMR) consecutive timeouts on a PF destination confirm the path failure, upon which the destination transitions to the Inactive state. As described in [RFC4960], the sender (i) SHOULD notify ULP about this state transition, and (ii) transmit heartbeats to the Inactive destination at a lower frequency as described in Section 8.3 of [RFC4960]. This also prevents sending SCTP_ADDR_UNREACHABLE to the user as the state bounces between SCTP_INACTIVE and SCTP_PF for each subsequent strike. Signed-off-by: Karl Heiss Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 5d6883ff00c3..fef2acdf4a2e 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -496,11 +496,10 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, /* If the transport error count is greater than the pf_retrans * threshold, and less than pathmaxrtx, and if the current state - * is not SCTP_UNCONFIRMED, then mark this transport as Partially - * Failed, see SCTP Quick Failover Draft, section 5.1 + * is SCTP_ACTIVE, then mark this transport as Partially Failed, + * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state != SCTP_PF) && - (transport->state != SCTP_UNCONFIRMED) && + if ((transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { -- cgit v1.2.3 From e374c618b1465f0292047a9f4c244bd71ab5f1f0 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 28 Apr 2014 10:51:56 +0300 Subject: net: ipv6: more places need LOOPBACK_IFINDEX for flowi6_iif To properly match iif in ip rules we have to provide LOOPBACK_IFINDEX in flowi6_iif, not 0. Some ip6mr_fib_lookup and fib6_rule_lookup callers need such fix. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- net/ipv6/netfilter/ip6t_rpfilter.c | 1 + net/ipv6/route.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8659067da28e..8250474ab7dc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index e0983f3648a6..790e0c6b19e1 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4011617cca68..004fffb6c221 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1273,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = iph->daddr; @@ -1294,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = msg->dest; -- cgit v1.2.3 From fc9f35010641ea85dd19d144b86cdd93156f1a1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Apr 2014 22:00:29 -0700 Subject: tcp: increment retransmit counters in tlp and fast open Both TLP and Fast Open call __tcp_retransmit_skb() instead of tcp_retransmit_skb() to avoid changing tp->retrans_out. This has the side effect of missing SNMP counters increments as well as tcp_info tcpi_total_retrans updates. Fix this by moving the stats increments of into __tcp_retransmit_skb() Signed-off-by: Eric Dumazet Acked-by: Nandita Dukkipati Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 025e25093984..12d6016bdd9a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2441,8 +2441,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } - if (likely(!err)) + if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + /* Update global TCP statistics. */ + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + tp->total_retrans++; + } return err; } @@ -2452,12 +2458,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int err = __tcp_retransmit_skb(sk, skb); if (err == 0) { - /* Update global TCP statistics. */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); - tp->total_retrans++; - #if FASTRETRANS_DEBUG > 0 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { net_dbg_ratelimited("retrans_out leaked\n"); -- cgit v1.2.3 From 0cda345d1b2201dd15591b163e3c92bad5191745 Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Wed, 30 Apr 2014 17:34:09 +0800 Subject: tcp_cubic: fix the range of delayed_ack commit b9f47a3aaeab (tcp_cubic: limit delayed_ack ratio to prevent divide error) try to prevent divide error, but there is still a little chance that delayed_ack can reach zero. In case the param cnt get negative value, then ratio+cnt would overflow and may happen to be zero. As a result, min(ratio, ACK_RATIO_LIMIT) will calculate to be zero. In some old kernels, such as 2.6.32, there is a bug that would pass negative param, which then ultimately leads to this divide error. commit 5b35e1e6e9c (tcp: fix tcp_trim_head() to adjust segment count with skb MSS) fixed the negative param issue. However, it's safe that we fix the range of delayed_ack as well, to make sure we do not hit a divide by zero. CC: Stephen Hemminger Signed-off-by: Liu Yu Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 8bf224516ba2..b4f1b29b08bd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; ratio += cnt; - ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); + ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without timetamps */ -- cgit v1.2.3 From f6a082fed1e6407c2f4437d0d963b1bcbe5f9f58 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 1 May 2014 09:23:06 -0700 Subject: net: sched: lock imbalance in hhf qdisc hhf_change() takes the sch_tree_lock and releases it but misses the error cases. Fix the missed case here. To reproduce try a command like this, # tc qdisc change dev p3p2 root hhf quantum 40960 non_hh_weight 300000 Signed-off-by: John Fastabend Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_hhf.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index edee03d922e2..6e957c3b9854 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -553,11 +553,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - sch_tree_lock(sch); - - if (tb[TCA_HHF_BACKLOG_LIMIT]) - sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); - if (tb[TCA_HHF_QUANTUM]) new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); @@ -567,6 +562,12 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; if (non_hh_quantum > INT_MAX) return -EINVAL; + + sch_tree_lock(sch); + + if (tb[TCA_HHF_BACKLOG_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); + q->quantum = new_quantum; q->hhf_non_hh_weight = new_hhf_non_hh_weight; -- cgit v1.2.3 From 2c4a336e0a3e203fab6aa8d8f7bb70a0ad968a6b Mon Sep 17 00:00:00 2001 From: Andy King Date: Thu, 1 May 2014 15:20:43 -0700 Subject: vsock: Make transport the proto owner Right now the core vsock module is the owner of the proto family. This means there's nothing preventing the transport module from unloading if there are open sockets, which results in a panic. Fix that by allowing the transport to be the owner, which will refcount it properly. Includes version bump to 1.0.1.0-k Passes checkpatch this time, I swear... Acked-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5adfd94c5b85..85d232bed87d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = { .fops = &vsock_device_ops, }; -static int __vsock_core_init(void) +int __vsock_core_init(const struct vsock_transport *t, struct module *owner) { - int err; + int err = mutex_lock_interruptible(&vsock_register_mutex); + + if (err) + return err; + + if (transport) { + err = -EBUSY; + goto err_busy; + } + + /* Transport must be the owner of the protocol so that it can't + * unload while there are open sockets. + */ + vsock_proto.owner = owner; + transport = t; vsock_init_tables(); @@ -1951,36 +1965,19 @@ static int __vsock_core_init(void) goto err_unregister_proto; } + mutex_unlock(&vsock_register_mutex); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_misc_deregister: misc_deregister(&vsock_device); - return err; -} - -int vsock_core_init(const struct vsock_transport *t) -{ - int retval = mutex_lock_interruptible(&vsock_register_mutex); - if (retval) - return retval; - - if (transport) { - retval = -EBUSY; - goto out; - } - - transport = t; - retval = __vsock_core_init(); - if (retval) - transport = NULL; - -out: + transport = NULL; +err_busy: mutex_unlock(&vsock_register_mutex); - return retval; + return err; } -EXPORT_SYMBOL_GPL(vsock_core_init); +EXPORT_SYMBOL_GPL(__vsock_core_init); void vsock_core_exit(void) { @@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.0.0-k"); +MODULE_VERSION("1.0.1.0-k"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From e96f2e7c430014eff52c93cabef1ad4f42ed0db1 Mon Sep 17 00:00:00 2001 From: Ying Cai Date: Sun, 4 May 2014 15:20:04 -0700 Subject: ip_tunnel: Set network header properly for IP_ECN_decapsulate() In ip_tunnel_rcv(), set skb->network_header to inner IP header before IP_ECN_decapsulate(). Without the fix, IP_ECN_decapsulate() takes outer IP header as inner IP header, possibly causing error messages or packet drops. Note that this skb_reset_network_header() call was in this spot when the original feature for checking consistency of ECN bits through tunnels was added in eccc1bb8d4b4 ("tunnel: drop packet if ECN present with not-ECT"). It was only removed from this spot in 3d7b46cd20e3 ("ip_tunnel: push generic protocol handling to ip_tunnel module."). Fixes: 3d7b46cd20e3 ("ip_tunnel: push generic protocol handling to ip_tunnel module.") Reported-by: Neal Cardwell Signed-off-by: Ying Cai Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fa5b7519765f..b3f859731c60 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -442,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) -- cgit v1.2.3