diff options
Diffstat (limited to 'net')
77 files changed, 1568 insertions, 970 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0a9019da18f3..b7b958f61faf 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -122,7 +122,7 @@ static int get_protocol_version(char *s) } /** - * parse_options - parse mount options into client structure + * parse_opts - parse mount options into client structure * @opts: options string passed from mount * @clnt: existing v9fs client information * @@ -256,7 +256,7 @@ EXPORT_SYMBOL(p9_fcall_fini); static struct kmem_cache *p9_req_cache; /** - * p9_req_alloc - Allocate a new request. + * p9_tag_alloc - Allocate a new request. * @c: Client session. * @type: Transaction type. * @max_size: Maximum packet size for this request. diff --git a/net/9p/error.c b/net/9p/error.c index 231f355fa9c6..61c18daf3050 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -197,7 +197,7 @@ int p9_error_init(void) EXPORT_SYMBOL(p9_error_init); /** - * errstr2errno - convert error string to error number + * p9_errstr2errno - convert error string to error number * @errstr: error string * @len: length of error string * diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fa158397bb63..f4dd0456beaf 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -872,7 +872,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) } /** - * p9_mux_destroy - cancels all pending requests of mux + * p9_conn_destroy - cancels all pending requests of mux * @m: mux to destroy * */ diff --git a/net/ax25/TODO b/net/ax25/TODO deleted file mode 100644 index 69fb4e368d92..000000000000 --- a/net/ax25/TODO +++ /dev/null @@ -1,20 +0,0 @@ -Do the ax25_list_lock, ax25_dev_lock, linkfail_lockreally, ax25_frag_lock and -listen_lock have to be bh-safe? - -Do the netrom and rose locks have to be bh-safe? - -A device might be deleted after lookup in the SIOCADDRT ioctl but before it's -being used. - -Routes to a device being taken down might be deleted by ax25_rt_device_down -but added by somebody else before the device has been deleted fully. - -The ax25_rt_find_route synopsys is pervert but I somehow had to deal with -the race caused by the static variable in it's previous implementation. - -Implement proper socket locking in netrom and rose. - -Check socket locking when ax25_rcv is sending to raw sockets. In particular -ax25_send_to_raw() seems fishy. Heck - ax25_rcv is fishy. - -Handle XID and TEST frames properly. diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 28166402d30c..1d63c8cbbfe7 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -454,8 +454,9 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, return 0; } - read_lock_bh(&in6_dev->lock); - for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) { + for (pmc6 = rcu_dereference(in6_dev->mc_list); + pmc6; + pmc6 = rcu_dereference(pmc6->next)) { if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) continue; @@ -484,7 +485,6 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev, hlist_add_head(&new->list, mcast_list); ret++; } - read_unlock_bh(&in6_dev->lock); rcu_read_unlock(); return ret; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 2b1dd252f231..c959320c4775 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1069,7 +1069,7 @@ again: /* * Do not return the error but go back to waiting. We - * have the inital workspace and the CRUSH computation + * have the initial workspace and the CRUSH computation * time is bounded so we will get it eventually. */ WARN_ON(atomic_read(&wsm->total_ws) < 1); diff --git a/net/core/dev.c b/net/core/dev.c index 48b529d59157..b4c67a5be606 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10460,7 +10460,7 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); - if (refcnt && + if (refcnt != 1 && time_after(jiffies, warning_time + netdev_unregister_timeout_secs * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index fa1c37ec40c9..45ae6eeb2964 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL(__hw_addr_unsync); * @sync: function to call if address should be added * @unsync: function to call if address should be removed * - * This funciton is intended to be called from the ndo_set_rx_mode + * This function is intended to be called from the ndo_set_rx_mode * function of devices that require explicit address add/remove * notifications. The unsync function may be NULL in which case * the addresses requiring removal will simply be removed without @@ -723,7 +723,7 @@ void dev_uc_flush(struct net_device *dev) EXPORT_SYMBOL(dev_uc_flush); /** - * dev_uc_flush - Init unicast address list + * dev_uc_init - Init unicast address list * @dev: device * * Init unicast address list. diff --git a/net/core/netevent.c b/net/core/netevent.c index d76ed7739c70..5bb615e963cc 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -32,7 +32,7 @@ int register_netevent_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(register_netevent_notifier); /** - * netevent_unregister_notifier - unregister a netevent notifier block + * unregister_netevent_notifier - unregister a netevent notifier block * @nb: notifier * * Unregister a notifier previously registered by diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d84c8a1b280e..c8496c1142c9 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -577,7 +577,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = SYSCTL_ONE, .extra2 = &int_3600, }, { } diff --git a/net/decnet/TODO b/net/decnet/TODO deleted file mode 100644 index 358e9eb49016..000000000000 --- a/net/decnet/TODO +++ /dev/null @@ -1,40 +0,0 @@ -Steve's quick list of things that need finishing off: -[they are in no particular order and range from the trivial to the long winded] - - o Proper timeouts on each neighbour (in routing mode) rather than - just the 60 second On-Ethernet cache value. - - o Support for X.25 linklayer - - o Support for DDCMP link layer - - o The DDCMP device itself - - o PPP support (rfc1762) - - o Lots of testing with real applications - - o Verify errors etc. against POSIX 1003.1g (draft) - - o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g) - [maybe this should be done at socket level... the control data in the - send/recvmsg() calls should simply be a vector of set/getsockopt() - calls] - - o check MSG_CTRUNC is set where it should be. - - o Find all the commonality between DECnet and IPv4 routing code and extract - it into a small library of routines. [probably a project for 2.7.xx] - - o Add perfect socket hashing - an idea suggested by Paul Koning. Currently - we have a half-way house scheme which seems to work reasonably well, but - the full scheme is still worth implementing, its not not top of my list - right now. - - o Add session control message flow control - - o Add NSP message flow control - - o DECnet sendpages() function - - o AIO for DECnet diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index c97bdca5ec30..1a12912b88d6 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -520,7 +520,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) fcval = *ptr; /* - * Here we ignore erronous packets which should really + * Here we ignore erroneous packets which should really * should cause a connection abort. It is not critical * for now though. */ diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index e9176475bac8..cf8ac316f4c7 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -79,7 +79,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, /* The RTL4 header has its own custom Ethertype 0x8899 and that * starts right at the beginning of the packet, after the src - * ethernet addr. Apparantly skb->data always points 2 bytes in, + * ethernet addr. Apparently skb->data always points 2 bytes in, * behind the Ethertype. */ tag = skb->data - 2; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 7a849ff22dad..c2dc9033a8f7 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ - tunnels.o + tunnels.o fec.o diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c new file mode 100644 index 000000000000..31454b9188bd --- /dev/null +++ b/net/ethtool/fec.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct fec_req_info { + struct ethnl_req_info base; +}; + +struct fec_reply_data { + struct ethnl_reply_data base; + __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes); + u32 active_fec; + u8 fec_auto; +}; + +#define FEC_REPDATA(__reply_base) \ + container_of(__reply_base, struct fec_reply_data, base) + +#define ETHTOOL_FEC_MASK ((ETHTOOL_FEC_LLRS << 1) - 1) + +const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = { + [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static void +ethtool_fec_to_link_modes(u32 fec, unsigned long *link_modes, u8 *fec_auto) +{ + if (fec_auto) + *fec_auto = !!(fec & ETHTOOL_FEC_AUTO); + + if (fec & ETHTOOL_FEC_OFF) + __set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes); + if (fec & ETHTOOL_FEC_RS) + __set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes); + if (fec & ETHTOOL_FEC_BASER) + __set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes); + if (fec & ETHTOOL_FEC_LLRS) + __set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes); +} + +static int +ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec, + unsigned long *link_modes, u8 fec_auto) +{ + memset(fec, 0, sizeof(*fec)); + + if (fec_auto) + fec->fec |= ETHTOOL_FEC_AUTO; + + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_OFF; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_RS; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_BASER; + if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes)) + fec->fec |= ETHTOOL_FEC_LLRS; + + if (!bitmap_empty(link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; + + return 0; +} + +static int fec_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {}; + struct fec_reply_data *data = FEC_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + struct ethtool_fecparam fec = {}; + int ret; + + if (!dev->ethtool_ops->get_fecparam) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + ret = dev->ethtool_ops->get_fecparam(dev, &fec); + ethnl_ops_complete(dev); + if (ret) + return ret; + + WARN_ON_ONCE(fec.reserved); + + ethtool_fec_to_link_modes(fec.fec, data->fec_link_modes, + &data->fec_auto); + + ethtool_fec_to_link_modes(fec.active_fec, active_fec_modes, NULL); + data->active_fec = find_first_bit(active_fec_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS); + /* Don't report attr if no FEC mode set. Note that + * ethtool_fecparam_to_link_modes() ignores NONE and AUTO. + */ + if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS) + data->active_fec = 0; + + return 0; +} + +static int fec_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct fec_reply_data *data = FEC_REPDATA(reply_base); + int len = 0; + int ret; + + ret = ethnl_bitset_size(data->fec_link_modes, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); + if (ret < 0) + return ret; + len += ret; + + len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ + nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ + + return len; +} + +static int fec_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct fec_reply_data *data = FEC_REPDATA(reply_base); + int ret; + + ret = ethnl_put_bitset(skb, ETHTOOL_A_FEC_MODES, + data->fec_link_modes, NULL, + __ETHTOOL_LINK_MODE_MASK_NBITS, + link_mode_names, compact); + if (ret < 0) + return ret; + + if (nla_put_u8(skb, ETHTOOL_A_FEC_AUTO, data->fec_auto) || + (data->active_fec && + nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_fec_request_ops = { + .request_cmd = ETHTOOL_MSG_FEC_GET, + .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY, + .hdr_attr = ETHTOOL_A_FEC_HEADER, + .req_info_size = sizeof(struct fec_req_info), + .reply_data_size = sizeof(struct fec_reply_data), + + .prepare_data = fec_prepare_data, + .reply_size = fec_reply_size, + .fill_reply = fec_fill_reply, +}; + +/* FEC_SET */ + +const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = { + [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_FEC_MODES] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEC_AUTO] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {}; + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct ethtool_fecparam fec = {}; + const struct ethtool_ops *ops; + struct net_device *dev; + bool mod = false; + u8 fec_auto; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEC_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_fecparam || !ops->set_fecparam) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ret = ops->get_fecparam(dev, &fec); + if (ret < 0) + goto out_ops; + + ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto); + + ret = ethnl_update_bitset(fec_link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS, + tb[ETHTOOL_A_FEC_MODES], + link_mode_names, info->extack, &mod); + if (ret < 0) + goto out_ops; + ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod); + + ret = 0; + if (!mod) + goto out_ops; + + ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto); + if (ret) { + NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], + "invalid FEC modes requested"); + goto out_ops; + } + if (!fec.fec) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], + "no FEC modes set"); + goto out_ops; + } + + ret = dev->ethtool_ops->set_fecparam(dev, &fec); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_FEC_NTF, NULL); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 0788cc3b3114..26b3e7086075 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2568,6 +2568,9 @@ static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) if (rc) return rc; + if (WARN_ON_ONCE(fecparam.reserved)) + fecparam.reserved = 0; + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) return -EFAULT; return 0; @@ -2583,6 +2586,12 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) return -EFAULT; + if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE) + return -EINVAL; + + fecparam.active_fec = 0; + fecparam.reserved = 0; + return dev->ethtool_ops->set_fecparam(dev, &fecparam); } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 50d3c8896f91..705a4b201564 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -244,6 +244,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_COALESCE_GET] = ðnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_GET] = ðnl_pause_request_ops, [ETHTOOL_MSG_EEE_GET] = ðnl_eee_request_ops, + [ETHTOOL_MSG_FEC_GET] = ðnl_fec_request_ops, [ETHTOOL_MSG_TSINFO_GET] = ðnl_tsinfo_request_ops, }; @@ -551,6 +552,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_COALESCE_NTF] = ðnl_coalesce_request_ops, [ETHTOOL_MSG_PAUSE_NTF] = ðnl_pause_request_ops, [ETHTOOL_MSG_EEE_NTF] = ðnl_eee_request_ops, + [ETHTOOL_MSG_FEC_NTF] = ðnl_fec_request_ops, }; /* default notification handler */ @@ -643,6 +645,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_COALESCE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PAUSE_NTF] = ethnl_default_notify, [ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -912,6 +915,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_tunnel_info_get_policy, .maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_FEC_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_fec_get_policy, + .maxattr = ARRAY_SIZE(ethnl_fec_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_FEC_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_fec, + .policy = ethnl_fec_set_policy, + .maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 6eabd58d81bf..785f7ee45930 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -344,6 +344,7 @@ extern const struct ethnl_request_ops ethnl_coalesce_request_ops; extern const struct ethnl_request_ops ethnl_pause_request_ops; extern const struct ethnl_request_ops ethnl_eee_request_ops; extern const struct ethnl_request_ops ethnl_tsinfo_request_ops; +extern const struct ethnl_request_ops ethnl_fec_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -375,6 +376,8 @@ extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1]; extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1]; extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; +extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; +extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); @@ -392,5 +395,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a3271ec3e162..1ae920b93f39 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -309,7 +309,7 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { @@ -854,7 +854,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) struct ip_esp_hdr *esph; /* For ESN we move the header forward by 4 bytes to - * accomodate the high bits. We will move it back after + * accommodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 616e2dc1c8fa..76990e13a2f9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -971,7 +971,7 @@ static bool icmp_redirect(struct sk_buff *skb) } /* - * Handle ICMP_ECHO ("ping") requests. + * Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests. * * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo * requests. @@ -979,27 +979,125 @@ static bool icmp_redirect(struct sk_buff *skb) * included in the reply. * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring * echo requests, MUST have default=NOT. + * RFC 8335: 8 MUST have a config option to enable/disable ICMP + * Extended Echo Functionality, MUST be disabled by default * See also WRT handling of options once they are done and working. */ static bool icmp_echo(struct sk_buff *skb) { + struct icmp_ext_hdr *ext_hdr, _ext_hdr; + struct icmp_ext_echo_iio *iio, _iio; + struct icmp_bxm icmp_param; + struct net_device *dev; + char buff[IFNAMSIZ]; struct net *net; + u16 ident_len; + u8 status; net = dev_net(skb_dst(skb)->dev); - if (!net->ipv4.sysctl_icmp_echo_ignore_all) { - struct icmp_bxm icmp_param; + /* should there be an ICMP stat for ignored echos? */ + if (net->ipv4.sysctl_icmp_echo_ignore_all) + return true; + + icmp_param.data.icmph = *icmp_hdr(skb); + icmp_param.skb = skb; + icmp_param.offset = 0; + icmp_param.data_len = skb->len; + icmp_param.head_len = sizeof(struct icmphdr); - icmp_param.data.icmph = *icmp_hdr(skb); + if (icmp_param.data.icmph.type == ICMP_ECHO) { icmp_param.data.icmph.type = ICMP_ECHOREPLY; - icmp_param.skb = skb; - icmp_param.offset = 0; - icmp_param.data_len = skb->len; - icmp_param.head_len = sizeof(struct icmphdr); - icmp_reply(&icmp_param, skb); + goto send_reply; } - /* should there be an ICMP stat for ignored echos? */ - return true; + if (!net->ipv4.sysctl_icmp_echo_enable_probe) + return true; + /* We currently only support probing interfaces on the proxy node + * Check to ensure L-bit is set + */ + if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1)) + return true; + /* Clear status bits in reply message */ + icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00); + icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY; + ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr); + /* Size of iio is class_type dependent. + * Only check header here and assign length based on ctype in the switch statement + */ + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio); + if (!ext_hdr || !iio) + goto send_mal_query; + if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr)) + goto send_mal_query; + ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr); + status = 0; + dev = NULL; + switch (iio->extobj_hdr.class_type) { + case EXT_ECHO_CTYPE_NAME: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len >= IFNAMSIZ) + goto send_mal_query; + memset(buff, 0, sizeof(buff)); + memcpy(buff, &iio->ident.name, ident_len); + dev = dev_get_by_name(net, buff); + break; + case EXT_ECHO_CTYPE_INDEX: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(iio->ident.ifindex), &_iio); + if (ident_len != sizeof(iio->ident.ifindex)) + goto send_mal_query; + dev = dev_get_by_index(net, ntohl(iio->ident.ifindex)); + break; + case EXT_ECHO_CTYPE_ADDR: + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + iio->ident.addr.ctype3_hdr.addrlen) + goto send_mal_query; + switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) { + case ICMP_AFI_IP: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) + + sizeof(struct in_addr), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in_addr)) + goto send_mal_query; + dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr); + break; +#if IS_ENABLED(CONFIG_IPV6) + case ICMP_AFI_IP6: + iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio); + if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + + sizeof(struct in6_addr)) + goto send_mal_query; + dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); + if (dev) + dev_hold(dev); + break; +#endif + default: + goto send_mal_query; + } + break; + default: + goto send_mal_query; + } + if (!dev) { + icmp_param.data.icmph.code = ICMP_EXT_NO_IF; + goto send_reply; + } + /* Fill bits in reply message */ + if (dev->flags & IFF_UP) + status |= EXT_ECHOREPLY_ACTIVE; + if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list) + status |= EXT_ECHOREPLY_IPV4; + if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) + status |= EXT_ECHOREPLY_IPV6; + dev_put(dev); + icmp_param.data.icmph.un.echo.sequence |= htons(status); +send_reply: + icmp_reply(&icmp_param, skb); + return true; +send_mal_query: + icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY; + goto send_reply; } /* @@ -1088,6 +1186,16 @@ int icmp_rcv(struct sk_buff *skb) icmph = icmp_hdr(skb); ICMPMSGIN_INC_STATS(net, icmph->type); + + /* Check for ICMP Extended Echo (PROBE) messages */ + if (icmph->type == ICMP_EXT_ECHO) { + /* We can't use icmp_pointers[].handler() because it is an array of + * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42. + */ + success = icmp_echo(skb); + goto success_check; + } + /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -1097,7 +1205,6 @@ int icmp_rcv(struct sk_buff *skb) if (icmph->type > NR_ICMP_TYPES) goto error; - /* * Parse the ICMP message */ @@ -1123,7 +1230,7 @@ int icmp_rcv(struct sk_buff *skb) } success = icmp_pointers[icmph->type].handler(skb); - +success_check: if (success) { consume_skb(skb); return NET_RX_SUCCESS; @@ -1340,6 +1447,7 @@ static int __net_init icmp_sk_init(struct net *net) /* Control parameters for ECHO replies. */ net->ipv4.sysctl_icmp_echo_ignore_all = 0; + net->ipv4.sysctl_icmp_echo_enable_probe = 0; net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3aab53beb4ea..c3efc7d658f6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -34,7 +34,7 @@ * Andi Kleen : Replace ip_reply with ip_send_reply. * Andi Kleen : Split fast and slow ip_build_xmit path * for decreased register pressure on x86 - * and more readibility. + * and more readability. * Marc Boucher : When call_out_firewall returns FW_QUEUE, * silently drop skb instead of failing with -EPERM. * Detlev Wengorz : Copy protocol for fragments. @@ -262,7 +262,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * interface with a smaller MTU. * - Arriving GRO skb (or GSO skb in a virtualized environment) that is * bridged to a NETIF_F_TSO tunnel stacked over an interface with an - * insufficent MTU. + * insufficient MTU. */ features = netif_skb_features(skb); BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f09fe3a5608f..5a2fc8798d20 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -116,8 +116,8 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info) kfree(info->nh); } -static int nh_notifier_mp_info_init(struct nh_notifier_info *info, - struct nh_group *nhg) +static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, + struct nh_group *nhg) { u16 num_nh = nhg->num_nh; int i; @@ -181,8 +181,8 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); - if (nhg->mpath) - return nh_notifier_mp_info_init(info, nhg); + if (nhg->hash_threshold) + return nh_notifier_mpath_info_init(info, nhg); else if (nhg->resilient) return nh_notifier_res_table_info_init(info, nhg); return -EINVAL; @@ -193,7 +193,7 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); - if (nhg->mpath) + if (nhg->hash_threshold) kfree(info->nh_grp); else if (nhg->resilient) vfree(info->nh_res_table); @@ -406,7 +406,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, * could potentially veto it in case of unsupported configuration. */ nhg = rtnl_dereference(nh->nh_grp); - err = nh_notifier_mp_info_init(&info, nhg); + err = nh_notifier_mpath_info_init(&info, nhg); if (err) { NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info"); return err; @@ -661,7 +661,7 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) u16 group_type = 0; int i; - if (nhg->mpath) + if (nhg->hash_threshold) group_type = NEXTHOP_GRP_TYPE_MPATH; else if (nhg->resilient) group_type = NEXTHOP_GRP_TYPE_RES; @@ -992,9 +992,9 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, struct nh_group *nhg = rtnl_dereference(nh->nh_grp); /* Nesting groups within groups is not supported. */ - if (nhg->mpath) { + if (nhg->hash_threshold) { NL_SET_ERR_MSG(extack, - "Multipath group can not be a nexthop within a group"); + "Hash-threshold group can not be a nexthop within a group"); return false; } if (nhg->resilient) { @@ -1151,7 +1151,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) +static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) { struct nexthop *rc = NULL; int i; @@ -1160,7 +1160,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; - if (hash > atomic_read(&nhge->mpath.upper_bound)) + if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; nhi = rcu_dereference(nhge->nh->nh_info); @@ -1212,8 +1212,8 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) return nh; nhg = rcu_dereference(nh->nh_grp); - if (nhg->mpath) - return nexthop_select_path_mp(nhg, hash); + if (nhg->hash_threshold) + return nexthop_select_path_hthr(nhg, hash); else if (nhg->resilient) return nexthop_select_path_res(nhg, hash); @@ -1710,7 +1710,7 @@ static void replace_nexthop_grp_res(struct nh_group *oldg, nh_res_table_upkeep(old_res_table, true, false); } -static void nh_mp_group_rebalance(struct nh_group *nhg) +static void nh_hthr_group_rebalance(struct nh_group *nhg) { int total = 0; int w = 0; @@ -1725,7 +1725,7 @@ static void nh_mp_group_rebalance(struct nh_group *nhg) w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; - atomic_set(&nhge->mpath.upper_bound, upper_bound); + atomic_set(&nhge->hthr.upper_bound, upper_bound); } } @@ -1752,7 +1752,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, newg->has_v4 = false; newg->is_multipath = nhg->is_multipath; - newg->mpath = nhg->mpath; + newg->hash_threshold = nhg->hash_threshold; newg->resilient = nhg->resilient; newg->fdb_nh = nhg->fdb_nh; newg->num_nh = nhg->num_nh; @@ -1781,8 +1781,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, j++; } - if (newg->mpath) - nh_mp_group_rebalance(newg); + if (newg->hash_threshold) + nh_hthr_group_rebalance(newg); else if (newg->resilient) replace_nexthop_grp_res(nhg, newg); @@ -1794,7 +1794,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, /* Removal of a NH from a resilient group is notified through * bucket notifications. */ - if (newg->mpath) { + if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack); if (err) @@ -1928,12 +1928,12 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, oldg = rtnl_dereference(old->nh_grp); newg = rtnl_dereference(new->nh_grp); - if (newg->mpath != oldg->mpath) { + if (newg->hash_threshold != oldg->hash_threshold) { NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type."); return -EINVAL; } - if (newg->mpath) { + if (newg->hash_threshold) { err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack); if (err) @@ -2063,7 +2063,7 @@ static int replace_nexthop_single_notify(struct net *net, struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp); struct nh_res_table *res_table; - if (nhg->mpath) { + if (nhg->hash_threshold) { return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, group_nh, extack); } else if (nhg->resilient) { @@ -2328,8 +2328,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, rb_link_node_rcu(&new_nh->rb_node, parent, pp); rb_insert_color(&new_nh->rb_node, root); - /* The initial insertion is a full notification for mpath as well - * as resilient groups. + /* The initial insertion is a full notification for hash-threshold as + * well as resilient groups. */ rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack); if (rc) @@ -2438,7 +2438,7 @@ static struct nexthop *nexthop_create_group(struct net *net, } if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { - nhg->mpath = 1; + nhg->hash_threshold = 1; nhg->is_multipath = true; } else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) { struct nh_res_table *res_table; @@ -2455,10 +2455,10 @@ static struct nexthop *nexthop_create_group(struct net *net, nhg->is_multipath = true; } - WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1); + WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1); - if (nhg->mpath) - nh_mp_group_rebalance(nhg); + if (nhg->hash_threshold) + nh_hthr_group_rebalance(nhg); if (cfg->nh_fdb) nhg->fdb_nh = 1; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8b943f85fff9..1c9f71a37258 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -453,7 +453,9 @@ EXPORT_SYMBOL_GPL(ping_bind); static inline int ping_supported(int family, int type, int code) { return (family == AF_INET && type == ICMP_ECHO && code == 0) || - (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); + (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) || + (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0); } /* diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f55095d3ed16..a09e466ce11d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -31,7 +31,6 @@ static int two = 2; static int four = 4; static int thousand = 1000; -static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -47,7 +46,6 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; -static int comp_sack_nr_max = 255; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; @@ -209,7 +207,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write, net = container_of(table->data, struct net, ipv4.sysctl_ip_fwd_update_priority); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, net); @@ -389,7 +387,7 @@ static int proc_tcp_early_demux(struct ctl_table *table, int write, { int ret = 0; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && !ret) { int enabled = init_net.ipv4.sysctl_tcp_early_demux; @@ -405,7 +403,7 @@ static int proc_udp_early_demux(struct ctl_table *table, int write, { int ret = 0; - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && !ret) { int enabled = init_net.ipv4.sysctl_udp_early_demux; @@ -457,7 +455,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ipv4.sysctl_fib_multipath_hash_policy); int ret; - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); @@ -595,30 +593,39 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "icmp_echo_ignore_all", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, + }, + { + .procname = "icmp_echo_enable_probe", + .data = &init_net.ipv4.sysctl_icmp_echo_enable_probe, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_ignore_broadcasts", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_ignore_bogus_error_responses", .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_errors_use_inbound_ifaddr", .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "icmp_ratelimit", @@ -645,9 +652,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "raw_l3mdev_accept", .data = &init_net.ipv4.sysctl_raw_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -655,60 +662,60 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_ecn", .data = &init_net.ipv4.sysctl_tcp_ecn, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_ecn_fallback", .data = &init_net.ipv4.sysctl_tcp_ecn_fallback, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_dynaddr", .data = &init_net.ipv4.sysctl_ip_dynaddr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_early_demux", .data = &init_net.ipv4.sysctl_ip_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "udp_early_demux", .data = &init_net.ipv4.sysctl_udp_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_udp_early_demux }, { .procname = "tcp_early_demux", .data = &init_net.ipv4.sysctl_tcp_early_demux, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_tcp_early_demux }, { .procname = "nexthop_compat_mode", .data = &init_net.ipv4.sysctl_nexthop_compat_mode, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = &ip_ttl_min, .extra2 = &ip_ttl_max, }, @@ -729,21 +736,21 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ip_no_pmtu_disc", .data = &init_net.ipv4.sysctl_ip_no_pmtu_disc, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_forward_use_pmtu", .data = &init_net.ipv4.sysctl_ip_fwd_use_pmtu, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_forward_update_priority", .data = &init_net.ipv4.sysctl_ip_fwd_update_priority, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = ipv4_fwd_update_priority, .extra1 = SYSCTL_ZERO, @@ -752,40 +759,40 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "ip_nonlocal_bind", .data = &init_net.ipv4.sysctl_ip_nonlocal_bind, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_autobind_reuse", .data = &init_net.ipv4.sysctl_ip_autobind_reuse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "fwmark_reflect", .data = &init_net.ipv4.sysctl_fwmark_reflect, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fwmark_accept", .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, #ifdef CONFIG_NET_L3_MASTER_DEV { .procname = "tcp_l3mdev_accept", .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -793,9 +800,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_base_mss", @@ -840,9 +847,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "igmp_link_local_mcast_reports", .data = &init_net.ipv4.sysctl_igmp_llm_reports, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "igmp_max_memberships", @@ -897,9 +904,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_keepalive_probes", .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_keepalive_intvl", @@ -911,26 +918,26 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_syn_retries", .data = &init_net.ipv4.sysctl_tcp_syn_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = &tcp_syn_retries_min, .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", .data = &init_net.ipv4.sysctl_tcp_synack_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, #ifdef CONFIG_SYN_COOKIES { .procname = "tcp_syncookies", .data = &init_net.ipv4.sysctl_tcp_syncookies, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, #endif { @@ -943,24 +950,24 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_retries1", .data = &init_net.ipv4.sysctl_tcp_retries1, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra2 = &tcp_retr1_max }, { .procname = "tcp_retries2", .data = &init_net.ipv4.sysctl_tcp_retries2, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_orphan_retries", .data = &init_net.ipv4.sysctl_tcp_orphan_retries, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fin_timeout", @@ -979,9 +986,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_tw_reuse", .data = &init_net.ipv4.sysctl_tcp_tw_reuse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, @@ -1030,16 +1037,16 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "fib_multipath_use_neigh", .data = &init_net.ipv4.sysctl_fib_multipath_use_neigh, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "fib_multipath_hash_policy", .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, @@ -1057,9 +1064,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "udp_l3mdev_accept", .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1067,88 +1074,88 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_sack", .data = &init_net.ipv4.sysctl_tcp_sack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_window_scaling", .data = &init_net.ipv4.sysctl_tcp_window_scaling, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_timestamps", .data = &init_net.ipv4.sysctl_tcp_timestamps, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_early_retrans", .data = &init_net.ipv4.sysctl_tcp_early_retrans, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &four, }, { .procname = "tcp_recovery", .data = &init_net.ipv4.sysctl_tcp_recovery, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_thin_linear_timeouts", .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_slow_start_after_idle", .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_retrans_collapse", .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_stdurg", .data = &init_net.ipv4.sysctl_tcp_stdurg, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_rfc1337", .data = &init_net.ipv4.sysctl_tcp_rfc1337, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_abort_on_overflow", .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_fack", .data = &init_net.ipv4.sysctl_tcp_fack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_max_reordering", @@ -1160,16 +1167,16 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_dsack", .data = &init_net.ipv4.sysctl_tcp_dsack, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_app_win", .data = &init_net.ipv4.sysctl_tcp_app_win, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_adv_win_scale", @@ -1183,46 +1190,46 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_frto", .data = &init_net.ipv4.sysctl_tcp_frto, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_no_metrics_save", .data = &init_net.ipv4.sysctl_tcp_nometrics_save, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_no_ssthresh_metrics_save", .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "tcp_moderate_rcvbuf", .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_tso_win_divisor", .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_workaround_signed_windows", .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "tcp_limit_output_bytes", @@ -1241,11 +1248,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_min_tso_segs", .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, - .extra2 = &gso_max_segs, }, { .procname = "tcp_min_rtt_wlen", @@ -1259,9 +1265,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_autocorking", .data = &init_net.ipv4.sysctl_tcp_autocorking, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1323,18 +1329,17 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_comp_sack_nr", .data = &init_net.ipv4.sysctl_tcp_comp_sack_nr, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &comp_sack_nr_max, }, { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, @@ -1357,9 +1362,9 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "fib_notify_on_flag_change", .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e6459537d4d2..82b36ec3f2f8 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -63,7 +63,7 @@ enum tcp_lp_state { * @sowd: smoothed OWD << 3 * @owd_min: min OWD * @owd_max: max OWD - * @owd_max_rsv: resrved max owd + * @owd_max_rsv: reserved max owd * @remote_hz: estimated remote HZ * @remote_ref_time: remote reference time * @local_ref_time: local reference time @@ -305,7 +305,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) /* FIXME: try to reset owd_min and owd_max here * so decrease the chance the min/max is no longer suitable - * and will usually within threshold when whithin inference */ + * and will usually within threshold when within inference */ lp->owd_min = lp->sowd >> 3; lp->owd_max = lp->sowd >> 2; lp->owd_max_rsv = lp->sowd >> 2; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4d02f6839e38..bfcc7f1a8a7f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2207,6 +2207,8 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + + udp_post_segment_fix_csum(skb); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); @@ -2693,9 +2695,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case UDP_GRO: lock_sock(sk); + + /* when enabling GRO, accept the related GSO packet type */ if (valbool) udp_tunnel_encap_enable(sk->sk_socket); up->gro_enabled = valbool; + up->accept_udp_l4 = valbool; release_sock(sk); break; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index c5b4b586570f..54e06b88af69 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -515,21 +515,24 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; + /* we can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream + */ NAPI_GRO_CB(skb)->is_flist = 0; - if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; + if (!sk || !udp_sk(sk)->gro_receive) { + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; - if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || - (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { - pp = call_gro_receive(udp_gro_receive_segment, head, skb); + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + pp = call_gro_receive(udp_gro_receive_segment, head, skb); return pp; } - if (!sk || NAPI_GRO_CB(skb)->encap_mark || + if (NAPI_GRO_CB(skb)->encap_mark || (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid) || - !udp_sk(sk)->gro_receive) + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the tunnel gro layer */ @@ -639,6 +642,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + /* clear the encap mark, so that inner frag_list gro_complete + * can take place + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + /* Set encapsulation before calling into inner gro_complete() * functions to make them set up the inner offsets. */ @@ -662,7 +670,8 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (NAPI_GRO_CB(skb)->is_flist) { + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ + if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) { uh->len = htons(skb->len - nhoff); skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2337fb756ac..120073ffb666 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2358,7 +2358,7 @@ regen: /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1: * check if generated address is not inappropriate: * - * - Reserved IPv6 Interface Identifers + * - Reserved IPv6 Interface Identifiers * - XXX: already assigned to an address on the device */ @@ -5107,17 +5107,20 @@ next: break; } case MULTICAST_ADDR: + read_unlock_bh(&idev->lock); fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { + for (ifmca = rcu_dereference(idev->mc_list); + ifmca; + ifmca = rcu_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); if (err < 0) break; } + read_lock_bh(&idev->lock); break; case ANYCAST_ADDR: fillargs->event = RTM_GETANYCAST; @@ -6093,10 +6096,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - rcu_read_unlock_bh(); } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index c70c192bc91b..1d4054bb345b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -198,6 +198,12 @@ static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct s return -EAFNOSUPPORT; } +static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, + struct net_device *dev) +{ + return ERR_PTR(-EAFNOSUPPORT); +} + const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, .ipv6_route_input = eafnosupport_ipv6_route_input, @@ -209,6 +215,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .fib6_nh_init = eafnosupport_fib6_nh_init, .ip6_del_rt = eafnosupport_ip6_del_rt, .ipv6_fragment = eafnosupport_ipv6_fragment, + .ipv6_dev_find = eafnosupport_ipv6_dev_find, }; EXPORT_SYMBOL_GPL(ipv6_stub); @@ -250,7 +257,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list); + WARN_ON(rcu_access_pointer(idev->mc_list)); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 71de739b4a9e..2389ff702f51 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -222,7 +222,7 @@ lookup_protocol: inet->mc_loop = 1; inet->mc_ttl = 1; inet->mc_index = 0; - inet->mc_list = NULL; + RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; if (net->ipv4.sysctl_ip_no_pmtu_disc) @@ -1033,6 +1033,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, + .ipv6_dev_find = ipv6_dev_find, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fd1f896115c1..1bca2b09d77e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -944,11 +944,11 @@ static int icmpv6_rcv(struct sk_buff *skb) case ICMPV6_MGM_QUERY: igmp6_event_query(skb); - break; + return 0; case ICMPV6_MGM_REPORT: igmp6_event_report(skb); - break; + return 0; case ICMPV6_MGM_REDUCTION: case ICMPV6_NI_QUERY: @@ -1169,23 +1169,23 @@ static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "echo_ignore_all", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_multicast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_anycast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ratemask", diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1baf43aacb2e..bc224f917bbd 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -387,7 +387,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; - dev_hold(dev); ip6gre_tunnel_link(ign, nt); return nt; @@ -1496,6 +1495,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) } ip6gre_tnl_init_features(dev); + dev_hold(dev); return 0; cleanup_dst_cache_init: @@ -1538,8 +1538,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) strcpy(tunnel->parms.name, dev->name); tunnel->hlen = sizeof(struct ipv6hdr) + 4; - - dev_hold(dev); } static struct inet6_protocol ip6gre_protocol __read_mostly = { @@ -1889,6 +1887,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip6erspan_tnl_link_config(tunnel, 1); + dev_hold(dev); return 0; cleanup_dst_cache_init: @@ -1988,8 +1987,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); - dev_hold(dev); - out: return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3fa0eca5a06f..07a0a06a9b52 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -266,7 +266,6 @@ static int ip6_tnl_create2(struct net_device *dev) strcpy(t->parms.name, dev->name); - dev_hold(dev); ip6_tnl_link(ip6n, t); return 0; @@ -388,7 +387,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) } /** - * parse_tvl_tnl_enc_lim - handle encapsulation limit option + * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option * @skb: received socket buffer * @raw: the ICMPv6 error message data * @@ -1882,6 +1881,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev_hold(dev); return 0; destroy_dst: @@ -1925,7 +1925,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); t->parms.proto = IPPROTO_IPV6; - dev_hold(dev); rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index f10e7a72ea62..856e46ad0895 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -193,7 +193,6 @@ static int vti6_tnl_create2(struct net_device *dev) strcpy(t->parms.name, dev->name); - dev_hold(dev); vti6_tnl_link(ip6n, t); return 0; @@ -932,6 +931,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + dev_hold(dev); return 0; } @@ -963,7 +963,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); t->parms.proto = IPPROTO_IPV6; - dev_hold(dev); rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6c8604390266..49b0cebfdcdc 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -29,7 +29,6 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/jiffies.h> -#include <linux/times.h> #include <linux/net.h> #include <linux/in.h> #include <linux/in6.h> @@ -42,6 +41,7 @@ #include <linux/slab.h> #include <linux/pkt_sched.h> #include <net/mld.h> +#include <linux/workqueue.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -67,14 +67,13 @@ static int __mld2_query_bugs[] __attribute__((__unused__)) = { BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4) }; +static struct workqueue_struct *mld_wq; static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); -static void igmp6_timer_handler(struct timer_list *t); +static void mld_mca_work(struct work_struct *work); -static void mld_gq_timer_expire(struct timer_list *t); -static void mld_ifc_timer_expire(struct timer_list *t); static void mld_ifc_event(struct inet6_dev *idev); static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc); @@ -112,12 +111,49 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; /* * socket join on multicast group */ +#define mc_dereference(e, idev) \ + rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) + +#define for_each_pmc_rtnl(np, pmc) \ + for (pmc = rtnl_dereference((np)->ipv6_mc_list); \ + pmc; \ + pmc = rtnl_dereference(pmc->next)) #define for_each_pmc_rcu(np, pmc) \ - for (pmc = rcu_dereference(np->ipv6_mc_list); \ - pmc != NULL; \ + for (pmc = rcu_dereference((np)->ipv6_mc_list); \ + pmc; \ pmc = rcu_dereference(pmc->next)) +#define for_each_psf_mclock(mc, psf) \ + for (psf = mc_dereference((mc)->mca_sources, mc->idev); \ + psf; \ + psf = mc_dereference(psf->sf_next, mc->idev)) + +#define for_each_psf_rcu(mc, psf) \ + for (psf = rcu_dereference((mc)->mca_sources); \ + psf; \ + psf = rcu_dereference(psf->sf_next)) + +#define for_each_psf_tomb(mc, psf) \ + for (psf = mc_dereference((mc)->mca_tomb, mc->idev); \ + psf; \ + psf = mc_dereference(psf->sf_next, mc->idev)) + +#define for_each_mc_mclock(idev, mc) \ + for (mc = mc_dereference((idev)->mc_list, idev); \ + mc; \ + mc = mc_dereference(mc->next, idev)) + +#define for_each_mc_rcu(idev, mc) \ + for (mc = rcu_dereference((idev)->mc_list); \ + mc; \ + mc = rcu_dereference(mc->next)) + +#define for_each_mc_tomb(idev, mc) \ + for (mc = mc_dereference((idev)->mc_tomb, idev); \ + mc; \ + mc = mc_dereference(mc->next, idev)) + static int unsolicited_report_interval(struct inet6_dev *idev) { int iv; @@ -144,15 +180,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - rcu_read_lock(); - for_each_pmc_rcu(np, mc_lst) { + for_each_pmc_rtnl(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && - ipv6_addr_equal(&mc_lst->addr, addr)) { - rcu_read_unlock(); + ipv6_addr_equal(&mc_lst->addr, addr)) return -EADDRINUSE; - } } - rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -179,8 +211,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, mc_lst->ifindex = dev->ifindex; mc_lst->sfmode = mode; - rwlock_init(&mc_lst->sflock); - mc_lst->sflist = NULL; + RCU_INIT_POINTER(mc_lst->sflist, NULL); /* * now add/increase the group membership on the device @@ -239,11 +270,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); - (void) ip6_mc_leave_src(sk, mc_lst, idev); + ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else - (void) ip6_mc_leave_src(sk, mc_lst, NULL); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); @@ -255,10 +287,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -/* called with rcu_read_lock() */ -static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, + const struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -270,19 +301,17 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, dev = rt->dst.dev; ip6_rt_put(rt); } - } else - dev = dev_get_by_index_rcu(net, ifindex); + } else { + dev = __dev_get_by_index(net, ifindex); + } if (!dev) return NULL; idev = __in6_dev_get(dev); if (!idev) return NULL; - read_lock_bh(&idev->lock); - if (idev->dead) { - read_unlock_bh(&idev->lock); + if (idev->dead) return NULL; - } return idev; } @@ -303,11 +332,12 @@ void __ipv6_sock_mc_close(struct sock *sk) if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); - (void) ip6_mc_leave_src(sk, mc_lst, idev); + ip6_mc_leave_src(sk, mc_lst, idev); if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else - (void) ip6_mc_leave_src(sk, mc_lst, NULL); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); @@ -336,7 +366,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct net *net = sock_net(sk); int i, j, rv; int leavegroup = 0; - int pmclocked = 0; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; @@ -345,16 +374,14 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); + if (!idev) return -ENODEV; - } err = -EADDRNOTAVAIL; - for_each_pmc_rcu(inet6, pmc) { + mutex_lock(&idev->mc_lock); + for_each_pmc_rtnl(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -365,7 +392,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, goto done; } /* if a source filter was set, must be the same mode as before */ - if (pmc->sflist) { + if (rcu_access_pointer(pmc->sflist)) { if (pmc->sfmode != omode) { err = -EINVAL; goto done; @@ -377,10 +404,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - write_lock(&pmc->sflock); - pmclocked = 1; - - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -420,7 +444,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC); + newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -430,9 +454,11 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } - pmc->sflist = psl = newpsl; + psl = newpsl; + rcu_assign_pointer(pmc->sflist, psl); } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i = 0; i < psl->sl_count; i++) { @@ -448,10 +474,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, /* update the interface list */ ip6_mc_add_src(idev, group, omode, 1, source, 1); done: - if (pmclocked) - write_unlock(&pmc->sflock); - read_unlock_bh(&idev->lock); - rcu_read_unlock(); + mutex_unlock(&idev->mc_lock); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -477,13 +500,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + if (!idev) return -ENODEV; - } err = 0; @@ -492,7 +511,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } - for_each_pmc_rcu(inet6, pmc) { + for_each_pmc_rtnl(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -504,7 +523,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } if (gsf->gf_numsrc) { newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_ATOMIC); + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -516,32 +535,37 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } + mutex_lock(&idev->mc_lock); err = ip6_mc_add_src(idev, group, gsf->gf_fmode, - newpsl->sl_count, newpsl->sl_addr, 0); + newpsl->sl_count, newpsl->sl_addr, 0); if (err) { + mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); goto done; } + mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; - (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); + mutex_lock(&idev->mc_lock); + ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); + mutex_unlock(&idev->mc_lock); } - write_lock(&pmc->sflock); - psl = pmc->sflist; + mutex_lock(&idev->mc_lock); + psl = rtnl_dereference(pmc->sflist); if (psl) { - (void) ip6_mc_del_src(idev, group, pmc->sfmode, - psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); - } else - (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + ip6_mc_del_src(idev, group, pmc->sfmode, + psl->sl_count, psl->sl_addr, 0); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); + } else { + ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); + } + mutex_unlock(&idev->mc_lock); + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = gsf->gf_fmode; - write_unlock(&pmc->sflock); err = 0; done: - read_unlock_bh(&idev->lock); - rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -563,13 +587,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - rcu_read_lock(); - idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - - if (!idev) { - rcu_read_unlock(); + idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + if (!idev) return -ENODEV; - } err = -EADDRNOTAVAIL; /* changes to the ipv6_mc_list require the socket lock and @@ -577,25 +597,22 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for_each_pmc_rcu(inet6, pmc) { + for_each_pmc_rtnl(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) break; } if (!pmc) /* must have a prior join */ - goto done; + return err; + gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; - read_unlock_bh(&idev->lock); - rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - /* changes to psl require the socket lock, and a write lock - * on pmc->sflock. We have the socket lock so reading here is safe. - */ + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -608,10 +625,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return -EFAULT; } return 0; -done: - read_unlock_bh(&idev->lock); - rcu_read_unlock(); - return err; } bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, @@ -631,8 +644,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rcu_read_unlock(); return np->mc_all; } - read_lock(&mc->sflock); - psl = mc->sflist; + psl = rcu_dereference(mc->sflist); if (!psl) { rv = mc->sfmode == MCAST_EXCLUDE; } else { @@ -647,12 +659,12 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) rv = false; } - read_unlock(&mc->sflock); rcu_read_unlock(); return rv; } +/* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; @@ -662,13 +674,11 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) IPV6_ADDR_SCOPE_LINKLOCAL) return; - spin_lock_bh(&mc->mca_lock); if (!(mc->mca_flags&MAF_LOADED)) { mc->mca_flags |= MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_add(dev, buf); } - spin_unlock_bh(&mc->mca_lock); if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT)) return; @@ -689,6 +699,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) mld_ifc_event(mc->idev); } +/* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; @@ -698,28 +709,25 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) IPV6_ADDR_SCOPE_LINKLOCAL) return; - spin_lock_bh(&mc->mca_lock); if (mc->mca_flags&MAF_LOADED) { mc->mca_flags &= ~MAF_LOADED; if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) dev_mc_del(dev, buf); } - spin_unlock_bh(&mc->mca_lock); if (mc->mca_flags & MAF_NOREPORT) return; if (!mc->idev->dead) igmp6_leave_group(mc); - spin_lock_bh(&mc->mca_lock); - if (del_timer(&mc->mca_timer)) + if (cancel_delayed_work(&mc->mca_work)) refcount_dec(&mc->mca_refcnt); - spin_unlock_bh(&mc->mca_lock); } /* * deleted ifmcaddr6 manipulation + * called with mc_lock */ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { @@ -731,12 +739,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC); + pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; - spin_lock_bh(&im->mca_lock); - spin_lock_init(&pmc->mca_lock); pmc->idev = im->idev; in6_dev_hold(idev); pmc->mca_addr = im->mca_addr; @@ -745,90 +751,110 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) if (pmc->mca_sfmode == MCAST_INCLUDE) { struct ip6_sf_list *psf; - pmc->mca_tomb = im->mca_tomb; - pmc->mca_sources = im->mca_sources; - im->mca_tomb = im->mca_sources = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + rcu_assign_pointer(pmc->mca_tomb, + mc_dereference(im->mca_tomb, idev)); + rcu_assign_pointer(pmc->mca_sources, + mc_dereference(im->mca_sources, idev)); + RCU_INIT_POINTER(im->mca_tomb, NULL); + RCU_INIT_POINTER(im->mca_sources, NULL); + + for_each_psf_mclock(pmc, psf) psf->sf_crcount = pmc->mca_crcount; } - spin_unlock_bh(&im->mca_lock); - spin_lock_bh(&idev->mc_lock); - pmc->next = idev->mc_tomb; - idev->mc_tomb = pmc; - spin_unlock_bh(&idev->mc_lock); + rcu_assign_pointer(pmc->next, idev->mc_tomb); + rcu_assign_pointer(idev->mc_tomb, pmc); } +/* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { - struct ifmcaddr6 *pmc, *pmc_prev; - struct ip6_sf_list *psf; + struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; + struct ifmcaddr6 *pmc, *pmc_prev; - spin_lock_bh(&idev->mc_lock); pmc_prev = NULL; - for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) { + for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; } if (pmc) { if (pmc_prev) - pmc_prev->next = pmc->next; + rcu_assign_pointer(pmc_prev->next, pmc->next); else - idev->mc_tomb = pmc->next; + rcu_assign_pointer(idev->mc_tomb, pmc->next); } - spin_unlock_bh(&idev->mc_lock); - spin_lock_bh(&im->mca_lock); if (pmc) { im->idev = pmc->idev; if (im->mca_sfmode == MCAST_INCLUDE) { - swap(im->mca_tomb, pmc->mca_tomb); - swap(im->mca_sources, pmc->mca_sources); - for (psf = im->mca_sources; psf; psf = psf->sf_next) + tomb = rcu_replace_pointer(im->mca_tomb, + mc_dereference(pmc->mca_tomb, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_tomb, tomb); + + sources = rcu_replace_pointer(im->mca_sources, + mc_dereference(pmc->mca_sources, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_sources, sources); + for_each_psf_mclock(im, psf) psf->sf_crcount = idev->mc_qrv; } else { im->mca_crcount = idev->mc_qrv; } in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); - kfree(pmc); + kfree_rcu(pmc, rcu); } - spin_unlock_bh(&im->mca_lock); } +/* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; - spin_lock_bh(&idev->mc_lock); - pmc = idev->mc_tomb; - idev->mc_tomb = NULL; - spin_unlock_bh(&idev->mc_lock); + pmc = mc_dereference(idev->mc_tomb, idev); + RCU_INIT_POINTER(idev->mc_tomb, NULL); for (; pmc; pmc = nextpmc) { - nextpmc = pmc->next; + nextpmc = mc_dereference(pmc->next, idev); ip6_mc_clear_src(pmc); in6_dev_put(pmc->idev); - kfree(pmc); + kfree_rcu(pmc, rcu); } /* clear dead sources, too */ - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + for_each_mc_mclock(idev, pmc) { struct ip6_sf_list *psf, *psf_next; - spin_lock_bh(&pmc->mca_lock); - psf = pmc->mca_tomb; - pmc->mca_tomb = NULL; - spin_unlock_bh(&pmc->mca_lock); + psf = mc_dereference(pmc->mca_tomb, idev); + RCU_INIT_POINTER(pmc->mca_tomb, NULL); for (; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + psf_next = mc_dereference(psf->sf_next, idev); + kfree_rcu(psf, rcu); } } - read_unlock_bh(&idev->lock); +} + +static void mld_clear_query(struct inet6_dev *idev) +{ + struct sk_buff *skb; + + spin_lock_bh(&idev->mc_query_lock); + while ((skb = __skb_dequeue(&idev->mc_query_queue))) + kfree_skb(skb); + spin_unlock_bh(&idev->mc_query_lock); +} + +static void mld_clear_report(struct inet6_dev *idev) +{ + struct sk_buff *skb; + + spin_lock_bh(&idev->mc_report_lock); + while ((skb = __skb_dequeue(&idev->mc_report_queue))) + kfree_skb(skb); + spin_unlock_bh(&idev->mc_report_lock); } static void mca_get(struct ifmcaddr6 *mc) @@ -840,21 +866,22 @@ static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { in6_dev_put(mc->idev); - kfree(mc); + kfree_rcu(mc, rcu); } } +/* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; - mc = kzalloc(sizeof(*mc), GFP_ATOMIC); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; - timer_setup(&mc->mca_timer, igmp6_timer_handler, 0); + INIT_DELAYED_WORK(&mc->mca_work, mld_mca_work); mc->mca_addr = *addr; mc->idev = idev; /* reference taken by caller */ @@ -862,7 +889,6 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; refcount_set(&mc->mca_refcnt, 1); - spin_lock_init(&mc->mca_lock); mc->mca_sfmode = mode; mc->mca_sfcount[mode] = 1; @@ -891,18 +917,17 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, if (!idev) return -EINVAL; - write_lock_bh(&idev->lock); if (idev->dead) { - write_unlock_bh(&idev->lock); in6_dev_put(idev); return -ENODEV; } - for (mc = idev->mc_list; mc; mc = mc->next) { + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; - write_unlock_bh(&idev->lock); ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0); + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return 0; } @@ -910,22 +935,19 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mc = mca_alloc(idev, addr, mode); if (!mc) { - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENOMEM; } - mc->next = idev->mc_list; - idev->mc_list = mc; + rcu_assign_pointer(mc->next, idev->mc_list); + rcu_assign_pointer(idev->mc_list, mc); - /* Hold this for the code below before we unlock, - * it is already exposed via idev->mc_list. - */ mca_get(mc); - write_unlock_bh(&idev->lock); mld_del_delrec(idev, mc); igmp6_group_added(mc); + mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; } @@ -937,33 +959,35 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) EXPORT_SYMBOL(ipv6_dev_mc_inc); /* - * device multicast group del + * device multicast group del */ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { - struct ifmcaddr6 *ma, **map; + struct ifmcaddr6 *ma, __rcu **map; ASSERT_RTNL(); - write_lock_bh(&idev->lock); - for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) { + mutex_lock(&idev->mc_lock); + for (map = &idev->mc_list; + (ma = mc_dereference(*map, idev)); + map = &ma->next) { if (ipv6_addr_equal(&ma->mca_addr, addr)) { if (--ma->mca_users == 0) { *map = ma->next; - write_unlock_bh(&idev->lock); igmp6_group_dropped(ma); ip6_mc_clear_src(ma); + mutex_unlock(&idev->mc_lock); ma_put(ma); return 0; } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); return 0; } } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); return -ENOENT; } @@ -997,8 +1021,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - for (mc = idev->mc_list; mc; mc = mc->next) { + for_each_mc_rcu(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, group)) break; } @@ -1006,8 +1029,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, if (src_addr && !ipv6_addr_any(src_addr)) { struct ip6_sf_list *psf; - spin_lock_bh(&mc->mca_lock); - for (psf = mc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_rcu(mc, psf) { if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } @@ -1017,89 +1039,107 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, mc->mca_sfcount[MCAST_EXCLUDE]; else rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; - spin_unlock_bh(&mc->mca_lock); } else rv = true; /* don't filter unspecified source */ } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); return rv; } -static void mld_gq_start_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = prandom_u32() % idev->mc_maxdelay; idev->mc_gq_running = 1; - if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } -static void mld_gq_stop_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_gq_stop_work(struct inet6_dev *idev) { idev->mc_gq_running = 0; - if (del_timer(&idev->mc_gq_timer)) + if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } -static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay) +/* called with mc_lock */ +static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = prandom_u32() % delay; - if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } -static void mld_ifc_stop_timer(struct inet6_dev *idev) +/* called with mc_lock */ +static void mld_ifc_stop_work(struct inet6_dev *idev) { idev->mc_ifc_count = 0; - if (del_timer(&idev->mc_ifc_timer)) + if (cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } -static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay) +/* called with mc_lock */ +static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = prandom_u32() % delay; - if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } -static void mld_dad_stop_timer(struct inet6_dev *idev) +static void mld_dad_stop_work(struct inet6_dev *idev) +{ + if (cancel_delayed_work(&idev->mc_dad_work)) + __in6_dev_put(idev); +} + +static void mld_query_stop_work(struct inet6_dev *idev) { - if (del_timer(&idev->mc_dad_timer)) + spin_lock_bh(&idev->mc_query_lock); + if (cancel_delayed_work(&idev->mc_query_work)) + __in6_dev_put(idev); + spin_unlock_bh(&idev->mc_query_lock); +} + +static void mld_report_stop_work(struct inet6_dev *idev) +{ + if (cancel_delayed_work_sync(&idev->mc_report_work)) __in6_dev_put(idev); } /* - * IGMP handling (alias multicast ICMPv6 messages) + * IGMP handling (alias multicast ICMPv6 messages) + * called with mc_lock */ - static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; - /* Do not start timer for these addresses */ + /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; - if (del_timer(&ma->mca_timer)) { + if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); - delay = ma->mca_timer.expires - jiffies; + delay = ma->mca_work.timer.expires - jiffies; } if (delay >= resptime) delay = prandom_u32() % resptime; - ma->mca_timer.expires = jiffies + delay; - if (!mod_timer(&ma->mca_timer, jiffies + delay)) + if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING; } -/* mark EXCLUDE-mode sources */ +/* mark EXCLUDE-mode sources + * called with mc_lock + */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { @@ -1107,7 +1147,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, int i, scount; scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { @@ -1128,6 +1168,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, return true; } +/* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { @@ -1140,7 +1181,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, /* mark INCLUDE-mode sources */ scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) break; for (i = 0; i < nsrcs; i++) { @@ -1305,10 +1346,10 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, if (v1_query) mld_set_v1_mode(idev); - /* cancel MLDv2 report timer */ - mld_gq_stop_timer(idev); - /* cancel the interface change timer */ - mld_ifc_stop_timer(idev); + /* cancel MLDv2 report work */ + mld_gq_stop_work(idev); + /* cancel the interface change work */ + mld_ifc_stop_work(idev); /* clear deleted report items */ mld_clear_delrec(idev); @@ -1332,18 +1373,41 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, /* called with rcu_read_lock() */ int igmp6_event_query(struct sk_buff *skb) { + struct inet6_dev *idev = __in6_dev_get(skb->dev); + + if (!idev) + return -EINVAL; + + if (idev->dead) { + kfree_skb(skb); + return -ENODEV; + } + + spin_lock_bh(&idev->mc_query_lock); + if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) { + __skb_queue_tail(&idev->mc_query_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0)) + in6_dev_hold(idev); + } + spin_unlock_bh(&idev->mc_query_lock); + + return 0; +} + +static void __mld_query_work(struct sk_buff *skb) +{ struct mld2_query *mlh2 = NULL; - struct ifmcaddr6 *ma; const struct in6_addr *group; unsigned long max_delay; struct inet6_dev *idev; + struct ifmcaddr6 *ma; struct mld_msg *mld; int group_type; int mark = 0; int len, err; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - return -EINVAL; + goto kfree_skb; /* compute payload length excluding extension headers */ len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); @@ -1360,11 +1424,11 @@ int igmp6_event_query(struct sk_buff *skb) ipv6_hdr(skb)->hop_limit != 1 || !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) - return -EINVAL; + goto kfree_skb; - idev = __in6_dev_get(skb->dev); + idev = in6_dev_get(skb->dev); if (!idev) - return 0; + goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); group = &mld->mld_mca; @@ -1372,60 +1436,56 @@ int igmp6_event_query(struct sk_buff *skb) if (group_type != IPV6_ADDR_ANY && !(group_type&IPV6_ADDR_MULTICAST)) - return -EINVAL; + goto out; if (len < MLD_V1_QUERY_LEN) { - return -EINVAL; + goto out; } else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) { err = mld_process_v1(idev, mld, &max_delay, len == MLD_V1_QUERY_LEN); if (err < 0) - return err; + goto out; } else if (len >= MLD_V2_QUERY_LEN_MIN) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); if (!pskb_may_pull(skb, srcs_offset)) - return -EINVAL; + goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); err = mld_process_v2(idev, mlh2, &max_delay); if (err < 0) - return err; + goto out; if (group_type == IPV6_ADDR_ANY) { /* general query */ if (mlh2->mld2q_nsrcs) - return -EINVAL; /* no sources allowed */ + goto out; /* no sources allowed */ - mld_gq_start_timer(idev); - return 0; + mld_gq_start_work(idev); + goto out; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) - return -EINVAL; + goto out; mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } } else { - return -EINVAL; + goto out; } - read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { - for (ma = idev->mc_list; ma; ma = ma->next) { - spin_lock_bh(&ma->mca_lock); + for_each_mc_mclock(idev, ma) { igmp6_group_queried(ma, max_delay); - spin_unlock_bh(&ma->mca_lock); } } else { - for (ma = idev->mc_list; ma; ma = ma->next) { + for_each_mc_mclock(idev, ma) { if (!ipv6_addr_equal(group, &ma->mca_addr)) continue; - spin_lock_bh(&ma->mca_lock); if (ma->mca_flags & MAF_TIMER_RUNNING) { /* gsquery <- gsquery && mark */ if (!mark) @@ -1440,34 +1500,91 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ma->mca_flags & MAF_GSQUERY) || mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs)) igmp6_group_queried(ma, max_delay); - spin_unlock_bh(&ma->mca_lock); break; } } - read_unlock_bh(&idev->lock); - return 0; +out: + in6_dev_put(idev); +kfree_skb: + consume_skb(skb); +} + +static void mld_query_work(struct work_struct *work) +{ + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_query_work); + struct sk_buff_head q; + struct sk_buff *skb; + bool rework = false; + int cnt = 0; + + skb_queue_head_init(&q); + + spin_lock_bh(&idev->mc_query_lock); + while ((skb = __skb_dequeue(&idev->mc_query_queue))) { + __skb_queue_tail(&q, skb); + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; + schedule_delayed_work(&idev->mc_query_work, 0); + break; + } + } + spin_unlock_bh(&idev->mc_query_lock); + + mutex_lock(&idev->mc_lock); + while ((skb = __skb_dequeue(&q))) + __mld_query_work(skb); + mutex_unlock(&idev->mc_lock); + + if (!rework) + in6_dev_put(idev); } /* called with rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { - struct ifmcaddr6 *ma; + struct inet6_dev *idev = __in6_dev_get(skb->dev); + + if (!idev) + return -EINVAL; + + if (idev->dead) { + kfree_skb(skb); + return -ENODEV; + } + + spin_lock_bh(&idev->mc_report_lock); + if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) { + __skb_queue_tail(&idev->mc_report_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0)) + in6_dev_hold(idev); + } + spin_unlock_bh(&idev->mc_report_lock); + + return 0; +} + +static void __mld_report_work(struct sk_buff *skb) +{ struct inet6_dev *idev; + struct ifmcaddr6 *ma; struct mld_msg *mld; int addr_type; /* Our own report looped back. Ignore it. */ if (skb->pkt_type == PACKET_LOOPBACK) - return 0; + goto kfree_skb; /* send our report if the MC router may not have heard this report */ if (skb->pkt_type != PACKET_MULTICAST && skb->pkt_type != PACKET_BROADCAST) - return 0; + goto kfree_skb; if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr))) - return -EINVAL; + goto kfree_skb; mld = (struct mld_msg *)icmp6_hdr(skb); @@ -1475,29 +1592,61 @@ int igmp6_event_report(struct sk_buff *skb) addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); if (addr_type != IPV6_ADDR_ANY && !(addr_type&IPV6_ADDR_LINKLOCAL)) - return -EINVAL; + goto kfree_skb; - idev = __in6_dev_get(skb->dev); + idev = in6_dev_get(skb->dev); if (!idev) - return -ENODEV; + goto kfree_skb; /* - * Cancel the timer for this group + * Cancel the work for this group */ - read_lock_bh(&idev->lock); - for (ma = idev->mc_list; ma; ma = ma->next) { + for_each_mc_mclock(idev, ma) { if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) { - spin_lock(&ma->mca_lock); - if (del_timer(&ma->mca_timer)) + if (cancel_delayed_work(&ma->mca_work)) refcount_dec(&ma->mca_refcnt); - ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING); - spin_unlock(&ma->mca_lock); + ma->mca_flags &= ~(MAF_LAST_REPORTER | + MAF_TIMER_RUNNING); break; } } - read_unlock_bh(&idev->lock); - return 0; + + in6_dev_put(idev); +kfree_skb: + consume_skb(skb); +} + +static void mld_report_work(struct work_struct *work) +{ + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_report_work); + struct sk_buff_head q; + struct sk_buff *skb; + bool rework = false; + int cnt = 0; + + skb_queue_head_init(&q); + spin_lock_bh(&idev->mc_report_lock); + while ((skb = __skb_dequeue(&idev->mc_report_queue))) { + __skb_queue_tail(&q, skb); + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; + schedule_delayed_work(&idev->mc_report_work, 0); + break; + } + } + spin_unlock_bh(&idev->mc_report_lock); + + mutex_lock(&idev->mc_lock); + while ((skb = __skb_dequeue(&q))) + __mld_report_work(skb); + mutex_unlock(&idev->mc_lock); + + if (!rework) + in6_dev_put(idev); } static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, @@ -1550,7 +1699,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) struct ip6_sf_list *psf; int scount = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (!is_in(pmc, psf, type, gdeleted, sdeleted)) continue; scount++; @@ -1724,15 +1873,18 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, #define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) +/* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, int gdeleted, int sdeleted, int crsend) + int type, int gdeleted, int sdeleted, + int crsend) { + struct ip6_sf_list *psf, *psf_prev, *psf_next; + int scount, stotal, first, isquery, truncate; + struct ip6_sf_list __rcu **psf_list; struct inet6_dev *idev = pmc->idev; struct net_device *dev = idev->dev; - struct mld2_report *pmr; struct mld2_grec *pgr = NULL; - struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; - int scount, stotal, first, isquery, truncate; + struct mld2_report *pmr; unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) @@ -1751,7 +1903,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; - if (!*psf_list) + if (!rcu_access_pointer(*psf_list)) goto empty_source; pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL; @@ -1767,10 +1919,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, } first = 1; psf_prev = NULL; - for (psf = *psf_list; psf; psf = psf_next) { + for (psf = mc_dereference(*psf_list, idev); + psf; + psf = psf_next) { struct in6_addr *psrc; - psf_next = psf->sf_next; + psf_next = mc_dereference(psf->sf_next, idev); if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) { psf_prev = psf; @@ -1817,10 +1971,12 @@ decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - *psf_list = psf->sf_next; - kfree(psf); + rcu_assign_pointer(*psf_list, + mc_dereference(psf->sf_next, idev)); + kfree_rcu(psf, rcu); continue; } } @@ -1849,72 +2005,73 @@ empty_source: return skb; } +/* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; - read_lock_bh(&idev->lock); if (!pmc) { - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) continue; - spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); - spin_unlock_bh(&pmc->mca_lock); } } else { - spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; skb = add_grec(skb, pmc, type, 0, 0, 0); - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } /* * remove zero-count source records from a source filter list + * called with mc_lock */ -static void mld_clear_zeros(struct ip6_sf_list **ppsf) +static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; psf_prev = NULL; - for (psf = *ppsf; psf; psf = psf_next) { - psf_next = psf->sf_next; + for (psf = mc_dereference(*ppsf, idev); + psf; + psf = psf_next) { + psf_next = mc_dereference(psf->sf_next, idev); if (psf->sf_crcount == 0) { if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - *ppsf = psf->sf_next; - kfree(psf); - } else + rcu_assign_pointer(*ppsf, + mc_dereference(psf->sf_next, idev)); + kfree_rcu(psf, rcu); + } else { psf_prev = psf; + } } } +/* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; struct sk_buff *skb = NULL; int type, dtype; - read_lock_bh(&idev->lock); - spin_lock(&idev->mc_lock); - /* deleted MCA's */ pmc_prev = NULL; - for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) { - pmc_next = pmc->next; + for (pmc = mc_dereference(idev->mc_tomb, idev); + pmc; + pmc = pmc_next) { + pmc_next = mc_dereference(pmc->next, idev); if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; @@ -1928,26 +2085,25 @@ static void mld_send_cr(struct inet6_dev *idev) } pmc->mca_crcount--; if (pmc->mca_crcount == 0) { - mld_clear_zeros(&pmc->mca_tomb); - mld_clear_zeros(&pmc->mca_sources); + mld_clear_zeros(&pmc->mca_tomb, idev); + mld_clear_zeros(&pmc->mca_sources, idev); } } - if (pmc->mca_crcount == 0 && !pmc->mca_tomb && - !pmc->mca_sources) { + if (pmc->mca_crcount == 0 && + !rcu_access_pointer(pmc->mca_tomb) && + !rcu_access_pointer(pmc->mca_sources)) { if (pmc_prev) - pmc_prev->next = pmc_next; + rcu_assign_pointer(pmc_prev->next, pmc_next); else - idev->mc_tomb = pmc_next; + rcu_assign_pointer(idev->mc_tomb, pmc_next); in6_dev_put(pmc->idev); - kfree(pmc); + kfree_rcu(pmc, rcu); } else pmc_prev = pmc; } - spin_unlock(&idev->mc_lock); /* change recs */ - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { - spin_lock_bh(&pmc->mca_lock); + for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_ALLOW_NEW_SOURCES; @@ -1967,9 +2123,7 @@ static void mld_send_cr(struct inet6_dev *idev) skb = add_grec(skb, pmc, type, 0, 0, 0); pmc->mca_crcount--; } - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (!skb) return; (void) mld_sendpack(skb); @@ -2071,6 +2225,7 @@ err_out: goto out; } +/* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { struct sk_buff *skb; @@ -2081,47 +2236,49 @@ static void mld_send_initial_cr(struct inet6_dev *idev) return; skb = NULL; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { - spin_lock_bh(&pmc->mca_lock); + for_each_mc_mclock(idev, pmc) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_ALLOW_NEW_SOURCES; skb = add_grec(skb, pmc, type, 0, 0, 1); - spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } void ipv6_mc_dad_complete(struct inet6_dev *idev) { + mutex_lock(&idev->mc_lock); idev->mc_dad_count = idev->mc_qrv; if (idev->mc_dad_count) { mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, - unsolicited_report_interval(idev)); + mld_dad_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); } -static void mld_dad_timer_expire(struct timer_list *t) +static void mld_dad_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer); - + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_dad_work); + mutex_lock(&idev->mc_lock); mld_send_initial_cr(idev); if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, - unsolicited_report_interval(idev)); + mld_dad_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } +/* called with mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2129,7 +2286,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, int rv = 0; psf_prev = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; @@ -2144,21 +2301,27 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, /* no more filters for this source */ if (psf_prev) - psf_prev->sf_next = psf->sf_next; + rcu_assign_pointer(psf_prev->sf_next, + mc_dereference(psf->sf_next, idev)); else - pmc->mca_sources = psf->sf_next; + rcu_assign_pointer(pmc->mca_sources, + mc_dereference(psf->sf_next, idev)); + if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && !mld_in_v1_mode(idev)) { psf->sf_crcount = idev->mc_qrv; - psf->sf_next = pmc->mca_tomb; - pmc->mca_tomb = psf; + rcu_assign_pointer(psf->sf_next, + mc_dereference(pmc->mca_tomb, idev)); + rcu_assign_pointer(pmc->mca_tomb, psf); rv = 1; - } else - kfree(psf); + } else { + kfree_rcu(psf, rcu); + } } return rv; } +/* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2169,24 +2332,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } - if (!pmc) { - /* MCA not found?? bug */ - read_unlock_bh(&idev->lock); + if (!pmc) return -ESRCH; - } - spin_lock_bh(&pmc->mca_lock); + sf_markstate(pmc); if (!delta) { - if (!pmc->mca_sfcount[sfmode]) { - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + if (!pmc->mca_sfcount[sfmode]) return -EINVAL; - } + pmc->mca_sfcount[sfmode]--; } err = 0; @@ -2206,18 +2364,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_sfmode = MCAST_INCLUDE; pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(pmc->idev); - } else if (sf_setstate(pmc) || changerec) + } else if (sf_setstate(pmc) || changerec) { mld_ifc_event(pmc->idev); - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + } + return err; } /* * Add multicast single-source filter to the interface list + * called with mc_lock */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) @@ -2225,40 +2384,45 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, struct ip6_sf_list *psf, *psf_prev; psf_prev = NULL; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) break; psf_prev = psf; } if (!psf) { - psf = kzalloc(sizeof(*psf), GFP_ATOMIC); + psf = kzalloc(sizeof(*psf), GFP_KERNEL); if (!psf) return -ENOBUFS; psf->sf_addr = *psfsrc; if (psf_prev) { - psf_prev->sf_next = psf; - } else - pmc->mca_sources = psf; + rcu_assign_pointer(psf_prev->sf_next, psf); + } else { + rcu_assign_pointer(pmc->mca_sources, psf); + } } psf->sf_count[sfmode]++; return 0; } +/* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { psf->sf_oldin = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; - } else + } else { psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0; + } + } } +/* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *dpsf; @@ -2267,7 +2431,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) int new_in, rv; rv = 0; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { + for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && !psf->sf_count[MCAST_INCLUDE]; @@ -2277,8 +2441,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) if (!psf->sf_oldin) { struct ip6_sf_list *prev = NULL; - for (dpsf = pmc->mca_tomb; dpsf; - dpsf = dpsf->sf_next) { + for_each_psf_tomb(pmc, dpsf) { if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; @@ -2286,10 +2449,14 @@ static int sf_setstate(struct ifmcaddr6 *pmc) } if (dpsf) { if (prev) - prev->sf_next = dpsf->sf_next; + rcu_assign_pointer(prev->sf_next, + mc_dereference(dpsf->sf_next, + pmc->idev)); else - pmc->mca_tomb = dpsf->sf_next; - kfree(dpsf); + rcu_assign_pointer(pmc->mca_tomb, + mc_dereference(dpsf->sf_next, + pmc->idev)); + kfree_rcu(dpsf, rcu); } psf->sf_crcount = qrv; rv++; @@ -2300,18 +2467,19 @@ static int sf_setstate(struct ifmcaddr6 *pmc) * add or update "delete" records if an active filter * is now inactive */ - for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next) + + for_each_psf_tomb(pmc, dpsf) if (ipv6_addr_equal(&dpsf->sf_addr, &psf->sf_addr)) break; if (!dpsf) { - dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_KERNEL); if (!dpsf) continue; *dpsf = *psf; - /* pmc->mca_lock held by callers */ - dpsf->sf_next = pmc->mca_tomb; - pmc->mca_tomb = dpsf; + rcu_assign_pointer(dpsf->sf_next, + mc_dereference(pmc->mca_tomb, pmc->idev)); + rcu_assign_pointer(pmc->mca_tomb, dpsf); } dpsf->sf_crcount = qrv; rv++; @@ -2322,6 +2490,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) /* * Add multicast source filter list to the interface list + * called with mc_lock */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, @@ -2333,17 +2502,13 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) { + + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; } - if (!pmc) { - /* MCA not found?? bug */ - read_unlock_bh(&idev->lock); + if (!pmc) return -ESRCH; - } - spin_lock_bh(&pmc->mca_lock); sf_markstate(pmc); isexclude = pmc->mca_sfmode == MCAST_EXCLUDE; @@ -2374,36 +2539,40 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, pmc->mca_crcount = idev->mc_qrv; idev->mc_ifc_count = pmc->mca_crcount; - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) + for_each_psf_mclock(pmc, psf) psf->sf_crcount = 0; mld_ifc_event(idev); - } else if (sf_setstate(pmc)) + } else if (sf_setstate(pmc)) { mld_ifc_event(idev); - spin_unlock_bh(&pmc->mca_lock); - read_unlock_bh(&idev->lock); + } return err; } +/* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; - for (psf = pmc->mca_tomb; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); + for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); + psf; + psf = nextpsf) { + nextpsf = mc_dereference(psf->sf_next, pmc->idev); + kfree_rcu(psf, rcu); } - pmc->mca_tomb = NULL; - for (psf = pmc->mca_sources; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); + RCU_INIT_POINTER(pmc->mca_tomb, NULL); + for (psf = mc_dereference(pmc->mca_sources, pmc->idev); + psf; + psf = nextpsf) { + nextpsf = mc_dereference(psf->sf_next, pmc->idev); + kfree_rcu(psf, rcu); } - pmc->mca_sources = NULL; + RCU_INIT_POINTER(pmc->mca_sources, NULL); pmc->mca_sfmode = MCAST_EXCLUDE; pmc->mca_sfcount[MCAST_INCLUDE] = 0; pmc->mca_sfcount[MCAST_EXCLUDE] = 1; } - +/* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; @@ -2415,93 +2584,115 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) delay = prandom_u32() % unsolicited_report_interval(ma->idev); - spin_lock_bh(&ma->mca_lock); - if (del_timer(&ma->mca_timer)) { + if (cancel_delayed_work(&ma->mca_work)) { refcount_dec(&ma->mca_refcnt); - delay = ma->mca_timer.expires - jiffies; + delay = ma->mca_work.timer.expires - jiffies; } - if (!mod_timer(&ma->mca_timer, jiffies + delay)) + if (!mod_delayed_work(mld_wq, &ma->mca_work, delay)) refcount_inc(&ma->mca_refcnt); ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER; - spin_unlock_bh(&ma->mca_lock); } static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, struct inet6_dev *idev) { + struct ip6_sf_socklist *psl; int err; - write_lock_bh(&iml->sflock); - if (!iml->sflist) { + psl = rtnl_dereference(iml->sflist); + + if (idev) + mutex_lock(&idev->mc_lock); + + if (!psl) { /* any-source empty exclude case */ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); } else { err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + psl->sl_count, psl->sl_addr, 0); + RCU_INIT_POINTER(iml->sflist, NULL); + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + kfree_rcu(psl, rcu); } - write_unlock_bh(&iml->sflock); + + if (idev) + mutex_unlock(&idev->mc_lock); + return err; } +/* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { if (mld_in_v1_mode(ma->idev)) { - if (ma->mca_flags & MAF_LAST_REPORTER) + if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REDUCTION); + } } else { mld_add_delrec(ma->idev, ma); mld_ifc_event(ma->idev); } } -static void mld_gq_timer_expire(struct timer_list *t) +static void mld_gq_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer); + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_gq_work); - idev->mc_gq_running = 0; + mutex_lock(&idev->mc_lock); mld_send_report(idev, NULL); + idev->mc_gq_running = 0; + mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); } -static void mld_ifc_timer_expire(struct timer_list *t) +static void mld_ifc_work(struct work_struct *work) { - struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer); + struct inet6_dev *idev = container_of(to_delayed_work(work), + struct inet6_dev, + mc_ifc_work); + mutex_lock(&idev->mc_lock); mld_send_cr(idev); + if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) - mld_ifc_start_timer(idev, - unsolicited_report_interval(idev)); + mld_ifc_start_work(idev, + unsolicited_report_interval(idev)); } + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); } +/* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { if (mld_in_v1_mode(idev)) return; + idev->mc_ifc_count = idev->mc_qrv; - mld_ifc_start_timer(idev, 1); + mld_ifc_start_work(idev, 1); } -static void igmp6_timer_handler(struct timer_list *t) +static void mld_mca_work(struct work_struct *work) { - struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer); + struct ifmcaddr6 *ma = container_of(to_delayed_work(work), + struct ifmcaddr6, mca_work); + mutex_lock(&ma->idev->mc_lock); if (mld_in_v1_mode(ma->idev)) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); else mld_send_report(ma->idev, ma); - - spin_lock(&ma->mca_lock); ma->mca_flags |= MAF_LAST_REPORTER; ma->mca_flags &= ~MAF_TIMER_RUNNING; - spin_unlock(&ma->mca_lock); + mutex_unlock(&ma->idev->mc_lock); + ma_put(ma); } @@ -2513,10 +2704,10 @@ void ipv6_mc_unmap(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ - read_lock_bh(&idev->lock); - for (i = idev->mc_list; i; i = i->next) + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, i) igmp6_group_dropped(i); - read_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } void ipv6_mc_remap(struct inet6_dev *idev) @@ -2525,25 +2716,25 @@ void ipv6_mc_remap(struct inet6_dev *idev) } /* Device going down */ - void ipv6_mc_down(struct inet6_dev *idev) { struct ifmcaddr6 *i; + mutex_lock(&idev->mc_lock); /* Withdraw multicast list */ - - read_lock_bh(&idev->lock); - - for (i = idev->mc_list; i; i = i->next) + for_each_mc_mclock(idev, i) igmp6_group_dropped(i); + mutex_unlock(&idev->mc_lock); - /* Should stop timer after group drop. or we will - * start timer again in mld_ifc_event() + /* Should stop work after group drop. or we will + * start work again in mld_ifc_event() */ - mld_ifc_stop_timer(idev); - mld_gq_stop_timer(idev); - mld_dad_stop_timer(idev); - read_unlock_bh(&idev->lock); + synchronize_net(); + mld_query_stop_work(idev); + mld_report_stop_work(idev); + mld_ifc_stop_work(idev); + mld_gq_stop_work(idev); + mld_dad_stop_work(idev); } static void ipv6_mc_reset(struct inet6_dev *idev) @@ -2563,29 +2754,33 @@ void ipv6_mc_up(struct inet6_dev *idev) /* Install multicast list, except for all-nodes (already installed) */ - read_lock_bh(&idev->lock); ipv6_mc_reset(idev); - for (i = idev->mc_list; i; i = i->next) { + mutex_lock(&idev->mc_lock); + for_each_mc_mclock(idev, i) { mld_del_delrec(idev, i); igmp6_group_added(i); } - read_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } /* IPv6 device initialization. */ void ipv6_mc_init_dev(struct inet6_dev *idev) { - write_lock_bh(&idev->lock); - spin_lock_init(&idev->mc_lock); idev->mc_gq_running = 0; - timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0); - idev->mc_tomb = NULL; + INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work); + RCU_INIT_POINTER(idev->mc_tomb, NULL); idev->mc_ifc_count = 0; - timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0); - timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0); + INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work); + INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work); + INIT_DELAYED_WORK(&idev->mc_query_work, mld_query_work); + INIT_DELAYED_WORK(&idev->mc_report_work, mld_report_work); + skb_queue_head_init(&idev->mc_query_queue); + skb_queue_head_init(&idev->mc_report_queue); + spin_lock_init(&idev->mc_query_lock); + spin_lock_init(&idev->mc_report_lock); + mutex_init(&idev->mc_lock); ipv6_mc_reset(idev); - write_unlock_bh(&idev->lock); } /* @@ -2596,9 +2791,13 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) { struct ifmcaddr6 *i; - /* Deactivate timers */ + /* Deactivate works */ ipv6_mc_down(idev); + mutex_lock(&idev->mc_lock); mld_clear_delrec(idev); + mutex_unlock(&idev->mc_lock); + mld_clear_query(idev); + mld_clear_report(idev); /* Delete all-nodes address. */ /* We cannot call ipv6_dev_mc_dec() directly, our caller in @@ -2610,16 +2809,14 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) if (idev->cnf.forwarding) __ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters); - write_lock_bh(&idev->lock); - while ((i = idev->mc_list) != NULL) { - idev->mc_list = i->next; + mutex_lock(&idev->mc_lock); + while ((i = mc_dereference(idev->mc_list, idev))) { + rcu_assign_pointer(idev->mc_list, mc_dereference(i->next, idev)); - write_unlock_bh(&idev->lock); ip6_mc_clear_src(i); ma_put(i); - write_lock_bh(&idev->lock); } - write_unlock_bh(&idev->lock); + mutex_unlock(&idev->mc_lock); } static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) @@ -2628,13 +2825,14 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) ASSERT_RTNL(); + mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { - read_lock_bh(&idev->lock); - for (pmc = idev->mc_list; pmc; pmc = pmc->next) + for_each_mc_mclock(idev, pmc) igmp6_join_group(pmc); - read_unlock_bh(&idev->lock); - } else + } else { mld_send_report(idev, NULL); + } + mutex_unlock(&idev->mc_lock); } static int ipv6_mc_netdev_event(struct notifier_block *this, @@ -2681,13 +2879,12 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) idev = __in6_dev_get(state->dev); if (!idev) continue; - read_lock_bh(&idev->lock); - im = idev->mc_list; + + im = rcu_dereference(idev->mc_list); if (im) { state->idev = idev; break; } - read_unlock_bh(&idev->lock); } return im; } @@ -2696,11 +2893,8 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - im = im->next; + im = rcu_dereference(im->next); while (!im) { - if (likely(state->idev)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2709,8 +2903,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; - read_lock_bh(&state->idev->lock); - im = state->idev->mc_list; + im = rcu_dereference(state->idev->mc_list); } return im; } @@ -2744,10 +2937,8 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - if (likely(state->idev)) { - read_unlock_bh(&state->idev->lock); + if (likely(state->idev)) state->idev = NULL; - } state->dev = NULL; rcu_read_unlock(); } @@ -2762,8 +2953,8 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, &im->mca_addr, im->mca_users, im->mca_flags, - (im->mca_flags&MAF_TIMER_RUNNING) ? - jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0); + (im->mca_flags & MAF_TIMER_RUNNING) ? + jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0); return 0; } @@ -2797,19 +2988,16 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; - read_lock_bh(&idev->lock); - im = idev->mc_list; + + im = rcu_dereference(idev->mc_list); if (likely(im)) { - spin_lock_bh(&im->mca_lock); - psf = im->mca_sources; + psf = rcu_dereference(im->mca_sources); if (likely(psf)) { state->im = im; state->idev = idev; break; } - spin_unlock_bh(&im->mca_lock); } - read_unlock_bh(&idev->lock); } return psf; } @@ -2818,14 +3006,10 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - psf = psf->sf_next; + psf = rcu_dereference(psf->sf_next); while (!psf) { - spin_unlock_bh(&state->im->mca_lock); - state->im = state->im->next; + state->im = rcu_dereference(state->im->next); while (!state->im) { - if (likely(state->idev)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2834,13 +3018,11 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; - read_lock_bh(&state->idev->lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; - spin_lock_bh(&state->im->mca_lock); - psf = state->im->mca_sources; + psf = rcu_dereference(state->im->mca_sources); } out: return psf; @@ -2877,14 +3059,12 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - if (likely(state->im)) { - spin_unlock_bh(&state->im->mca_lock); + + if (likely(state->im)) state->im = NULL; - } - if (likely(state->idev)) { - read_unlock_bh(&state->idev->lock); + if (likely(state->idev)) state->idev = NULL; - } + state->dev = NULL; rcu_read_unlock(); } @@ -2965,6 +3145,7 @@ static int __net_init igmp6_net_init(struct net *net) } inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + net->ipv6.igmp_sk->sk_allocation = GFP_KERNEL; err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); @@ -3002,7 +3183,19 @@ static struct pernet_operations igmp6_net_ops = { int __init igmp6_init(void) { - return register_pernet_subsys(&igmp6_net_ops); + int err; + + err = register_pernet_subsys(&igmp6_net_ops); + if (err) + return err; + + mld_wq = create_workqueue("mld"); + if (!mld_wq) { + unregister_pernet_subsys(&igmp6_net_ops); + return -ENOMEM; + } + + return err; } int __init igmp6_late_init(void) @@ -3013,6 +3206,7 @@ int __init igmp6_late_init(void) void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); + destroy_workqueue(mld_wq); } void igmp6_late_cleanup(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ebb7519bec2a..28801ae80548 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2085,13 +2085,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, if (rt->rt6i_flags & RTF_GATEWAY) { struct neighbour *neigh; - __u8 neigh_flags = 0; neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); - if (neigh) - neigh_flags = neigh->flags; - if (!(neigh_flags & NTF_ROUTER)) { + if (!(neigh && (neigh->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); rt6_remove_exception(bucket, rt6_ex); @@ -6075,7 +6072,7 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, if (!rcu_access_pointer(f6i->fib6_node)) /* The route was removed from the tree, do not send - * notfication. + * notification. */ return; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 63ccd9f2dccc..ff2ca2e7c7f5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -218,8 +218,6 @@ static int ipip6_tunnel_create(struct net_device *dev) ipip6_tunnel_clone_6rd(dev, sitn); - dev_hold(dev); - ipip6_tunnel_link(sitn, t); return 0; @@ -325,7 +323,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) rcu_read_lock(); - ca = t->prl_count < cmax ? t->prl_count : cmax; + ca = min(t->prl_count, cmax); if (!kp) { /* We don't try hard to allocate much memory for @@ -1456,7 +1454,7 @@ static int ipip6_tunnel_init(struct net_device *dev) dev->tstats = NULL; return err; } - + dev_hold(dev); return 0; } @@ -1472,7 +1470,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 263ab43ed06b..27102c3d6e1d 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,7 +23,6 @@ static int two = 2; static int flowlabel_reflect_max = 0x7; -static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, @@ -34,7 +33,7 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, net = container_of(table->data, struct net, ipv6.sysctl.multipath_hash_policy); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); @@ -45,39 +44,38 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "anycast_src_echo_reply", .data = &init_net.ipv6.sysctl.anycast_src_echo_reply, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_consistency", .data = &init_net.ipv6.sysctl.flowlabel_consistency, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "auto_flowlabels", .data = &init_net.ipv6.sysctl.auto_flowlabels, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &auto_flowlabels_min, + .proc_handler = proc_dou8vec_minmax, .extra2 = &auto_flowlabels_max }, { .procname = "fwmark_reflect", .data = &init_net.ipv6.sysctl.fwmark_reflect, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "idgen_retries", @@ -96,16 +94,16 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "flowlabel_state_ranges", .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "ip_nonlocal_bind", .data = &init_net.ipv6.sysctl.ip_nonlocal_bind, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dou8vec_minmax, }, { .procname = "flowlabel_reflect", @@ -147,7 +145,7 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "fib_multipath_hash_policy", .data = &init_net.ipv6.sysctl.multipath_hash_policy, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, @@ -163,9 +161,9 @@ static struct ctl_table ipv6_table_template[] = { { .procname = "fib_notify_on_flag_change", .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, }, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ef2c75bb4771..199b080d418a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -749,6 +749,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) skb_list_walk_safe(segs, skb, next) { __skb_pull(skb, skb_transport_offset(skb)); + udp_post_segment_fix_csum(skb); ret = udpv6_queue_rcv_one_skb(sk, skb); if (ret > 0) ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index faa823c24292..b3d9ed96e5ea 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -163,7 +163,8 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (NAPI_GRO_CB(skb)->is_flist) { + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ + if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) { uh->len = htons(skb->len - nhoff); skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6092d5cb7168..0fdb389c3390 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -621,7 +621,7 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, for_each_netdev_rcu(&init_net, dev) { if (!memcmp(dev->perm_addr, uid, 8)) { memcpy(iucv->src_user_id, sa->siucv_user_id, 8); - /* Check for unitialized siucv_name */ + /* Check for uninitialized siucv_name */ if (strncmp(sa->siucv_name, " ", 8) == 0) __iucv_auto_name(iucv); else @@ -2134,7 +2134,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, } /** - * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets + * afiucv_hs_callback_txnotify() - handle send notifications from HiperSockets * transport **/ static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d0b56ffbb057..6201965bd822 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -663,7 +663,7 @@ do_frag: /* Hard failure in sending message, abort this * psock since it has lost framing - * synchonization and retry sending the + * synchronization and retry sending the * message from the beginning. */ kcm_abort_tx_psock(psock, ret ? -ret : EPIPE, @@ -1419,7 +1419,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, write_lock_bh(&csk->sk_callback_lock); - /* Check if sk_user_data is aready by KCM or someone else. + /* Check if sk_user_data is already by KCM or someone else. * Must be done under lock to prevent race conditions. */ if (csk->sk_user_data) { diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ad7730b68772..17927966abb3 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -103,7 +103,7 @@ unlock: EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id); /** - * l3mdev_master_ifindex - get index of L3 master device + * l3mdev_master_ifindex_rcu - get index of L3 master device * @dev: targeted interface */ @@ -136,7 +136,7 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev) EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); /** - * l3mdev_master_upper_ifindex_by_index - get index of upper l3 master + * l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master * device * @net: network namespace for device index lookup * @ifindex: targeted interface diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c index 523fdd1cf781..d6627a80cb45 100644 --- a/net/llc/llc_c_ev.c +++ b/net/llc/llc_c_ev.c @@ -608,7 +608,7 @@ int llc_conn_ev_qlfy_p_flag_eq_1(struct sock *sk, struct sk_buff *skb) } /** - * conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window + * llc_conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window * @sk: current connection structure. * @skb: current event. * @@ -624,7 +624,7 @@ int llc_conn_ev_qlfy_last_frame_eq_1(struct sock *sk, struct sk_buff *skb) } /** - * conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window + * llc_conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window * @sk: current connection structure. * @skb: current event. * diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 64d4bef04e73..6e387aadffce 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -59,10 +59,10 @@ out: } /** - * llc_sap_find - searchs a SAP in station + * llc_sap_find - searches a SAP in station * @sap_value: sap to be found * - * Searchs for a sap in the sap list of the LLC's station upon the sap ID. + * Searches for a sap in the sap list of the LLC's station upon the sap ID. * If the sap is found it will be refcounted and the user will have to do * a llc_sap_put after use. * Returns the sap or %NULL if not found. diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c index 792d195c8bae..63749dde542f 100644 --- a/net/llc/llc_pdu.c +++ b/net/llc/llc_pdu.c @@ -24,7 +24,7 @@ void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type) } /** - * pdu_set_pf_bit - sets poll/final bit in LLC header + * llc_pdu_set_pf_bit - sets poll/final bit in LLC header * @skb: Frame to set bit in * @bit_value: poll/final bit (0 or 1). * diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index 7ae4cc684d3a..b554f26c68ee 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -27,7 +27,7 @@ /** - * llc_sap_action_unit_data_ind - forward UI PDU to network layer + * llc_sap_action_unitdata_ind - forward UI PDU to network layer * @sap: SAP * @skb: the event to forward * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 68a0de02b561..a0a11624a5be 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1486,7 +1486,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); /* auth flags will be set later for TDLS, - * and for unassociated stations that move to assocaited */ + * and for unassociated stations that move to associated */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2b7eec93c9f5..69cafaacc31b 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -882,7 +882,7 @@ fully_established: subflow->pm_notified = 1; if (subflow->mp_join) { clear_3rdack_retransmission(ssk); - mptcp_pm_subflow_established(msk, subflow); + mptcp_pm_subflow_established(msk); } else { mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); } @@ -1040,6 +1040,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_pm_add_addr_received(msk, &addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { + mptcp_pm_add_addr_echoed(msk, &addr); mptcp_pm_del_add_timer(msk, &addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 4cfd80f90003..9d00fa6d22e9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -14,7 +14,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port) + bool echo) { u8 add_addr = READ_ONCE(msk->pm.addr_signal); @@ -33,7 +33,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); if (addr->family == AF_INET6) add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - if (port) + if (addr->port) add_addr |= BIT(MPTCP_ADD_ADDR_PORT); WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; @@ -53,6 +53,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); + mptcp_pm_nl_addr_send_ack(msk); return 0; } @@ -152,8 +153,7 @@ void mptcp_pm_connection_closed(struct mptcp_sock *msk) pr_debug("msk=%p", msk); } -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) +void mptcp_pm_subflow_established(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -188,7 +188,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { - mptcp_pm_announce_addr(msk, addr, true, addr->port); + mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->remote = *addr; @@ -197,6 +197,21 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_unlock_bh(&pm->lock); } +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) +{ + struct mptcp_pm_data *pm = &msk->pm; + + pr_debug("msk=%p", msk); + + spin_lock_bh(&pm->lock); + + if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) + mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); + + spin_unlock_bh(&pm->lock); +} + void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { if (!mptcp_pm_should_add_signal(msk)) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5857b82c88bf..cadafafa1049 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -56,8 +56,6 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); - static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) { @@ -140,6 +138,24 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, return false; } +static bool lookup_subflow_by_daddr(const struct list_head *list, + struct mptcp_addr_info *daddr) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info cur; + struct sock_common *skc; + + list_for_each_entry(subflow, list, node) { + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); + + remote_address(skc, &cur); + if (addresses_equal(&cur, daddr, daddr->port)) + return true; + } + + return false; +} + static struct mptcp_pm_addr_entry * select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) @@ -245,9 +261,9 @@ static void check_work_pending(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.work_pending, false); } -static struct mptcp_pm_add_entry * -lookup_anno_list_by_saddr(struct mptcp_sock *msk, - struct mptcp_addr_info *addr) +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; @@ -308,7 +324,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!mptcp_pm_should_add_signal(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); - mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port); + mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); entry->retrans_times++; } @@ -319,6 +335,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer) spin_unlock_bh(&msk->pm.lock); + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) + mptcp_pm_subflow_established(msk); + out: __sock_put(sk); } @@ -331,7 +350,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; spin_lock_bh(&msk->pm.lock); - entry = lookup_anno_list_by_saddr(msk, addr); + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (entry) entry->retrans_times = ADD_ADDR_RETRANS_MAX; spin_unlock_bh(&msk->pm.lock); @@ -351,7 +370,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (lookup_anno_list_by_saddr(msk, &entry->addr)) + if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) return false; add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); @@ -417,8 +436,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) { msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false, local->addr.port); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); } } else { /* pick failed, avoid fourther attempts later */ @@ -468,7 +487,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) struct mptcp_addr_info remote; struct mptcp_addr_info local; unsigned int subflows_max; - bool use_port = false; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -476,6 +494,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) pr_debug("accepted %d:%d remote family %d", msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); + + if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) + goto add_addr_echo; + msk->pm.add_addr_accepted++; msk->pm.subflows++; if (msk->pm.add_addr_accepted >= add_addr_accept_max || @@ -488,8 +510,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - else - use_port = true; memset(&local, 0, sizeof(local)); local.family = remote.family; @@ -497,28 +517,30 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) __mptcp_subflow_connect(sk, &local, &remote); spin_lock_bh(&msk->pm.lock); - mptcp_pm_announce_addr(msk, &remote, true, use_port); - mptcp_pm_nl_add_addr_send_ack(msk); +add_addr_echo: + mptcp_pm_announce_addr(msk, &msk->pm.remote, true); + mptcp_pm_nl_addr_send_ack(msk); } -static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; msk_owned_by_me(msk); lockdep_assert_held(&msk->pm.lock); - if (!mptcp_pm_should_add_signal(msk)) + if (!mptcp_pm_should_add_signal(msk) && + !mptcp_pm_should_rm_signal(msk)) return; __mptcp_flush_join_list(msk); subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); if (subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - u8 add_addr; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for add_addr%s%s", + pr_debug("send ack for %s%s%s", + mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); @@ -526,13 +548,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) tcp_send_ack(ssk); release_sock(ssk); spin_lock_bh(&msk->pm.lock); - - add_addr = READ_ONCE(msk->pm.addr_signal); - if (mptcp_pm_should_add_signal_ipv6(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6); - if (mptcp_pm_should_add_signal_port(msk)) - add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT); - WRITE_ONCE(msk->pm.addr_signal, add_addr); } } @@ -571,47 +586,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; u8 i; - pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr); + pr_debug("%s rm_list_nr %d", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); msk_owned_by_me(msk); - if (!msk->pm.rm_list_rx.nr) + if (!rm_list->nr) return; if (list_empty(&msk->conn_list)) return; - for (i = 0; i < msk->pm.rm_list_rx.nr; i++) { + for (i = 0; i < rm_list->nr; i++) { list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; + u8 id = subflow->local_id; + + if (rm_type == MPTCP_MIB_RMADDR) + id = subflow->remote_id; - if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id) + if (rm_list->ids[i] != id) continue; - pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]); + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", + i, rm_list->ids[i], subflow->local_id, subflow->remote_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_accepted--; + if (rm_type == MPTCP_MIB_RMADDR) { + msk->pm.add_addr_accepted--; + WRITE_ONCE(msk->pm.accept_addr, true); + } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { + msk->pm.local_addr_used--; + } msk->pm.subflows--; - WRITE_ONCE(msk->pm.accept_addr, true); - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR); - - break; + __MPTCP_INC_STATS(sock_net(sk), rm_type); } } } +static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); +} + +void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) +{ + mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); +} + void mptcp_pm_nl_work(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; @@ -627,7 +663,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) } if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_add_addr_send_ack(msk); + mptcp_pm_nl_addr_send_ack(msk); } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); @@ -645,47 +681,6 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) -{ - struct mptcp_subflow_context *subflow, *tmp; - struct sock *sk = (struct sock *)msk; - u8 i; - - pr_debug("subflow rm_list_nr %d", rm_list->nr); - - msk_owned_by_me(msk); - - if (!rm_list->nr) - return; - - if (list_empty(&msk->conn_list)) - return; - - for (i = 0; i < rm_list->nr; i++) { - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - - if (rm_list->ids[i] != subflow->local_id) - continue; - - pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]); - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, how); - mptcp_close_ssk(sk, ssk, subflow); - spin_lock_bh(&msk->pm.lock); - - msk->pm.local_addr_used--; - msk->pm.subflows--; - - __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); - - break; - } - } -} - static bool address_use_port(struct mptcp_pm_addr_entry *entry) { return (entry->addr.flags & @@ -1161,6 +1156,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) } } +static int mptcp_nl_remove_id_zero_address(struct net *net, + struct mptcp_addr_info *addr) +{ + struct mptcp_rm_list list = { .nr = 0 }; + long s_slot = 0, s_num = 0; + struct mptcp_sock *msk; + + list.ids[list.nr++] = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + struct mptcp_addr_info msk_local; + + if (list_empty(&msk->conn_list)) + goto next; + + local_address((struct sock_common *)msk, &msk_local); + if (!addresses_equal(&msk_local, addr, addr->port)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1173,6 +1203,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; + /* the zero id address is special: the first address used by the msk + * always gets such an id, so different subflows can have different zero + * id addresses. Additionally zero id is not accounted for in id_bitmap. + * Let's use an 'mptcp_rm_list' instead of the common remove code. + */ + if (addr.addr.id == 0) + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); + spin_lock_bh(&pernet->lock); entry = __lookup_addr_by_id(pernet, addr.addr.id); if (!entry) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1590b9d4cde2..171b77537dcb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2047,28 +2047,21 @@ out_err: return copied; } -static void mptcp_retransmit_handler(struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - set_bit(MPTCP_WORK_RTX, &msk->flags); - mptcp_schedule_work(sk); -} - static void mptcp_retransmit_timer(struct timer_list *t) { struct inet_connection_sock *icsk = from_timer(icsk, t, icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - mptcp_retransmit_handler(sk); + /* we need a process context to retransmit */ + if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) + mptcp_schedule_work(sk); } else { /* delegate our work to tcp_release_cb() */ - if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, - &sk->sk_tsq_flags)) - sock_hold(sk); + set_bit(MPTCP_RETRANSMIT, &msk->flags); } bh_unlock_sock(sk); sock_put(sk); @@ -2958,17 +2951,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) } } -#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) - /* processes deferred events and flush wmem */ static void mptcp_release_cb(struct sock *sk) { - unsigned long flags, nflags; - for (;;) { - flags = 0; + unsigned long flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) flags |= BIT(MPTCP_PUSH_PENDING); + if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags)) + flags |= BIT(MPTCP_RETRANSMIT); if (!flags) break; @@ -2983,6 +2975,8 @@ static void mptcp_release_cb(struct sock *sk) spin_unlock_bh(&sk->sk_lock.slock); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); + if (flags & BIT(MPTCP_RETRANSMIT)) + __mptcp_retrans(sk); cond_resched(); spin_lock_bh(&sk->sk_lock.slock); @@ -2998,20 +2992,6 @@ static void mptcp_release_cb(struct sock *sk) */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); - - do { - flags = sk->sk_tsq_flags; - if (!(flags & MPTCP_DEFERRED_ALL)) - return; - nflags = flags & ~MPTCP_DEFERRED_ALL; - } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); - - sock_release_ownership(sk); - - if (flags & TCPF_WRITE_TIMER_DEFERRED) { - mptcp_retransmit_handler(sk); - __sock_put(sk); - } } void mptcp_subflow_process_delegated(struct sock *ssk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1111a99b024f..e8c5ff2b8ace 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -104,6 +104,7 @@ #define MPTCP_PUSH_PENDING 6 #define MPTCP_CLEAN_UNA 7 #define MPTCP_ERROR_REPORT 8 +#define MPTCP_RETRANSMIT 9 static inline bool before64(__u64 seq1, __u64 seq2) { @@ -642,12 +643,14 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); -void mptcp_pm_subflow_established(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow); +void mptcp_pm_subflow_established(struct mptcp_sock *msk); void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id); void mptcp_pm_add_addr_received(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); @@ -659,10 +662,13 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +struct mptcp_pm_add_entry * +mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, + struct mptcp_addr_info *addr); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, - bool echo, bool port); + bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d17d39ccdf34..6c074d3db0ed 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1081,7 +1081,7 @@ bool mptcp_subflow_data_available(struct sock *sk) * In mptcp, rwin is about the mptcp-level connection data. * * Data that is still on the ssk rx queue can thus be ignored, - * as far as mptcp peer is concerened that data is still inflight. + * as far as mptcp peer is concerned that data is still inflight. * DSS ACK is updated when skb is moved to the mptcp rx queue. */ void mptcp_space(const struct sock *ssk, int *space, int *full_space) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index e37102546be6..49031f804276 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -100,7 +100,7 @@ enum { struct ncsi_channel_version { u32 version; /* Supported BCD encoded NCSI version */ u32 alpha2; /* Supported BCD encoded NCSI version */ - u8 fw_name[12]; /* Firware name string */ + u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ u32 mf_id; /* Manufacture ID */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 0c132ff9b446..128690c512df 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2398,7 +2398,7 @@ static int __net_init __ip_vs_init(struct net *net) if (ipvs == NULL) return -ENOMEM; - /* Hold the beast until a service is registerd */ + /* Hold the beast until a service is registered */ ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 2ccda8ace796..91bc8df3e4b0 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/* Accouting handling for netfilter. */ +/* Accounting handling for netfilter. */ /* * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl> diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index df1b41ed73fd..ca52f5085989 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -63,7 +63,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { */ /** - * netlbl_mgmt_add - Handle an ADD message + * netlbl_mgmt_add_common - Handle an ADD message * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index da7e2112771f..5044c7db577e 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -457,7 +457,7 @@ static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech, } /** - * start_poll operation + * digital_start_poll - start_poll operation * @nfc_dev: device to be polled * @im_protocols: bitset of nfc initiator protocols to be used for polling * @tm_protocols: bitset of nfc transport protocols to be used for polling diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index dfc820ee553a..4b46c69e14ab 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -20,6 +20,8 @@ /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff +#define QRTR_EPH_PORT_RANGE \ + XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET) /** * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 @@ -106,8 +108,7 @@ static LIST_HEAD(qrtr_all_nodes); static DEFINE_MUTEX(qrtr_node_lock); /* local port allocation management */ -static DEFINE_IDR(qrtr_ports); -static DEFINE_MUTEX(qrtr_port_lock); +static DEFINE_XARRAY_ALLOC(qrtr_ports); /** * struct qrtr_node - endpoint node @@ -653,7 +654,7 @@ static struct qrtr_sock *qrtr_port_lookup(int port) port = 0; rcu_read_lock(); - ipc = idr_find(&qrtr_ports, port); + ipc = xa_load(&qrtr_ports, port); if (ipc) sock_hold(&ipc->sk); rcu_read_unlock(); @@ -695,9 +696,7 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) __sock_put(&ipc->sk); - mutex_lock(&qrtr_port_lock); - idr_remove(&qrtr_ports, port); - mutex_unlock(&qrtr_port_lock); + xa_erase(&qrtr_ports, port); /* Ensure that if qrtr_port_lookup() did enter the RCU read section we * wait for it to up increment the refcount */ @@ -716,29 +715,20 @@ static void qrtr_port_remove(struct qrtr_sock *ipc) */ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) { - u32 min_port; int rc; - mutex_lock(&qrtr_port_lock); if (!*port) { - min_port = QRTR_MIN_EPH_SOCKET; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE, + GFP_KERNEL); } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) { rc = -EACCES; } else if (*port == QRTR_PORT_CTRL) { - min_port = 0; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC); + rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL); } else { - min_port = *port; - rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC); - if (!rc) - *port = min_port; + rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL); } - mutex_unlock(&qrtr_port_lock); - if (rc == -ENOSPC) + if (rc == -EBUSY) return -EADDRINUSE; else if (rc < 0) return rc; @@ -752,20 +742,16 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port) static void qrtr_reset_ports(void) { struct qrtr_sock *ipc; - int id; - - mutex_lock(&qrtr_port_lock); - idr_for_each_entry(&qrtr_ports, ipc, id) { - /* Don't reset control port */ - if (id == 0) - continue; + unsigned long index; + rcu_read_lock(); + xa_for_each_start(&qrtr_ports, index, ipc, 1) { sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } - mutex_unlock(&qrtr_port_lock); + rcu_read_unlock(); } /* Bind socket to address. diff --git a/net/rds/send.c b/net/rds/send.c index 985d0b7713ac..53444397de66 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1225,7 +1225,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } /* If the socket is already bound to a link local address, * it can only send to peers on the same link. But allow - * communicating beween link local and non-link local address. + * communicating between link local and non-link local address. */ if (scope_id != rs->rs_bound_scope_id) { if (!scope_id) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f77484df097b..54e6a708d06e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3217,7 +3217,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, return false; break; default: - /* This is unkown to us, reject! */ + /* This is unknown to us, reject! */ return false; } } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index af2b7041fa4e..7632714c1e5b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1452,7 +1452,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, return 'E'; } -/* Common helper routine for both duplicate and simulataneous INIT +/* Common helper routine for both duplicate and simultaneous INIT * chunk handling. */ static enum sctp_disposition sctp_sf_do_unexpected_init( @@ -1685,7 +1685,7 @@ enum sctp_disposition sctp_sf_do_5_2_1_siminit( void *arg, struct sctp_cmd_seq *commands) { - /* Call helper to do the real work for both simulataneous and + /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. */ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); @@ -1740,7 +1740,7 @@ enum sctp_disposition sctp_sf_do_5_2_2_dupinit( void *arg, struct sctp_cmd_seq *commands) { - /* Call helper to do the real work for both simulataneous and + /* Call helper to do the real work for both simultaneous and * duplicate INIT chunk handling. */ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands); @@ -2221,11 +2221,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( break; } - /* Delete the tempory new association. */ + /* Delete the temporary new association. */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); - /* Restore association pointer to provide SCTP command interpeter + /* Restore association pointer to provide SCTP command interpreter * with a valid context in case it needs to manipulate * the queues */ sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a710917c5ac7..76a388b5021c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9327,7 +9327,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); - /* Set newsk security attributes from orginal sk and connection + /* Set newsk security attributes from original sk and connection * security attribute from ep. */ security_sctp_sk_clone(ep, sk, newsk); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e8e448771f85..6d6fd1397c87 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -410,7 +410,6 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev, struct smc_sock; struct smc_clc_msg_accept_confirm; -struct smc_clc_msg_local; void smc_lgr_cleanup_early(struct smc_connection *conn); void smc_lgr_terminate_sched(struct smc_link_group *lgr); diff --git a/net/socket.c b/net/socket.c index 84a8049c2b09..27e3e7d53f8e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3568,7 +3568,7 @@ EXPORT_SYMBOL(kernel_accept); * @addrlen: address length * @flags: flags (O_NONBLOCK, ...) * - * For datagram sockets, @addr is the addres to which datagrams are sent + * For datagram sockets, @addr is the address to which datagrams are sent * by default, and the only address from which datagrams are received. * For stream sockets, attempts to connect to @addr. * Returns 0 or an error code. diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a4389ef08a98..443f8e5b9477 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -240,10 +240,12 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) * @disc_domain: bearer domain * @prio: bearer priority * @attr: nlattr array + * @extack: netlink extended ack */ static int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, u32 prio, - struct nlattr *attr[]) + struct nlattr *attr[], + struct netlink_ext_ack *extack) { struct tipc_net *tn = tipc_net(net); struct tipc_bearer_names b_names; @@ -254,20 +256,24 @@ static int tipc_enable_bearer(struct net *net, const char *name, int bearer_id = 0; int res = -EINVAL; char *errstr = ""; + u32 i; if (!bearer_name_validate(name, &b_names)) { errstr = "illegal name"; + NL_SET_ERR_MSG(extack, "Illegal name"); goto rejected; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); goto rejected; } m = tipc_media_find(b_names.media_name); if (!m) { errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); goto rejected; } @@ -275,33 +281,43 @@ static int tipc_enable_bearer(struct net *net, const char *name, prio = m->priority; /* Check new bearer vs existing ones and find free bearer id if any */ - while (bearer_id < MAX_BEARERS) { - b = rtnl_dereference(tn->bearer_list[bearer_id]); - if (!b) - break; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } if (!strcmp(name, b->name)) { errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); goto rejected; } - bearer_id++; - if (b->priority != prio) - continue; - if (++with_this_prio <= 2) - continue; - pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", - name, prio); - if (prio == TIPC_MIN_LINK_PRI) { - errstr = "cannot adjust to lower"; - goto rejected; + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; } - pr_warn("Bearer <%s>: trying with adjusted priority\n", name); - prio--; - bearer_id = 0; - with_this_prio = 1; } if (bearer_id >= MAX_BEARERS) { errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); goto rejected; } @@ -315,6 +331,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { kfree(b); errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); goto rejected; } @@ -331,6 +348,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (res) { bearer_disable(net, b); errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); goto rejected; } @@ -909,6 +927,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) bearer = tipc_bearer_find(net, name); if (!bearer) { err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); goto err_out; } @@ -948,8 +967,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); bearer = tipc_bearer_find(net, name); - if (!bearer) + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } bearer_disable(net, bearer); @@ -1007,7 +1028,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } - return tipc_enable_bearer(net, bearer, domain, prio, attrs); + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); } int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) @@ -1046,6 +1068,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) b = tipc_bearer_find(net, name); if (!b) { rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; } @@ -1086,8 +1109,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); b = tipc_bearer_find(net, name); - if (!b) + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); return -EINVAL; + } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1106,12 +1131,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); #endif @@ -1239,6 +1270,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); media = tipc_media_find(name); if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); err = -EINVAL; goto err_out; } @@ -1275,9 +1307,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); m = tipc_media_find(name); - if (!m) + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); return -EINVAL; - + } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; @@ -1293,12 +1326,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (props[TIPC_NLA_PROP_MTU]) { - if (m->type_id != TIPC_MEDIA_TYPE_UDP) + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); return -EINVAL; + } #ifdef CONFIG_TIPC_MEDIA_UDP if (tipc_udp_mtu_bad(nla_get_u32 - (props[TIPC_NLA_PROP_MTU]))) + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); return -EINVAL; + } m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); #endif } diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6bf4550aa1ac..57c6a1a719e2 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -154,9 +154,9 @@ struct tipc_media { * care of initializing all other fields. */ struct tipc_bearer { - void __rcu *media_ptr; /* initalized by media */ - u32 mtu; /* initalized by media */ - struct tipc_media_addr addr; /* initalized by media */ + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 6f64acef73dc..76b8428c94a7 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1492,6 +1492,8 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, /* Allocate statistic structure */ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { + if (c->wq) + destroy_workqueue(c->wq); kfree_sensitive(c); return -ENOMEM; } diff --git a/net/tipc/node.c b/net/tipc/node.c index 61c38eaaa298..707d0dc71fad 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2014,7 +2014,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, return true; } - /* No synching needed if only one link */ + /* No syncing needed if only one link */ if (!pl || !tipc_link_is_up(pl)) return true; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 117a472a8e61..f21162aa0cf7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1450,7 +1450,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) ua = (struct tipc_uaddr *)&tsk->peer; if (!syn && ua->family != AF_TIPC) return -EDESTADDRREQ; - atype = ua->addrtype; + atype = ua->addrtype; } if (unlikely(syn)) { diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ddea6554ec46..60b877531b66 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -49,12 +49,13 @@ struct tipc_conn; /** * struct tipc_subscription - TIPC network topology subscription object + * @s: host-endian copy of the user subscription + * @evt: template for events generated by subscription * @kref: reference count for this subscription * @net: network namespace associated with subscription * @timer: timer governing subscription duration (optional) * @service_list: adjacent subscriptions in name sequence's subscription list * @sub_list: adjacent subscriptions in subscriber's subscription list - * @evt: template for events generated by subscription * @conid: connection identifier of topology server * @inactive: true if this subscription is inactive * @lock: serialize up/down and timer events diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bc7fb9bf3351..92a72f0e0d94 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1855,7 +1855,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a - * peer has not connected or a local shutdown occured with the + * peer has not connected or a local shutdown occurred with the * SOCK_DONE flag. */ if (sock_flag(sk, SOCK_DONE)) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 21536c48deec..68db914df642 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3404,7 +3404,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy) } /* - * Restoring regulatory settings involves ingoring any + * Restoring regulatory settings involves ignoring any * possibly stale country IE information and user regulatory * settings if so desired, this includes any beacon hints * learned as we could have traveled outside to another country diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ff687b97b2d9..44d6566dd23e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1018,7 +1018,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * current neighbour/link might impose additional limits - * on certain facilties + * on certain facilities */ x25_limit_facilities(&facilities, nb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b74f28cabe24..156347fd7e2e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -688,7 +688,7 @@ static void xfrm_hash_resize(struct work_struct *work) } /* Make sure *pol can be inserted into fastbin. - * Useful to check that later insert requests will be sucessful + * Useful to check that later insert requests will be successful * (provided xfrm_policy_lock is held throughout). */ static struct xfrm_pol_inexact_bin * diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5a0ef4361e43..df8bc8fc724c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1761,7 +1761,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived - * in netlink excl is a flag and you wouldnt need + * in netlink excl is a flag and you wouldn't need * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); |