diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 3 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 10 | ||||
-rw-r--r-- | net/core/devlink.c | 345 | ||||
-rw-r--r-- | net/core/filter.c | 10 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 32 | ||||
-rw-r--r-- | net/core/flow_offload.c | 12 | ||||
-rw-r--r-- | net/core/skmsg.c | 23 | ||||
-rw-r--r-- | net/core/sock.c | 3 | ||||
-rw-r--r-- | net/core/sock_map.c | 53 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 2 |
10 files changed, 347 insertions, 146 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index c02bae927812..b61075828358 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5591,7 +5591,7 @@ void netif_receive_skb_list(struct list_head *head) } EXPORT_SYMBOL(netif_receive_skb_list); -DEFINE_PER_CPU(struct work_struct, flush_works); +static DEFINE_PER_CPU(struct work_struct, flush_works); /* Network device is going away, flush any packets still pending */ static void flush_backlog(struct work_struct *work) @@ -7908,6 +7908,7 @@ EXPORT_SYMBOL(netdev_bonding_info_change); /** * netdev_get_xmit_slave - Get the xmit slave of master device + * @dev: device * @skb: The packet * @all_slaves: assume all the slaves are active * diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6393ba930097..54cd568e7c2f 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -690,6 +690,15 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* netif_addr_lock_bh() uses lockdep subclass 0, this is okay for two + * reasons: + * 1) This is always called without any addr_list_lock, so as the + * outermost one here, it must be 0. + * 2) This is called by some callers after unlinking the upper device, + * so the dev->lower_level becomes 1 again. + * Therefore, the subclass for 'from' is 0, for 'to' is either 1 or + * larger. + */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); @@ -911,6 +920,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; + /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); netif_addr_lock_nested(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); diff --git a/net/core/devlink.c b/net/core/devlink.c index 6ae36808c152..6335e1851088 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -386,19 +386,21 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) -#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) -#define DEVLINK_NL_FLAG_NEED_SB BIT(2) +#define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) +#define DEVLINK_NL_FLAG_NEED_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(2) +#define DEVLINK_NL_FLAG_NEED_SB BIT(3) /* The per devlink instance lock is taken by default in the pre-doit * operation, yet several commands do not require this. The global * devlink lock is taken and protects from disruption by user-calls. */ -#define DEVLINK_NL_FLAG_NO_LOCK BIT(3) +#define DEVLINK_NL_FLAG_NO_LOCK BIT(4) static int devlink_nl_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port; struct devlink *devlink; int err; @@ -413,14 +415,17 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK) { info->user_ptr[0] = devlink; } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) { - struct devlink_port *devlink_port; - devlink_port = devlink_port_get_from_info(devlink, info); if (IS_ERR(devlink_port)) { err = PTR_ERR(devlink_port); goto unlock; } info->user_ptr[0] = devlink_port; + } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) { + info->user_ptr[0] = devlink; + devlink_port = devlink_port_get_from_info(devlink, info); + if (!IS_ERR(devlink_port)) + info->user_ptr[1] = devlink_port; } if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_SB) { struct devlink_sb *devlink_sb; @@ -528,8 +533,14 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, { struct devlink_port_attrs *attrs = &devlink_port->attrs; - if (!attrs->set) + if (!devlink_port->attrs_set) return 0; + if (attrs->lanes) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_LANES, attrs->lanes)) + return -EMSGSIZE; + } + if (nla_put_u8(msg, DEVLINK_ATTR_PORT_SPLITTABLE, attrs->splittable)) + return -EMSGSIZE; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; switch (devlink_port->attrs.flavour) { @@ -934,6 +945,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; + struct devlink_port *devlink_port; u32 port_index; u32 count; @@ -941,8 +953,27 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, !info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]) return -EINVAL; + devlink_port = devlink_port_get_from_info(devlink, info); port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); + + if (IS_ERR(devlink_port)) + return -EINVAL; + + if (!devlink_port->attrs.splittable) { + /* Split ports cannot be split. */ + if (devlink_port->attrs.split) + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further"); + else + NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split"); + return -EINVAL; + } + + if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count"); + return -EINVAL; + } + return devlink_port_split(devlink, port_index, count, info->extack); } @@ -5261,6 +5292,7 @@ struct devlink_health_reporter { void *priv; const struct devlink_health_reporter_ops *ops; struct devlink *devlink; + struct devlink_port *devlink_port; struct devlink_fmsg *dump_fmsg; struct mutex dump_lock; /* lock parallel read/write from dump buffers */ u64 graceful_period; @@ -5283,18 +5315,98 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter) EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); static struct devlink_health_reporter * -devlink_health_reporter_find_by_name(struct devlink *devlink, - const char *reporter_name) +__devlink_health_reporter_find_by_name(struct list_head *reporter_list, + struct mutex *list_lock, + const char *reporter_name) { struct devlink_health_reporter *reporter; - lockdep_assert_held(&devlink->reporters_lock); - list_for_each_entry(reporter, &devlink->reporter_list, list) + lockdep_assert_held(list_lock); + list_for_each_entry(reporter, reporter_list, list) if (!strcmp(reporter->ops->name, reporter_name)) return reporter; return NULL; } +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink->reporter_list, + &devlink->reporters_lock, + reporter_name); +} + +static struct devlink_health_reporter * +devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list, + &devlink_port->reporters_lock, + reporter_name); +} + +static struct devlink_health_reporter * +__devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + if (WARN_ON(graceful_period && !ops->recover)) + return ERR_PTR(-EINVAL); + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) + return ERR_PTR(-ENOMEM); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = !!ops->recover; + reporter->auto_dump = !!ops->dump; + mutex_init(&reporter->dump_lock); + refcount_set(&reporter->refcount, 1); + return reporter; +} + +/** + * devlink_port_health_reporter_create - create devlink health reporter for + * specified port instance + * + * @port: devlink_port which should contain the new reporter + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&port->reporters_lock); + if (__devlink_health_reporter_find_by_name(&port->reporter_list, + &port->reporters_lock, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + reporter = __devlink_health_reporter_create(port->devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) + goto unlock; + + reporter->devlink_port = port; + list_add_tail(&reporter->list, &port->reporter_list); +unlock: + mutex_unlock(&port->reporters_lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); + /** * devlink_health_reporter_create - create devlink health reporter * @@ -5316,25 +5428,11 @@ devlink_health_reporter_create(struct devlink *devlink, goto unlock; } - if (WARN_ON(graceful_period && !ops->recover)) { - reporter = ERR_PTR(-EINVAL); + reporter = __devlink_health_reporter_create(devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) goto unlock; - } - - reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); - if (!reporter) { - reporter = ERR_PTR(-ENOMEM); - goto unlock; - } - reporter->priv = priv; - reporter->ops = ops; - reporter->devlink = devlink; - reporter->graceful_period = graceful_period; - reporter->auto_recover = !!ops->recover; - reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); - refcount_set(&reporter->refcount, 1); list_add_tail(&reporter->list, &devlink->reporter_list); unlock: mutex_unlock(&devlink->reporters_lock); @@ -5342,6 +5440,29 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_health_reporter_create); +static void +devlink_health_reporter_free(struct devlink_health_reporter *reporter) +{ + mutex_destroy(&reporter->dump_lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); + kfree(reporter); +} + +static void +devlink_health_reporter_put(struct devlink_health_reporter *reporter) +{ + if (refcount_dec_and_test(&reporter->refcount)) + devlink_health_reporter_free(reporter); +} + +static void +__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + list_del(&reporter->list); + devlink_health_reporter_put(reporter); +} + /** * devlink_health_reporter_destroy - destroy devlink health reporter * @@ -5350,18 +5471,30 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create); void devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { - mutex_lock(&reporter->devlink->reporters_lock); - list_del(&reporter->list); - mutex_unlock(&reporter->devlink->reporters_lock); - while (refcount_read(&reporter->refcount) > 1) - msleep(100); - mutex_destroy(&reporter->dump_lock); - if (reporter->dump_fmsg) - devlink_fmsg_free(reporter->dump_fmsg); - kfree(reporter); + struct mutex *lock = &reporter->devlink->reporters_lock; + + mutex_lock(lock); + __devlink_health_reporter_destroy(reporter); + mutex_unlock(lock); } EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); +/** + * devlink_port_health_reporter_destroy - destroy devlink port health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + struct mutex *lock = &reporter->devlink_port->reporters_lock; + + mutex_lock(lock); + __devlink_health_reporter_destroy(reporter); + mutex_unlock(lock); +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); + static int devlink_nl_health_reporter_fill(struct sk_buff *msg, struct devlink *devlink, @@ -5379,6 +5512,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; + if (reporter->devlink_port) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index)) + goto genlmsg_cancel; + } reporter_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_HEALTH_REPORTER); if (!reporter_attr) @@ -5586,17 +5723,28 @@ devlink_health_reporter_get_from_attrs(struct devlink *devlink, struct nlattr **attrs) { struct devlink_health_reporter *reporter; + struct devlink_port *devlink_port; char *reporter_name; if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) return NULL; reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - mutex_lock(&devlink->reporters_lock); - reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); - if (reporter) - refcount_inc(&reporter->refcount); - mutex_unlock(&devlink->reporters_lock); + devlink_port = devlink_port_get_from_attrs(devlink, attrs); + if (IS_ERR(devlink_port)) { + mutex_lock(&devlink->reporters_lock); + reporter = devlink_health_reporter_find_by_name(devlink, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink->reporters_lock); + } else { + mutex_lock(&devlink_port->reporters_lock); + reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name); + if (reporter) + refcount_inc(&reporter->refcount); + mutex_unlock(&devlink_port->reporters_lock); + } + return reporter; } @@ -5628,12 +5776,6 @@ unlock: return NULL; } -static void -devlink_health_reporter_put(struct devlink_health_reporter *reporter) -{ - refcount_dec(&reporter->refcount); -} - void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state) @@ -5690,6 +5832,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct devlink_health_reporter *reporter; + struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; int idx = 0; @@ -5720,6 +5863,31 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, } mutex_unlock(&devlink->reporters_lock); } + + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + list_for_each_entry(port, &devlink->port_list, list) { + mutex_lock(&port->reporters_lock); + list_for_each_entry(reporter, &port->reporter_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&port->reporters_lock); + goto out; + } + idx++; + } + mutex_unlock(&port->reporters_lock); + } + } out: mutex_unlock(&devlink_mutex); @@ -7099,7 +7267,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, /* can be retrieved by unprivileged users */ }, @@ -7108,7 +7276,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_set_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7116,7 +7284,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_recover_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7124,7 +7292,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7133,7 +7301,7 @@ static const struct genl_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7141,7 +7309,7 @@ static const struct genl_ops devlink_nl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | DEVLINK_NL_FLAG_NO_LOCK, }, { @@ -7401,6 +7569,8 @@ int devlink_port_register(struct devlink *devlink, list_add_tail(&devlink_port->list, &devlink->port_list); INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); + INIT_LIST_HEAD(&devlink_port->reporter_list); + mutex_init(&devlink_port->reporters_lock); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); @@ -7417,6 +7587,8 @@ void devlink_port_unregister(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; + WARN_ON(!list_empty(&devlink_port->reporter_list)); + mutex_destroy(&devlink_port->reporters_lock); devlink_port_type_warn_cancel(devlink_port); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); mutex_lock(&devlink->lock); @@ -7510,24 +7682,20 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) EXPORT_SYMBOL_GPL(devlink_port_type_clear); static int __devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - const unsigned char *switch_id, - unsigned char switch_id_len) + enum devlink_port_flavour flavour) { struct devlink_port_attrs *attrs = &devlink_port->attrs; if (WARN_ON(devlink_port->registered)) return -EEXIST; - attrs->set = true; + devlink_port->attrs_set = true; attrs->flavour = flavour; - if (switch_id) { - attrs->switch_port = true; - if (WARN_ON(switch_id_len > MAX_PHYS_ITEM_ID_LEN)) - switch_id_len = MAX_PHYS_ITEM_ID_LEN; - memcpy(attrs->switch_id.id, switch_id, switch_id_len); - attrs->switch_id.id_len = switch_id_len; + if (attrs->switch_id.id_len) { + devlink_port->switch_port = true; + if (WARN_ON(attrs->switch_id.id_len > MAX_PHYS_ITEM_ID_LEN)) + attrs->switch_id.id_len = MAX_PHYS_ITEM_ID_LEN; } else { - attrs->switch_port = false; + devlink_port->switch_port = false; } return 0; } @@ -7536,33 +7704,18 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, * devlink_port_attrs_set - Set port attributes * * @devlink_port: devlink port - * @flavour: flavour of the port - * @port_number: number of the port that is facing user, for example - * the front panel port number - * @split: indicates if this is split port - * @split_subport_number: if the port is split, this is the number - * of subport. - * @switch_id: if the port is part of switch, this is buffer with ID, - * otwerwise this is NULL - * @switch_id_len: length of the switch_id buffer + * @attrs: devlink port attrs */ void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number, - const unsigned char *switch_id, - unsigned char switch_id_len) + struct devlink_port_attrs *attrs) { - struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - ret = __devlink_port_attrs_set(devlink_port, flavour, - switch_id, switch_id_len); + devlink_port->attrs = *attrs; + ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) return; - attrs->split = split; - attrs->phys.port_number = port_number; - attrs->phys.split_subport_number = split_subport_number; + WARN_ON(attrs->splittable && attrs->split); } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); @@ -7571,20 +7724,14 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set); * * @devlink_port: devlink port * @pf: associated PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ -void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf) +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_PF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) return; @@ -7598,21 +7745,15 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set); * @devlink_port: devlink port * @pf: associated PF for the devlink port instance * @vf: associated VF of a PF for the devlink port instance - * @switch_id: if the port is part of switch, this is buffer with ID, - * otherwise this is NULL - * @switch_id_len: length of the switch_id buffer */ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, - const unsigned char *switch_id, - unsigned char switch_id_len, u16 pf, u16 vf) { struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; ret = __devlink_port_attrs_set(devlink_port, - DEVLINK_PORT_FLAVOUR_PCI_VF, - switch_id, switch_id_len); + DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) return; attrs->pci_vf.pf = pf; @@ -7626,7 +7767,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, struct devlink_port_attrs *attrs = &devlink_port->attrs; int n = 0; - if (!attrs->set) + if (!devlink_port->attrs_set) return -EOPNOTSUPP; switch (attrs->flavour) { @@ -9461,7 +9602,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, * any devlink lock as only permanent values are accessed. */ devlink_port = netdev_to_devlink_port(dev); - if (!devlink_port || !devlink_port->attrs.switch_port) + if (!devlink_port || !devlink_port->switch_port) return -EOPNOTSUPP; memcpy(ppid, &devlink_port->attrs.switch_id, sizeof(*ppid)); diff --git a/net/core/filter.c b/net/core/filter.c index 4e572441e64a..bdd2382e655d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5894,12 +5894,16 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; - if (skb->protocol == cpu_to_be16(ETH_P_IP)) + switch (skb_protocol(skb, true)) { + case cpu_to_be16(ETH_P_IP): iphdr_len = sizeof(struct iphdr); - else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) + break; + case cpu_to_be16(ETH_P_IPV6): iphdr_len = sizeof(struct ipv6hdr); - else + break; + default: return 0; + } if (skb_headlen(skb) < iphdr_len) return 0; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d02df0b6d0d9..142a8824f0a8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -70,10 +70,10 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector, EXPORT_SYMBOL(skb_flow_dissector_init); #ifdef CONFIG_BPF_SYSCALL -int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) +int flow_dissector_bpf_prog_attach_check(struct net *net, + struct bpf_prog *prog) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; - struct bpf_prog *attached; if (net == &init_net) { /* BPF flow dissector in the root namespace overrides @@ -86,26 +86,17 @@ int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog) for_each_net(ns) { if (ns == &init_net) continue; - if (rcu_access_pointer(ns->bpf.progs[type])) + if (rcu_access_pointer(ns->bpf.run_array[type])) return -EEXIST; } } else { /* Make sure root flow dissector is not attached * when attaching to the non-root namespace. */ - if (rcu_access_pointer(init_net.bpf.progs[type])) + if (rcu_access_pointer(init_net.bpf.run_array[type])) return -EEXIST; } - attached = rcu_dereference_protected(net->bpf.progs[type], - lockdep_is_held(&netns_bpf_mutex)); - if (attached == prog) - /* The same program cannot be attached twice */ - return -EINVAL; - - rcu_assign_pointer(net->bpf.progs[type], prog); - if (attached) - bpf_prog_put(attached); return 0; } #endif /* CONFIG_BPF_SYSCALL */ @@ -903,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; - struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; bool mpls_el = false; @@ -960,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; + struct bpf_prog_array *run_array; rcu_read_lock(); - attached = rcu_dereference(init_net.bpf.progs[type]); - - if (!attached) - attached = rcu_dereference(net->bpf.progs[type]); + run_array = rcu_dereference(init_net.bpf.run_array[type]); + if (!run_array) + run_array = rcu_dereference(net->bpf.run_array[type]); - if (attached) { + if (run_array) { struct bpf_flow_keys flow_keys; struct bpf_flow_dissector ctx = { .flow_keys = &flow_keys, @@ -975,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net, .data_end = data + hlen, }; __be16 n_proto = proto; + struct bpf_prog *prog; if (skb) { ctx.skb = skb; @@ -985,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net, n_proto = skb->protocol; } - ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + prog = READ_ONCE(run_array->items[0].prog); + ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index b739cfab796e..b8cf6ff5f961 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -429,7 +429,7 @@ EXPORT_SYMBOL(flow_indr_dev_unregister); static void flow_block_indr_init(struct flow_block_cb *flow_block, struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, struct Qdisc *sch, void *data, void *cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -437,6 +437,7 @@ static void flow_block_indr_init(struct flow_block_cb *flow_block, flow_block->indr.data = data; flow_block->indr.cb_priv = cb_priv; flow_block->indr.dev = dev; + flow_block->indr.sch = sch; flow_block->indr.cleanup = cleanup; } @@ -444,7 +445,8 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv), struct flow_block_offload *bo, - struct net_device *dev, void *data, + struct net_device *dev, + struct Qdisc *sch, void *data, void *indr_cb_priv, void (*cleanup)(struct flow_block_cb *block_cb)) { @@ -454,7 +456,7 @@ struct flow_block_cb *flow_indr_block_cb_alloc(flow_setup_cb_t *cb, if (IS_ERR(block_cb)) goto out; - flow_block_indr_init(block_cb, bo, dev, data, indr_cb_priv, cleanup); + flow_block_indr_init(block_cb, bo, dev, sch, data, indr_cb_priv, cleanup); list_add(&block_cb->indr.list, &flow_block_indr_list); out: @@ -462,7 +464,7 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) @@ -471,7 +473,7 @@ int flow_indr_dev_setup_offload(struct net_device *dev, mutex_lock(&flow_indr_block_lock); list_for_each_entry(this, &flow_block_indr_dev_list, list) - this->cb(dev, this->cb_priv, type, bo, data, cleanup); + this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); mutex_unlock(&flow_indr_block_lock); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 351afbf6bfba..6a32a1fd34f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -683,7 +683,7 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) +static void sk_psock_skb_redirect(struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; @@ -715,12 +715,11 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) } } -static void sk_psock_tls_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) { switch (verdict) { case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_PASS: case __SK_DROP: @@ -741,8 +740,8 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } + sk_psock_tls_verdict_apply(skb, ret); rcu_read_unlock(); - sk_psock_tls_verdict_apply(psock, skb, ret); return ret; } EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); @@ -770,7 +769,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_psock_skb_redirect(psock, skb); + sk_psock_skb_redirect(skb); break; case __SK_DROP: /* fall-through */ @@ -782,11 +781,18 @@ out_free: static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) { - struct sk_psock *psock = sk_psock_from_strp(strp); + struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; + struct sock *sk; rcu_read_lock(); + sk = strp->sk; + psock = sk_psock(sk); + if (unlikely(!psock)) { + kfree_skb(skb); + goto out; + } prog = READ_ONCE(psock->progs.skb_verdict); if (likely(prog)) { skb_orphan(skb); @@ -794,8 +800,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) ret = sk_psock_bpf_run(psock, prog, skb); ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); } - rcu_read_unlock(); sk_psock_verdict_apply(psock, skb, ret); +out: + rcu_read_unlock(); } static int sk_psock_strp_read_done(struct strparser *strp, int err) diff --git a/net/core/sock.c b/net/core/sock.c index f5b5fdd61c88..11d6f77dd562 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1917,7 +1917,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) /* sk->sk_memcg will be populated at accept() time */ newsk->sk_memcg = NULL; - cgroup_sk_alloc(&newsk->sk_cgrp_data); + cgroup_sk_clone(&newsk->sk_cgrp_data); rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); @@ -3566,6 +3566,7 @@ int sock_load_diag_module(int family, int protocol) #ifdef CONFIG_INET if (family == AF_INET && protocol != IPPROTO_RAW && + protocol < MAX_INET_PROTOS && !rcu_access_pointer(inet_protos[protocol])) return -ENOENT; #endif diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4c1123c749bb..119f52a99dc1 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -70,11 +70,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) struct fd f; int ret; + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, attr->attach_type); + ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + fdput(f); + return ret; +} + +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + u32 ufd = attr->target_fd; + struct bpf_prog *prog; + struct bpf_map *map; + struct fd f; + int ret; + + if (attr->attach_flags || attr->replace_bpf_fd) + return -EINVAL; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + prog = bpf_prog_get(attr->attach_bpf_fd); + if (IS_ERR(prog)) { + ret = PTR_ERR(prog); + goto put_map; + } + + if (prog->type != ptype) { + ret = -EINVAL; + goto put_prog; + } + + ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); +put_prog: + bpf_prog_put(prog); +put_map: fdput(f); return ret; } @@ -1209,27 +1247,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) } int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - u32 which) + struct bpf_prog *old, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **pprog; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - psock_set_prog(&progs->msg_parser, prog); + pprog = &progs->msg_parser; break; case BPF_SK_SKB_STREAM_PARSER: - psock_set_prog(&progs->skb_parser, prog); + pprog = &progs->skb_parser; break; case BPF_SK_SKB_STREAM_VERDICT: - psock_set_prog(&progs->skb_verdict, prog); + pprog = &progs->skb_verdict; break; default: return -EOPNOTSUPP; } + if (old) + return psock_replace_prog(pprog, prog, old); + + psock_set_prog(pprog, prog); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); |