diff options
Diffstat (limited to 'net')
43 files changed, 515 insertions, 522 deletions
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 6c152f9ea26e..536aae52eead 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -41,9 +41,9 @@ static int lowpan_ctx_flag_active_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_active_fops, - lowpan_ctx_flag_active_get, - lowpan_ctx_flag_active_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_flag_active_fops, + lowpan_ctx_flag_active_get, + lowpan_ctx_flag_active_set, "%llu\n"); static int lowpan_ctx_flag_c_set(void *data, u64 val) { @@ -66,8 +66,8 @@ static int lowpan_ctx_flag_c_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get, - lowpan_ctx_flag_c_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_flag_c_fops, lowpan_ctx_flag_c_get, + lowpan_ctx_flag_c_set, "%llu\n"); static int lowpan_ctx_plen_set(void *data, u64 val) { @@ -97,8 +97,8 @@ static int lowpan_ctx_plen_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get, - lowpan_ctx_plen_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(lowpan_ctx_plen_fops, lowpan_ctx_plen_get, + lowpan_ctx_plen_set, "%llu\n"); static int lowpan_ctx_pfx_show(struct seq_file *file, void *offset) { @@ -184,15 +184,15 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, if (!root) return -EINVAL; - dentry = debugfs_create_file("active", 0644, root, - &ldev->ctx.table[id], - &lowpan_ctx_flag_active_fops); + dentry = debugfs_create_file_unsafe("active", 0644, root, + &ldev->ctx.table[id], + &lowpan_ctx_flag_active_fops); if (!dentry) return -EINVAL; - dentry = debugfs_create_file("compression", 0644, root, - &ldev->ctx.table[id], - &lowpan_ctx_flag_c_fops); + dentry = debugfs_create_file_unsafe("compression", 0644, root, + &ldev->ctx.table[id], + &lowpan_ctx_flag_c_fops); if (!dentry) return -EINVAL; @@ -202,9 +202,9 @@ static int lowpan_dev_debugfs_ctx_init(struct net_device *dev, if (!dentry) return -EINVAL; - dentry = debugfs_create_file("prefix_len", 0644, root, - &ldev->ctx.table[id], - &lowpan_ctx_plen_fops); + dentry = debugfs_create_file_unsafe("prefix_len", 0644, root, + &ldev->ctx.table[id], + &lowpan_ctx_plen_fops); if (!dentry) return -EINVAL; @@ -245,8 +245,8 @@ static int lowpan_short_addr_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get, - NULL, "0x%04llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(lowpan_short_addr_fops, lowpan_short_addr_get, NULL, + "0x%04llx\n"); static int lowpan_dev_debugfs_802154_init(const struct net_device *dev, struct lowpan_dev *ldev) @@ -260,9 +260,9 @@ static int lowpan_dev_debugfs_802154_init(const struct net_device *dev, if (!root) return -EINVAL; - dentry = debugfs_create_file("short_addr", 0444, root, - lowpan_802154_dev(dev)->wdev->ieee802154_ptr, - &lowpan_short_addr_fops); + dentry = debugfs_create_file_unsafe("short_addr", 0444, root, + lowpan_802154_dev(dev)->wdev->ieee802154_ptr, + &lowpan_short_addr_fops); if (!dentry) return -EINVAL; diff --git a/net/Kconfig b/net/Kconfig index 62da6148e9f8..1efe1f9ee492 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -429,21 +429,12 @@ config NET_SOCK_MSG with the help of BPF programs. config NET_DEVLINK - tristate "Network physical/parent device Netlink interface" + bool "Network physical/parent device Netlink interface" help Network physical/parent device Netlink interface provides infrastructure to support access to physical chip-wide config and monitoring. -config MAY_USE_DEVLINK - tristate - default m if NET_DEVLINK=m - default y if NET_DEVLINK=y || NET_DEVLINK=n - help - Drivers using the devlink infrastructure should have a dependency - on MAY_USE_DEVLINK to ensure they do not cause link errors when - devlink is a loadable module and the driver using it is built-in. - config PAGE_POOL bool diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9d79c7de234a..a7cd23f00bde 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -1108,8 +1108,8 @@ static int lowpan_enable_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, - lowpan_enable_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(lowpan_enable_fops, lowpan_enable_get, + lowpan_enable_set, "%llu\n"); static ssize_t lowpan_control_write(struct file *fp, const char __user *user_buffer, @@ -1278,9 +1278,10 @@ static struct notifier_block bt_6lowpan_dev_notifier = { static int __init bt_6lowpan_init(void) { - lowpan_enable_debugfs = debugfs_create_file("6lowpan_enable", 0644, - bt_debugfs, NULL, - &lowpan_enable_fops); + lowpan_enable_debugfs = debugfs_create_file_unsafe("6lowpan_enable", + 0644, bt_debugfs, + NULL, + &lowpan_enable_fops); lowpan_control_debugfs = debugfs_create_file("6lowpan_control", 0644, bt_debugfs, NULL, &lowpan_control_fops); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 58fc6333d412..5f918ea18b5a 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -174,7 +174,7 @@ static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb, num_ctrl++; } - len = num_ctrl * sizeof(struct a2mp_cl) + sizeof(*rsp); + len = struct_size(rsp, cl, num_ctrl); rsp = kmalloc(len, GFP_ATOMIC); if (!rsp) { read_unlock(&hci_dev_list_lock); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index deacc52d7ff1..8d12198eaa94 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); -void bt_accept_enqueue(struct sock *parent, struct sock *sk) +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (bh) + bh_lock_sock_nested(sk); + else + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; - release_sock(sk); + + if (bh) + bh_unlock_sock(sk); + else + release_sock(sk); + parent->sk_ack_backlog++; } EXPORT_SYMBOL(bt_accept_enqueue); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7352fe85674b..26e3d36aee29 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2578,6 +2578,9 @@ static void hci_cmd_timeout(struct work_struct *work) bt_dev_err(hdev, "command tx timeout"); } + if (hdev->cmd_timeout) + hdev->cmd_timeout(hdev); + atomic_set(&hdev->cmd_cnt, 1); queue_work(hdev->workqueue, &hdev->cmd_work); } @@ -3401,7 +3404,7 @@ EXPORT_SYMBOL(hci_resume_dev); /* Reset HCI device */ int hci_reset_dev(struct hci_dev *hdev) { - const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 }; + static const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 }; struct sk_buff *skb; skb = bt_skb_alloc(3, GFP_ATOMIC); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ac2826ce162b..609fd6871c5a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3556,8 +3556,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) + - ev->num_hndl * sizeof(struct hci_comp_pkts_info)) { + if (skb->len < sizeof(*ev) || + skb->len < struct_size(ev, handles, ev->num_hndl)) { BT_DBG("%s bad parameters", hdev->name); return; } @@ -3644,8 +3644,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) + - ev->num_hndl * sizeof(struct hci_comp_blocks_info)) { + if (skb->len < sizeof(*ev) || + skb->len < struct_size(ev, handles, ev->num_hndl)) { BT_DBG("%s bad parameters", hdev->name); return; } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 65228bfa4487..d32077b28433 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -831,8 +831,6 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; - hdev = hci_pi(sk)->hdev; - switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -854,6 +852,7 @@ static int hci_sock_release(struct socket *sock) bt_sock_unlink(&hci_sk_list, sk); + hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* When releasing a user channel exclusive access, diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2a7fb517d460..f17e393b43b4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3337,16 +3337,22 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); + if (len < 0) + break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; break; @@ -3354,26 +3360,30 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *) val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - remote_efs = 1; - memcpy(&efs, (void *) val, olen); - } + if (olen != sizeof(efs)) + break; + remote_efs = 1; + memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP)) return -ECONNREFUSED; - set_bit(FLAG_EXT_CTRL, &chan->flags); set_bit(CONF_EWS_RECV, &chan->conf_state); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; @@ -3383,7 +3393,6 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data default: if (hint) break; - result = L2CAP_CONF_UNKNOWN; *((u8 *) ptr++) = type; break; @@ -3548,58 +3557,65 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, + endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, + chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; - chan->fcs = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - memcpy(&efs, (void *)val, olen); - - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); - } + if (olen != sizeof(efs)) + break; + memcpy(&efs, (void *)val, olen); + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, @@ -3728,13 +3744,18 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; txwin_ext = val; break; } @@ -4244,6 +4265,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, goto done; break; } + /* fall through */ default: l2cap_chan_set_err(chan, ECONNRESET); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 686bdc6b35b0..a3a2cd55e23a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1252,7 +1252,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) l2cap_sock_init(sk, parent); - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, false); release_sock(parent); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1a635df80643..3a9e9d9670be 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -483,6 +483,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) /* if closing a dlc in a session that hasn't been started, * just close and unlink the dlc */ + /* fall through */ default: rfcomm_dlc_clear_timer(d); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aa0db1d1bd9b..b1f49fcc0478 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 529b38996d8b..9a580999ca57 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, conn->sk = sk; if (parent) - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); } static int sco_chan_add(struct sco_conn *conn, struct sock *sk, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index af57c4a2b78a..921310d3cbae 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -67,12 +67,18 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS, .u.brport_flags = mask, }; + struct switchdev_notifier_port_attr_info info = { + .attr = &attr, + }; int err; if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD) return 0; - err = switchdev_port_attr_set(p->dev, &attr); + /* We run from atomic context here */ + err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, + &info.info, NULL); + err = notifier_to_errno(err); if (err == -EOPNOTSUPP) return 0; diff --git a/net/core/devlink.c b/net/core/devlink.c index 4f31ddc883e7..6515fbec0dcd 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -723,7 +723,7 @@ static int devlink_port_type_set(struct devlink *devlink, { int err; - if (devlink->ops && devlink->ops->port_type_set) { + if (devlink->ops->port_type_set) { if (port_type == DEVLINK_PORT_TYPE_NOTSET) return -EINVAL; if (port_type == devlink_port->type) @@ -760,7 +760,7 @@ static int devlink_port_split(struct devlink *devlink, u32 port_index, u32 count, struct netlink_ext_ack *extack) { - if (devlink->ops && devlink->ops->port_split) + if (devlink->ops->port_split) return devlink->ops->port_split(devlink, port_index, count, extack); return -EOPNOTSUPP; @@ -786,7 +786,7 @@ static int devlink_port_unsplit(struct devlink *devlink, u32 port_index, struct netlink_ext_ack *extack) { - if (devlink->ops && devlink->ops->port_unsplit) + if (devlink->ops->port_unsplit) return devlink->ops->port_unsplit(devlink, port_index, extack); return -EOPNOTSUPP; } @@ -961,7 +961,7 @@ static int devlink_nl_cmd_sb_pool_get_doit(struct sk_buff *skb, if (err) return err; - if (!devlink->ops || !devlink->ops->sb_pool_get) + if (!devlink->ops->sb_pool_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1017,7 +1017,7 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops || !devlink->ops->sb_pool_get) + !devlink->ops->sb_pool_get) continue; mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -1046,7 +1046,7 @@ static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, { const struct devlink_ops *ops = devlink->ops; - if (ops && ops->sb_pool_set) + if (ops->sb_pool_set) return ops->sb_pool_set(devlink, sb_index, pool_index, size, threshold_type); return -EOPNOTSUPP; @@ -1151,7 +1151,7 @@ static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, if (err) return err; - if (!devlink->ops || !devlink->ops->sb_port_pool_get) + if (!devlink->ops->sb_port_pool_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1213,7 +1213,7 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops || !devlink->ops->sb_port_pool_get) + !devlink->ops->sb_port_pool_get) continue; mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -1242,7 +1242,7 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, { const struct devlink_ops *ops = devlink_port->devlink->ops; - if (ops && ops->sb_port_pool_set) + if (ops->sb_port_pool_set) return ops->sb_port_pool_set(devlink_port, sb_index, pool_index, threshold); return -EOPNOTSUPP; @@ -1355,7 +1355,7 @@ static int devlink_nl_cmd_sb_tc_pool_bind_get_doit(struct sk_buff *skb, if (err) return err; - if (!devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + if (!devlink->ops->sb_tc_pool_bind_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1439,7 +1439,7 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, mutex_lock(&devlink_mutex); list_for_each_entry(devlink, &devlink_list, list) { if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || - !devlink->ops || !devlink->ops->sb_tc_pool_bind_get) + !devlink->ops->sb_tc_pool_bind_get) continue; mutex_lock(&devlink->lock); @@ -1471,7 +1471,7 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, { const struct devlink_ops *ops = devlink_port->devlink->ops; - if (ops && ops->sb_tc_pool_bind_set) + if (ops->sb_tc_pool_bind_set) return ops->sb_tc_pool_bind_set(devlink_port, sb_index, tc_index, pool_type, pool_index, threshold); @@ -1519,7 +1519,7 @@ static int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; - if (ops && ops->sb_occ_snapshot) + if (ops->sb_occ_snapshot) return ops->sb_occ_snapshot(devlink, devlink_sb->index); return -EOPNOTSUPP; } @@ -1531,7 +1531,7 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, struct devlink_sb *devlink_sb = info->user_ptr[1]; const struct devlink_ops *ops = devlink->ops; - if (ops && ops->sb_occ_max_clear) + if (ops->sb_occ_max_clear) return ops->sb_occ_max_clear(devlink, devlink_sb->index); return -EOPNOTSUPP; } @@ -1594,13 +1594,9 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; - const struct devlink_ops *ops = devlink->ops; struct sk_buff *msg; int err; - if (!ops) - return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -1625,9 +1621,6 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, int err = 0; u16 mode; - if (!ops) - return -EOPNOTSUPP; - if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) { if (!ops->eswitch_mode_set) return -EOPNOTSUPP; @@ -3869,7 +3862,7 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - if (!devlink->ops || !devlink->ops->info_get) + if (!devlink->ops->info_get) return -EOPNOTSUPP; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -5232,6 +5225,9 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) { struct devlink *devlink; + if (WARN_ON(!ops)) + return NULL; + devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); if (!devlink) return NULL; @@ -5253,6 +5249,7 @@ EXPORT_SYMBOL_GPL(devlink_alloc); * devlink_register - Register devlink instance * * @devlink: devlink + * @dev: parent device */ int devlink_register(struct devlink *devlink, struct device *dev) { @@ -5303,7 +5300,7 @@ EXPORT_SYMBOL_GPL(devlink_free); * * @devlink: devlink * @devlink_port: devlink port - * @port_index + * @port_index: driver-specific numerical identifier of the port * * Register devlink port with provided port index. User can use * any indexing, even hw-related one. devlink_port structure @@ -5633,13 +5630,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); * * @devlink: devlink * @resource_name: resource's name - * @top_hierarchy: top hierarchy - * @reload_required: reload is required for new configuration to - * apply * @resource_size: resource's size * @resource_id: resource's id - * @parent_reosurce_id: resource's parent id - * @size params: size parameters + * @parent_resource_id: resource's parent id + * @size_params: size parameters */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, @@ -6091,7 +6085,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink, int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) { - if (!devlink->ops || !devlink->ops->reload) + if (!devlink->ops->reload) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink->param_list, @@ -6138,7 +6132,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, { struct devlink *devlink = devlink_port->devlink; - if (!devlink->ops || !devlink->ops->reload) + if (!devlink->ops->reload) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink_port->param_list, @@ -6336,7 +6330,7 @@ EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get); * Multiple snapshots can be created on a region. * The @snapshot_id should be obtained using the getter function. * - * @devlink_region: devlink region of the snapshot + * @region: devlink region of the snapshot * @data_len: size of snapshot data * @data: snapshot data * @snapshot_id: snapshot id to be created @@ -6397,9 +6391,6 @@ static void __devlink_compat_running_version(struct devlink *devlink, struct sk_buff *msg; int rem, err; - if (!devlink->ops->info_get) - return; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -6431,71 +6422,54 @@ free_msg: void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { - struct devlink_port *devlink_port; struct devlink *devlink; + dev_hold(dev); + rtnl_unlock(); + mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - mutex_lock(&devlink->lock); - list_for_each_entry(devlink_port, &devlink->port_list, list) { - if (devlink_port->type == DEVLINK_PORT_TYPE_ETH && - devlink_port->type_dev == dev) { - __devlink_compat_running_version(devlink, - buf, len); - mutex_unlock(&devlink->lock); - goto out; - } - } - mutex_unlock(&devlink->lock); - } -out: + devlink = netdev_to_devlink(dev); + if (!devlink || !devlink->ops->info_get) + goto unlock_list; + + mutex_lock(&devlink->lock); + __devlink_compat_running_version(devlink, buf, len); + mutex_unlock(&devlink->lock); +unlock_list: mutex_unlock(&devlink_mutex); + + rtnl_lock(); + dev_put(dev); } int devlink_compat_flash_update(struct net_device *dev, const char *file_name) { - struct devlink_port *devlink_port; struct devlink *devlink; + int ret = -EOPNOTSUPP; - mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - mutex_lock(&devlink->lock); - list_for_each_entry(devlink_port, &devlink->port_list, list) { - int ret = -EOPNOTSUPP; + dev_hold(dev); + rtnl_unlock(); - if (devlink_port->type != DEVLINK_PORT_TYPE_ETH || - devlink_port->type_dev != dev) - continue; + mutex_lock(&devlink_mutex); + devlink = netdev_to_devlink(dev); + if (!devlink || !devlink->ops->flash_update) + goto unlock_list; - mutex_unlock(&devlink_mutex); - if (devlink->ops->flash_update) - ret = devlink->ops->flash_update(devlink, - file_name, - NULL, NULL); - mutex_unlock(&devlink->lock); - return ret; - } - mutex_unlock(&devlink->lock); - } + mutex_lock(&devlink->lock); + ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL); + mutex_unlock(&devlink->lock); +unlock_list: mutex_unlock(&devlink_mutex); - return -EOPNOTSUPP; -} + rtnl_lock(); + dev_put(dev); -static int __init devlink_module_init(void) -{ - return genl_register_family(&devlink_nl_family); + return ret; } -static void __exit devlink_module_exit(void) +static int __init devlink_init(void) { - genl_unregister_family(&devlink_nl_family); + return genl_register_family(&devlink_nl_family); } -module_init(devlink_module_init); -module_exit(devlink_module_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); -MODULE_DESCRIPTION("Network physical device Netlink interface"); -MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME); +subsys_initcall(devlink_init); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1320e8dce559..d4918ffddda8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -805,11 +805,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); - rtnl_unlock(); if (!info.fw_version[0]) devlink_compat_running_version(dev, info.fw_version, sizeof(info.fw_version)); - rtnl_lock(); if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; @@ -1719,7 +1717,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; + struct ethtool_pauseparam pauseparam = { .cmd = ETHTOOL_GPAUSEPARAM }; if (!dev->ethtool_ops->get_pauseparam) return -EOPNOTSUPP; @@ -2040,15 +2038,8 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, return -EFAULT; efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; - if (!dev->ethtool_ops->flash_device) { - int ret; - - rtnl_unlock(); - ret = devlink_compat_flash_update(dev, efl.data); - rtnl_lock(); - - return ret; - } + if (!dev->ethtool_ops->flash_device) + return devlink_compat_flash_update(dev, efl.data); return dev->ethtool_ops->flash_device(dev, &efl); } @@ -2512,7 +2503,7 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + struct ethtool_fecparam fecparam = { .cmd = ETHTOOL_GFECPARAM }; int rc; if (!dev->ethtool_ops->get_fecparam) diff --git a/net/core/sock.c b/net/core/sock.c index f5d82f3fa474..782343bb925b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1108,15 +1108,23 @@ set_rcvbuf: #endif case SO_MAX_PACING_RATE: - if (val != ~0U) + { + unsigned long ulval = (val == ~0U) ? ~0UL : val; + + if (sizeof(ulval) != sizeof(val) && + optlen >= sizeof(ulval) && + get_user(ulval, (unsigned long __user *)optval)) { + ret = -EFAULT; + break; + } + if (ulval != ~0UL) cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); - sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; - sk->sk_pacing_rate = min(sk->sk_pacing_rate, - sk->sk_max_pacing_rate); + sk->sk_max_pacing_rate = ulval; + sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); break; - + } case SO_INCOMING_CPU: sk->sk_incoming_cpu = val; break; @@ -1211,6 +1219,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; u64 val64; + unsigned long ulval; struct linger ling; struct old_timeval32 tm32; struct __kernel_old_timeval tm; @@ -1456,8 +1465,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, #endif case SO_MAX_PACING_RATE: - /* 32bit version */ - v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); + if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { + lv = sizeof(v.ulval); + v.ulval = sk->sk_max_pacing_rate; + } else { + /* 32bit version */ + v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); + } break; case SO_INCOMING_CPU: diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 91e52973ee13..fab49132345f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -6,7 +6,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA && MAY_USE_DEVLINK + depends on HAVE_NET_DSA depends on BRIDGE || BRIDGE=n select NET_SWITCHDEV select PHYLINK diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 47a1d1379d15..c6caa58c5c71 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -143,7 +143,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); -void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, diff --git a/net/dsa/port.c b/net/dsa/port.c index 6b114b045162..a2dad10646cb 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -85,7 +85,7 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) return 0; } -void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) +void dsa_port_disable(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -94,7 +94,7 @@ void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) dsa_port_set_state_now(dp, BR_STATE_DISABLED); if (ds->ops->port_disable) - ds->ops->port_disable(ds, port, phy); + ds->ops->port_disable(ds, port); } int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 90629b12beaf..1808a2cd6872 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -122,7 +122,7 @@ static int dsa_slave_close(struct net_device *dev) phylink_stop(dp->pl); - dsa_port_disable(dp, dev->phydev); + dsa_port_disable(dp); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); @@ -1118,10 +1118,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, }; -static const struct switchdev_ops dsa_slave_switchdev_ops = { - .switchdev_port_attr_set = dsa_slave_port_attr_set, -}; - static struct device_type dsa_type = { .name = "dsa", }; @@ -1382,7 +1378,6 @@ int dsa_slave_create(struct dsa_port *port) eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; - slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; slave_dev->min_mtu = 0; slave_dev->max_mtu = ETH_MAX_MTU; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); @@ -1524,6 +1519,19 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, return NOTIFY_DONE; } +static int +dsa_slave_switchdev_port_attr_set_event(struct net_device *netdev, + struct switchdev_notifier_port_attr_info *port_attr_info) +{ + int err; + + err = dsa_slave_port_attr_set(netdev, port_attr_info->attr, + port_attr_info->trans); + + port_attr_info->handled = true; + return notifier_from_errno(err); +} + struct dsa_switchdev_event_work { struct work_struct work; struct switchdev_notifier_fdb_info fdb_info; @@ -1602,6 +1610,9 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, if (!dsa_slave_dev_check(dev)) return NOTIFY_DONE; + if (event == SWITCHDEV_PORT_ATTR_SET) + return dsa_slave_switchdev_port_attr_set_event(dev, ptr); + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); if (!switchdev_work) return NOTIFY_BAD; @@ -1664,6 +1675,8 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, case SWITCHDEV_PORT_OBJ_ADD: /* fall through */ case SWITCHDEV_PORT_OBJ_DEL: return dsa_slave_switchdev_port_obj_event(event, dev, ptr); + case SWITCHDEV_PORT_ATTR_SET: + return dsa_slave_switchdev_port_attr_set_event(dev, ptr); } return NOTIFY_DONE; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index bd61633d2c32..4196bcd4105a 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -179,7 +179,6 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, skb->dev = ldev; skb->tstamp = fq->q.stamp; - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9f69411251d0..737808e27f8b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -203,7 +203,6 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { - struct sk_buff *fp; struct netns_frags *nf; unsigned int sum, sum_truesize = 0; struct inet_frags *f; @@ -212,20 +211,9 @@ void inet_frag_destroy(struct inet_frag_queue *q) WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ - fp = q->fragments; nf = q->net; f = nf->f; - if (fp) { - do { - struct sk_buff *xp = fp->next; - - sum_truesize += fp->truesize; - kfree_skb(fp); - fp = xp; - } while (fp); - } else { - sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); - } + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); @@ -489,26 +477,20 @@ EXPORT_SYMBOL(inet_frag_reasm_finish); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) { - struct sk_buff *head; + struct sk_buff *head, *skb; - if (q->fragments) { - head = q->fragments; - q->fragments = head->next; - } else { - struct sk_buff *skb; + head = skb_rb_first(&q->rb_fragments); + if (!head) + return NULL; + skb = FRAG_CB(head)->next_frag; + if (skb) + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + else + rb_erase(&head->rbnode, &q->rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); - head = skb_rb_first(&q->rb_fragments); - if (!head) - return NULL; - skb = FRAG_CB(head)->next_frag; - if (skb) - rb_replace_node(&head->rbnode, &skb->rbnode, - &q->rb_fragments); - else - rb_erase(&head->rbnode, &q->rb_fragments); - memset(&head->rbnode, 0, sizeof(head->rbnode)); - barrier(); - } if (head == q->fragments_tail) q->fragments_tail = NULL; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 486ecb0aeb87..cf2b0a6a3337 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -261,7 +261,6 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.flags = 0; qp->q.len = 0; qp->q.meat = 0; - qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; @@ -451,7 +450,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, ip_send_check(iph); __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS); - qp->q.fragments = NULL; qp->q.rb_fragments = RB_ROOT; qp->q.fragments_tail = NULL; qp->q.last_run_head = NULL; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f4925a0d6b2..fd219f7bd3ea 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -578,7 +578,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark); + skb->mark, skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2973067b831d..2756fb725bf0 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -310,7 +310,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), tunnel->parms.link, - tunnel->fwmark); + tunnel->fwmark, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -584,7 +584,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - 0, skb->mark); + 0, skb->mark, skb_get_hash(skb)); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -744,7 +744,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + tunnel->fwmark, skb_get_hash(skb)); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ecc12a768191..e3ac458b5d8b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1820,6 +1820,7 @@ out: int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { + u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; u32 mhash; @@ -1870,6 +1871,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } mhash = flow_hash_from_keys(&hash_keys); + if (multipath_hash) + mhash = jhash_2words(mhash, multipath_hash, 0); + return mhash >> 1; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 769508c75dce..ad07dd71063d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1412,7 +1412,8 @@ do_fault: /* It is the one place in all of TCP, except connection * reset, where we can be unlinking the send_head. */ - tcp_check_send_head(sk, skb); + if (tcp_write_queue_empty(sk)) + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); sk_wmem_free_skb(sk, skb); } @@ -3698,7 +3699,7 @@ bool tcp_alloc_md5sig_pool(void) if (!tcp_md5sig_pool_populated) { __tcp_alloc_md5sig_pool(); if (tcp_md5sig_pool_populated) - static_key_slow_inc(&tcp_md5_needed); + static_branch_inc(&tcp_md5_needed); } mutex_unlock(&tcp_md5sig_mutex); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7a027dec649b..4eb0c8ca3c60 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1574,9 +1574,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, return skb; } -static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, - struct tcp_sacktag_state *state, - u32 seq) +static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, u32 seq) { struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; struct sk_buff *skb; @@ -1598,13 +1596,12 @@ static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, } static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, - struct tcp_sacktag_state *state, u32 skip_to_seq) { if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq)) return skb; - return tcp_sacktag_bsearch(sk, state, skip_to_seq); + return tcp_sacktag_bsearch(sk, skip_to_seq); } static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, @@ -1617,7 +1614,7 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, return skb; if (before(next_dup->start_seq, skip_to_seq)) { - skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq); + skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq); skb = tcp_sacktag_walk(skb, sk, NULL, state, next_dup->start_seq, next_dup->end_seq, 1); @@ -1758,8 +1755,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, /* Head todo? */ if (before(start_seq, cache->start_seq)) { - skb = tcp_sacktag_skip(skb, sk, state, - start_seq); + skb = tcp_sacktag_skip(skb, sk, start_seq); skb = tcp_sacktag_walk(skb, sk, next_dup, state, start_seq, @@ -1785,7 +1781,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, goto walk; } - skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq); + skb = tcp_sacktag_skip(skb, sk, cache->end_seq); /* Check overlap against next cached too (past this one already) */ cache++; continue; @@ -1796,7 +1792,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (!skb) break; } - skb = tcp_sacktag_skip(skb, sk, state, start_seq); + skb = tcp_sacktag_skip(skb, sk, start_seq); walk: skb = tcp_sacktag_walk(skb, sk, next_dup, state, @@ -3595,7 +3591,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) * this segment (RFC793 Section 3.9). */ if (after(ack, tp->snd_nxt)) - goto invalid_ack; + return -1; if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; @@ -3714,10 +3710,6 @@ no_queue: tcp_process_tlp_ack(sk, ack, flag); return 1; -invalid_ack: - SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); - return -1; - old_ack: /* If data was SACKed, tag it and see if we should send more data. * If data was DSACKed, see if we can undo a cwnd reduction. @@ -3731,7 +3723,6 @@ old_ack: tcp_xmit_recovery(sk, rexmit); } - SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); return 0; } @@ -4432,13 +4423,9 @@ static void tcp_ofo_queue(struct sock *sk) rb_erase(&skb->rbnode, &tp->out_of_order_queue); if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { - SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; } - SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq); tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); @@ -4502,8 +4489,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); seq = TCP_SKB_CB(skb)->seq; end_seq = TCP_SKB_CB(skb)->end_seq; - SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, seq, end_seq); p = &tp->out_of_order_queue.rb_node; if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { @@ -4779,10 +4764,6 @@ drop: if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ - SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(skb)->end_seq); - tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt); /* If window is closed, drop tail of packet. But after @@ -5061,8 +5042,6 @@ static int tcp_prune_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq); - NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4010ae3644f3..831d844a27ca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -973,7 +973,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) * We need to maintain these in the sk structure. */ -struct static_key tcp_md5_needed __read_mostly; +DEFINE_STATIC_KEY_FALSE(tcp_md5_needed); EXPORT_SYMBOL(tcp_md5_needed); /* Find the Key structure for an address. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 182595e2d40f..79900f783e0d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -294,12 +294,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * so the timewait ack generating code has the key. */ do { - struct tcp_md5sig_key *key; tcptw->tw_md5_key = NULL; - key = tp->af_specific->md5_lookup(sk, sk); - if (key) { - tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); + if (static_branch_unlikely(&tcp_md5_needed)) { + struct tcp_md5sig_key *key; + + key = tp->af_specific->md5_lookup(sk, sk); + if (key) { + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); + BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); + } } } while (0); #endif @@ -338,10 +341,12 @@ EXPORT_SYMBOL(tcp_time_wait); void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG - struct tcp_timewait_sock *twsk = tcp_twsk(sk); + if (static_branch_unlikely(&tcp_md5_needed)) { + struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) - kfree_rcu(twsk->tw_md5_key, rcu); + if (twsk->tw_md5_key) + kfree_rcu(twsk->tw_md5_key, rcu); + } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e72aa0ff5785..4522579aaca2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -597,7 +597,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_key_false(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { @@ -734,7 +734,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb *md5 = NULL; #ifdef CONFIG_TCP_MD5SIG - if (static_key_false(&tcp_md5_needed) && + if (static_branch_unlikely(&tcp_md5_needed) && rcu_access_pointer(tp->md5sig_info)) { *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { @@ -1846,17 +1846,17 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, - struct sk_buff *skb, unsigned int len, +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now, gfp_t gfp) { - struct sk_buff *buff; int nlen = skb->len - len; + struct sk_buff *buff; u8 flags; /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp); + return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) @@ -1892,7 +1892,7 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); + tcp_insert_write_queue_after(skb, buff, sk, TCP_FRAG_IN_WRITE_QUEUE); return 0; } @@ -2391,8 +2391,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle); if (skb->len > limit && - unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, - skb, limit, mss_now, gfp))) + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; if (tcp_small_queue_check(sk, skb, 0)) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index cb1b4772dac0..3de0e9b0a482 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -365,7 +365,6 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_network_header_len(skb), skb->csum); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 24264d0a4b85..1a832f5e190b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -304,7 +304,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e51cda79f0cc..57ef69a10889 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -220,8 +220,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; - SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); - if (__ipv6_only_sock(sk)) return -ENETUNREACH; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 945fb34ae721..c79aca29505e 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -559,8 +559,11 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); + bool orig_vlan_tag_present = false; + unsigned int vlan_hdr_count = 0; struct tcf_csum_params *params; u32 update_flags; + __be16 protocol; int action; params = rcu_dereference_bh(p->params); @@ -573,7 +576,9 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, goto drop; update_flags = params->update_flags; - switch (tc_skb_protocol(skb)) { + protocol = tc_skb_protocol(skb); +again: + switch (protocol) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; @@ -582,13 +587,35 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, if (!tcf_csum_ipv6(skb, update_flags)) goto drop; break; + case cpu_to_be16(ETH_P_8021AD): /* fall through */ + case cpu_to_be16(ETH_P_8021Q): + if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) { + protocol = skb->protocol; + orig_vlan_tag_present = true; + } else { + struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data; + + protocol = vlan->h_vlan_encapsulated_proto; + skb_pull(skb, VLAN_HLEN); + skb_reset_network_header(skb); + vlan_hdr_count++; + } + goto again; + } + +out: + /* Restore the skb for the pulled VLAN tags */ + while (vlan_hdr_count--) { + skb_push(skb, VLAN_HLEN); + skb_reset_network_header(skb); } return action; drop: qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats)); - return TC_ACT_SHOT; + action = TC_ACT_SHOT; + goto out; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 3404af72b4c1..fc2b884b170c 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -201,8 +201,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) { if (!p) return; - if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { +#ifdef CONFIG_DST_CACHE + struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info; + + dst_cache_destroy(&info->dst_cache); +#endif dst_release(&p->tcft_enc_metadata->dst); + } kfree_rcu(p, rcu); } @@ -384,7 +390,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, release_dst_cache: #ifdef CONFIG_DST_CACHE - dst_cache_destroy(&metadata->u.tun_info.dst_cache); + if (metadata) + dst_cache_destroy(&metadata->u.tun_info.dst_cache); #endif release_tun_meta: dst_release(&metadata->dst); @@ -401,15 +408,8 @@ static void tunnel_key_release(struct tc_action *a) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; -#ifdef CONFIG_DST_CACHE - struct ip_tunnel_info *info; -#endif params = rcu_dereference_protected(t->params, 1); -#ifdef CONFIG_DST_CACHE - info = ¶ms->tcft_enc_metadata->u.tun_info; - dst_cache_destroy(&info->dst_cache); -#endif tunnel_key_release_params(params); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6593c245f714..478095d50f95 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -238,18 +238,23 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, tcf_proto_destroy(tp, rtnl_held, extack); } -static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) +static int walker_check_empty(struct tcf_proto *tp, void *fh, + struct tcf_walker *arg) { - return -1; + if (fh) { + arg->nonempty = true; + return -1; + } + return 0; } static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) { - struct tcf_walker walker = { .fn = walker_noop, }; + struct tcf_walker walker = { .fn = walker_check_empty, }; if (tp->ops->walk) { tp->ops->walk(tp, &walker, rtnl_held); - return !walker.stop; + return !walker.nonempty; } return true; } @@ -2907,12 +2912,12 @@ errout_block_locked: /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { - struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_chain *chain; long index_start; long index; u32 parent; @@ -2975,11 +2980,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - for (chain = __tcf_get_next_chain(block, NULL); - chain; - chain_prev = chain, - chain = __tcf_get_next_chain(block, chain), - tcf_chain_put(chain_prev)) { + mutex_lock(&block->lock); + list_for_each_entry(chain, &block->chain_list, list) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; @@ -2987,17 +2989,18 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } + if (tcf_chain_held_by_acts_only(chain)) + continue; err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); - if (err <= 0) { - tcf_chain_put(chain); + if (err <= 0) break; - } index++; } + mutex_unlock(&block->lock); if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) tcf_block_refcnt_put(block, true); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 456ee6e62dfa..ad036b00427d 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -363,7 +363,10 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct fw_head *head = rtnl_dereference(tp->root); int h; - if (head == NULL || arg->stop) + if (head == NULL) + arg->stop = 1; + + if (arg->stop) return; for (h = 0; h < HTSIZE; h++) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 38e5add14fab..7bcee8b8f803 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -559,7 +559,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { }; static struct netdev_queue noop_netdev_queue = { - .qdisc = &noop_qdisc, + RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc), .qdisc_sleeping = &noop_qdisc, }; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index d1429371592f..1cc0c7b74aa3 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -17,9 +17,7 @@ * University of Oslo, Norway. * * References: - * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00 - * IEEE Conference on High Performance Switching and Routing 2013 : - * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem" + * RFC 8033: https://tools.ietf.org/html/rfc8033 */ #include <linux/module.h> @@ -31,9 +29,9 @@ #include <net/pkt_sched.h> #include <net/inet_ecn.h> -#define QUEUE_THRESHOLD 10000 +#define QUEUE_THRESHOLD 16384 #define DQCOUNT_INVALID -1 -#define MAX_PROB 0xffffffff +#define MAX_PROB 0xffffffffffffffff #define PIE_SCALE 8 /* parameters used */ @@ -49,14 +47,16 @@ struct pie_params { /* variables used */ struct pie_vars { - u32 prob; /* probability but scaled by u32 limit. */ + u64 prob; /* probability but scaled by u64 limit. */ psched_time_t burst_time; psched_time_t qdelay; psched_time_t qdelay_old; u64 dq_count; /* measured in bytes */ psched_time_t dq_tstamp; /* drain rate */ + u64 accu_prob; /* accumulated drop probability */ u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */ u32 qlen_old; /* in bytes */ + u8 accu_prob_overflows; /* overflows of accu_prob */ }; /* statistics gathering */ @@ -81,9 +81,9 @@ static void pie_params_init(struct pie_params *params) { params->alpha = 2; params->beta = 20; - params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */ + params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */ params->limit = 1000; /* default of 1000 packets */ - params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */ + params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */ params->ecn = false; params->bytemode = false; } @@ -91,16 +91,18 @@ static void pie_params_init(struct pie_params *params) static void pie_vars_init(struct pie_vars *vars) { vars->dq_count = DQCOUNT_INVALID; + vars->accu_prob = 0; vars->avg_dq_rate = 0; - /* default of 100 ms in pschedtime */ - vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC); + /* default of 150 ms in pschedtime */ + vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC); + vars->accu_prob_overflows = 0; } static bool drop_early(struct Qdisc *sch, u32 packet_size) { struct pie_sched_data *q = qdisc_priv(sch); - u32 rnd; - u32 local_prob = q->vars.prob; + u64 rnd; + u64 local_prob = q->vars.prob; u32 mtu = psched_mtu(qdisc_dev(sch)); /* If there is still burst allowance left skip random early drop */ @@ -124,13 +126,33 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size) * probablity. Smaller packets will have lower drop prob in this case */ if (q->params.bytemode && packet_size <= mtu) - local_prob = (local_prob / mtu) * packet_size; + local_prob = (u64)packet_size * div_u64(local_prob, mtu); else local_prob = q->vars.prob; - rnd = prandom_u32(); - if (rnd < local_prob) + if (local_prob == 0) { + q->vars.accu_prob = 0; + q->vars.accu_prob_overflows = 0; + } + + if (local_prob > MAX_PROB - q->vars.accu_prob) + q->vars.accu_prob_overflows++; + + q->vars.accu_prob += local_prob; + + if (q->vars.accu_prob_overflows == 0 && + q->vars.accu_prob < (MAX_PROB / 100) * 85) + return false; + if (q->vars.accu_prob_overflows == 8 && + q->vars.accu_prob >= MAX_PROB / 2) + return true; + + prandom_bytes(&rnd, 8); + if (rnd < local_prob) { + q->vars.accu_prob = 0; + q->vars.accu_prob_overflows = 0; return true; + } return false; } @@ -168,6 +190,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; + q->vars.accu_prob = 0; + q->vars.accu_prob_overflows = 0; return qdisc_drop(skb, sch, to_free); } @@ -317,9 +341,10 @@ static void calculate_probability(struct Qdisc *sch) u32 qlen = sch->qstats.backlog; /* queue size in bytes */ psched_time_t qdelay = 0; /* in pschedtime */ psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */ - s32 delta = 0; /* determines the change in probability */ - u32 oldprob; - u32 alpha, beta; + s64 delta = 0; /* determines the change in probability */ + u64 oldprob; + u64 alpha, beta; + u32 power; bool update_prob = true; q->vars.qdelay_old = q->vars.qdelay; @@ -339,38 +364,36 @@ static void calculate_probability(struct Qdisc *sch) * value for alpha as 0.125. In this implementation, we use values 0-32 * passed from user space to represent this. Also, alpha and beta have * unit of HZ and need to be scaled before they can used to update - * probability. alpha/beta are updated locally below by 1) scaling them - * appropriately 2) scaling down by 16 to come to 0-2 range. - * Please see paper for details. - * - * We scale alpha and beta differently depending on whether we are in - * light, medium or high dropping mode. + * probability. alpha/beta are updated locally below by scaling down + * by 16 to come to 0-2 range. */ - if (q->vars.prob < MAX_PROB / 100) { - alpha = - (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; - beta = - (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7; - } else if (q->vars.prob < MAX_PROB / 10) { - alpha = - (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; - beta = - (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5; - } else { - alpha = - (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; - beta = - (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4; + + /* We scale alpha and beta differently depending on how heavy the + * congestion is. Please see RFC 8033 for details. + */ + if (q->vars.prob < MAX_PROB / 10) { + alpha >>= 1; + beta >>= 1; + + power = 100; + while (q->vars.prob < div_u64(MAX_PROB, power) && + power <= 1000000) { + alpha >>= 2; + beta >>= 2; + power *= 10; + } } /* alpha and beta should be between 0 and 32, in multiples of 1/16 */ - delta += alpha * ((qdelay - q->params.target)); - delta += beta * ((qdelay - qdelay_old)); + delta += alpha * (u64)(qdelay - q->params.target); + delta += beta * (u64)(qdelay - qdelay_old); oldprob = q->vars.prob; /* to ensure we increase probability in steps of no more than 2% */ - if (delta > (s32)(MAX_PROB / (100 / 2)) && + if (delta > (s64)(MAX_PROB / (100 / 2)) && q->vars.prob >= MAX_PROB / 10) delta = (MAX_PROB / 100) * 2; @@ -406,7 +429,8 @@ static void calculate_probability(struct Qdisc *sch) */ if (qdelay == 0 && qdelay_old == 0 && update_prob) - q->vars.prob = (q->vars.prob * 98) / 100; + /* Reduce drop probability to 98.4% */ + q->vars.prob -= q->vars.prob / 64u; q->vars.qdelay = qdelay; q->vars.qlen_old = qlen; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 25dfbe343e26..8d2f6296279c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -754,9 +754,47 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, return rc; } +/* if handshake network device belongs to a roce device, return its + * IB device and port + */ +static void smc_pnet_find_rdma_dev(struct net_device *netdev, + struct smc_ib_device **smcibdev, + u8 *ibport, unsigned short vlan_id, u8 gid[]) +{ + struct smc_ib_device *ibdev; + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + struct net_device *ndev; + int i; + + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(ibdev->ibdev, i)) + continue; + if (!ibdev->ibdev->ops.get_netdev) + continue; + ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); + if (!ndev) + continue; + dev_put(ndev); + if (netdev == ndev && + smc_ib_port_active(ibdev, i) && + !smc_ib_determine_gid(ibdev, i, vlan_id, gid, + NULL)) { + *smcibdev = ibdev; + *ibport = i; + break; + } + } + } + spin_unlock(&smc_ib_devices.lock); +} + /* Determine the corresponding IB device port based on the hardware PNETID. * Searching stops at the first matching active IB device port with vlan_id * configured. + * If nothing found, check pnetid table. + * If nothing found, try to use handshake device */ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_ib_device **smcibdev, @@ -770,8 +808,10 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, ndev_pnetid) && - smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) + smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) { + smc_pnet_find_rdma_dev(ndev, smcibdev, ibport, vlan_id, gid); return; /* pnetid could not be determined */ + } spin_lock(&smc_ib_devices.lock); list_for_each_entry(ibdev, &smc_ib_devices.list, list) { diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 7e1357db33d7..90ba4a1f0a6d 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -23,78 +23,6 @@ #include <linux/rtnetlink.h> #include <net/switchdev.h> -/** - * switchdev_trans_item_enqueue - Enqueue data item to transaction queue - * - * @trans: transaction - * @data: pointer to data being queued - * @destructor: data destructor - * @tritem: transaction item being queued - * - * Enqeueue data item to transaction queue. tritem is typically placed in - * cointainter pointed at by data pointer. Destructor is called on - * transaction abort and after successful commit phase in case - * the caller did not dequeue the item before. - */ -void switchdev_trans_item_enqueue(struct switchdev_trans *trans, - void *data, void (*destructor)(void const *), - struct switchdev_trans_item *tritem) -{ - tritem->data = data; - tritem->destructor = destructor; - list_add_tail(&tritem->list, &trans->item_list); -} -EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); - -static struct switchdev_trans_item * -__switchdev_trans_item_dequeue(struct switchdev_trans *trans) -{ - struct switchdev_trans_item *tritem; - - if (list_empty(&trans->item_list)) - return NULL; - tritem = list_first_entry(&trans->item_list, - struct switchdev_trans_item, list); - list_del(&tritem->list); - return tritem; -} - -/** - * switchdev_trans_item_dequeue - Dequeue data item from transaction queue - * - * @trans: transaction - */ -void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) -{ - struct switchdev_trans_item *tritem; - - tritem = __switchdev_trans_item_dequeue(trans); - BUG_ON(!tritem); - return tritem->data; -} -EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); - -static void switchdev_trans_init(struct switchdev_trans *trans) -{ - INIT_LIST_HEAD(&trans->item_list); -} - -static void switchdev_trans_items_destroy(struct switchdev_trans *trans) -{ - struct switchdev_trans_item *tritem; - - while ((tritem = __switchdev_trans_item_dequeue(trans))) - tritem->destructor(tritem->data); -} - -static void switchdev_trans_items_warn_destroy(struct net_device *dev, - struct switchdev_trans *trans) -{ - WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", - dev->name); - switchdev_trans_items_destroy(trans); -} - static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -174,81 +102,32 @@ static int switchdev_deferred_enqueue(struct net_device *dev, return 0; } -/** - * switchdev_port_attr_get - Get port attribute - * - * @dev: port device - * @attr: attribute to get - */ -int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int switchdev_port_attr_notify(enum switchdev_notifier_type nt, + struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) { - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct list_head *iter; - struct switchdev_attr first = { - .id = SWITCHDEV_ATTR_ID_UNDEFINED - }; - int err = -EOPNOTSUPP; + int err; + int rc; - if (ops && ops->switchdev_port_attr_get) - return ops->switchdev_port_attr_get(dev, attr); + struct switchdev_notifier_port_attr_info attr_info = { + .attr = attr, + .trans = trans, + .handled = false, + }; - if (attr->flags & SWITCHDEV_F_NO_RECURSE) + rc = call_switchdev_blocking_notifiers(nt, dev, + &attr_info.info, NULL); + err = notifier_to_errno(rc); + if (err) { + WARN_ON(!attr_info.handled); return err; - - /* Switch device port(s) may be stacked under - * bond/team/vlan dev, so recurse down to get attr on - * each port. Return -ENODATA if attr values don't - * compare across ports. - */ - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_attr_get(lower_dev, attr); - if (err) - break; - if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) - first = *attr; - else if (memcmp(&first, attr, sizeof(*attr))) - return -ENODATA; } - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_attr_get); - -static int __switchdev_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct list_head *iter; - int err = -EOPNOTSUPP; - - if (ops && ops->switchdev_port_attr_set) { - err = ops->switchdev_port_attr_set(dev, attr, trans); - goto done; - } - - if (attr->flags & SWITCHDEV_F_NO_RECURSE) - goto done; - - /* Switch device port(s) may be stacked under - * bond/team/vlan dev, so recurse down to set attr on - * each port. - */ - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = __switchdev_port_attr_set(lower_dev, attr, trans); - if (err) - break; - } - -done: - if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) - err = 0; + if (!attr_info.handled) + return -EOPNOTSUPP; - return err; + return 0; } static int switchdev_port_attr_set_now(struct net_device *dev, @@ -257,8 +136,6 @@ static int switchdev_port_attr_set_now(struct net_device *dev, struct switchdev_trans trans; int err; - switchdev_trans_init(&trans); - /* Phase I: prepare for attr set. Driver/device should fail * here if there are going to be issues in the commit phase, * such as lack of resources or support. The driver/device @@ -267,18 +144,10 @@ static int switchdev_port_attr_set_now(struct net_device *dev, */ trans.ph_prepare = true; - err = __switchdev_port_attr_set(dev, attr, &trans); - if (err) { - /* Prepare phase failed: abort the transaction. Any - * resources reserved in the prepare phase are - * released. - */ - - if (err != -EOPNOTSUPP) - switchdev_trans_items_destroy(&trans); - + err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, + &trans); + if (err) return err; - } /* Phase II: commit attr set. This cannot fail as a fault * of driver/device. If it does, it's a bug in the driver/device @@ -286,10 +155,10 @@ static int switchdev_port_attr_set_now(struct net_device *dev, */ trans.ph_prepare = false; - err = __switchdev_port_attr_set(dev, attr, &trans); + err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, + &trans); WARN(err, "%s: Commit of attribute (id=%d) failed.\n", dev->name, attr->id); - switchdev_trans_items_warn_destroy(dev, &trans); return err; } @@ -388,8 +257,6 @@ static int switchdev_port_obj_add_now(struct net_device *dev, ASSERT_RTNL(); - switchdev_trans_init(&trans); - /* Phase I: prepare for obj add. Driver/device should fail * here if there are going to be issues in the commit phase, * such as lack of resources or support. The driver/device @@ -400,17 +267,8 @@ static int switchdev_port_obj_add_now(struct net_device *dev, trans.ph_prepare = true; err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, dev, obj, &trans, extack); - if (err) { - /* Prepare phase failed: abort the transaction. Any - * resources reserved in the prepare phase are - * released. - */ - - if (err != -EOPNOTSUPP) - switchdev_trans_items_destroy(&trans); - + if (err) return err; - } /* Phase II: commit obj add. This cannot fail as a fault * of driver/device. If it does, it's a bug in the driver/device @@ -421,7 +279,6 @@ static int switchdev_port_obj_add_now(struct net_device *dev, err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, dev, obj, &trans, extack); WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); - switchdev_trans_items_warn_destroy(dev, &trans); return err; } @@ -697,3 +554,54 @@ int switchdev_handle_port_obj_del(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del); + +static int __switchdev_handle_port_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + bool (*check_cb)(const struct net_device *dev), + int (*set_cb)(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans)) +{ + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (check_cb(dev)) { + port_attr_info->handled = true; + return set_cb(dev, port_attr_info->attr, + port_attr_info->trans); + } + + /* Switch ports might be stacked under e.g. a LAG. Ignore the + * unsupported devices, another driver might be able to handle them. But + * propagate to the callers any hard errors. + * + * If the driver does its own bookkeeping of stacked ports, it's not + * necessary to go through this helper. + */ + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info, + check_cb, set_cb); + if (err && err != -EOPNOTSUPP) + return err; + } + + return err; +} + +int switchdev_handle_port_attr_set(struct net_device *dev, + struct switchdev_notifier_port_attr_info *port_attr_info, + bool (*check_cb)(const struct net_device *dev), + int (*set_cb)(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans)) +{ + int err; + + err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb, + set_cb); + if (err == -EOPNOTSUPP) + err = 0; + return err; +} +EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set); |