Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c        | 103
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c |   2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c   |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c      |   2
-rw-r--r--  drivers/infiniband/sw/siw/siw_mem.c        |   2
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c      |  16
6 files changed, 81 insertions, 46 deletions
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index ddcfc116b19a..c7a4ee896121 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
 
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+        struct mlx5_core_dev *peer_dev;
+        int i;
+
+        mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+                u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+                if (mlx5_lag_is_mpesw(peer_dev))
+                        *num_ports += peer_num_ports;
+                else
+                        /* Only 1 ib port is the representor for all uplinks */
+                        *num_ports += peer_num_ports - 1;
+        }
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
         u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+        struct mlx5_core_dev *lag_master = dev;
         const struct mlx5_ib_profile *profile;
         struct mlx5_core_dev *peer_dev;
         struct mlx5_ib_dev *ibdev;
-        int second_uplink = false;
-        u32 peer_num_ports;
+        int new_uplink = false;
         int vport_index;
         int ret;
+        int i;
 
         vport_index = rep->vport_index;
 
         if (mlx5_lag_is_shared_fdb(dev)) {
-                peer_dev = mlx5_lag_get_peer_mdev(dev);
-                peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
                 if (mlx5_lag_is_master(dev)) {
-                        if (mlx5_lag_is_mpesw(dev))
-                                num_ports += peer_num_ports;
-                        else
-                                num_ports += peer_num_ports - 1;
-
+                        mlx5_ib_num_ports_update(dev, &num_ports);
                 } else {
                         if (rep->vport == MLX5_VPORT_UPLINK) {
                                 if (!mlx5_lag_is_mpesw(dev))
                                         return 0;
-                                second_uplink = true;
+                                new_uplink = true;
                         }
+                        mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+                                u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+                                if (mlx5_lag_is_master(peer_dev))
+                                        lag_master = peer_dev;
+                                else if (!mlx5_lag_is_mpesw(dev))
+                                        /* Only 1 ib port is the representor for all uplinks */
+                                        peer_n_ports--;
 
-                        vport_index += peer_num_ports;
-                        dev = peer_dev;
+                                if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+                                        vport_index += peer_n_ports;
+                        }
                 }
         }
 
-        if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+        if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
                 profile = &raw_eth_profile;
         else
-                return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+                return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
         ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
         if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
         vport_index = rep->vport_index;
         ibdev->port[vport_index].rep = rep;
         ibdev->port[vport_index].roce.netdev =
-                mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
-        ibdev->mdev = dev;
+                mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+        ibdev->mdev = lag_master;
         ibdev->num_ports = num_ports;
 
         ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
                 goto fail_add;
 
         rep->rep_data[REP_IB].priv = ibdev;
-        if (mlx5_lag_is_shared_fdb(dev))
-                mlx5_ib_register_peer_vport_reps(dev);
+        if (mlx5_lag_is_shared_fdb(lag_master))
+                mlx5_ib_register_peer_vport_reps(lag_master);
 
         return 0;
 
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
         struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
         int vport_index = rep->vport_index;
         struct mlx5_ib_port *port;
+        int i;
 
         if (WARN_ON(!mdev))
                 return;
 
+        if (!dev)
+                return;
+
         if (mlx5_lag_is_shared_fdb(mdev) &&
             !mlx5_lag_is_master(mdev)) {
-                struct mlx5_core_dev *peer_mdev;
-
-                if (rep->vport == MLX5_VPORT_UPLINK)
+                if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
                         return;
-                peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-                vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+                for (i = 0; i < dev->num_ports; i++) {
+                        if (dev->port[i].rep == rep)
+                                break;
+                }
+                if (WARN_ON(i == dev->num_ports))
+                        return;
+                vport_index = i;
         }
 
-        if (!dev)
-                return;
-
         port = &dev->port[vport_index];
         write_lock(&port->roce.netdev_lock);
         port->roce.netdev = NULL;
@@ -143,13 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
         port->rep = NULL;
 
         if (rep->vport == MLX5_VPORT_UPLINK) {
-                struct mlx5_core_dev *peer_mdev;
-                struct mlx5_eswitch *esw;
+
+                if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+                        return;
 
                 if (mlx5_lag_is_shared_fdb(mdev)) {
-                        peer_mdev = mlx5_lag_get_peer_mdev(mdev);
-                        esw = peer_mdev->priv.eswitch;
-                        mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+                        struct mlx5_core_dev *peer_mdev;
+                        struct mlx5_eswitch *esw;
+
+                        mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+                                esw = peer_mdev->priv.eswitch;
+                                mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+                        }
                 }
                 __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
         }
@@ -163,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
 
 static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
 {
-        struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+        struct mlx5_core_dev *peer_mdev;
         struct mlx5_eswitch *esw;
+        int i;
 
-        if (!peer_mdev)
-                return;
-
-        esw = peer_mdev->priv.eswitch;
-        mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+        mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+                esw = peer_mdev->priv.eswitch;
+                mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+        }
 }
 
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index f693bc753b6b..1bb7507325bc 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -111,7 +111,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
                 ret = pin_user_pages(start_page + got * PAGE_SIZE,
                                      num_pages - got,
                                      FOLL_LONGTERM | FOLL_WRITE,
-                                     p + got, NULL);
+                                     p + got);
                 if (ret < 0) {
                         mmap_read_unlock(current->mm);
                         goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 2a5cac2658ec..84e0f41e7dfa 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -140,7 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
                 ret = pin_user_pages(cur_base,
                                      min_t(unsigned long, npages,
                                      PAGE_SIZE / sizeof(struct page *)),
-                                     gup_flags, page_list, NULL);
+                                     gup_flags, page_list);
 
                 if (ret < 0)
                         goto out;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f4321a172000..903f0b71447e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1395,7 +1395,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
         if (cleanup_err)
                 rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
 
-        kfree_rcu(mr);
+        kfree_rcu_mightsleep(mr);
         return 0;
 
 err_out:
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index f51ab2ccf151..e6e25f15567d 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -422,7 +422,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
                 umem->page_chunk[i].plist = plist;
                 while (nents) {
                         rv = pin_user_pages(first_page_va, nents, foll_flags,
-                                            plist, NULL);
+                                            plist);
                         if (rv < 0)
                                 goto out_sem_up;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 4b292e0504f1..7c7a51d36d0c 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -312,7 +312,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 }
 
 /*
- * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES.
  *
  * Using sendpage to push page by page appears to be less efficient
  * than using sendmsg, even if data are copied.
@@ -323,20 +323,26 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
 static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
                              size_t size)
 {
+        struct bio_vec bvec;
+        struct msghdr msg = {
+                .msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SPLICE_PAGES),
+        };
         struct sock *sk = s->sk;
-        int i = 0, rv = 0, sent = 0,
-            flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+        int i = 0, rv = 0, sent = 0;
 
         while (size) {
                 size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
 
                 if (size + offset <= PAGE_SIZE)
-                        flags = MSG_MORE | MSG_DONTWAIT;
+                        msg.msg_flags &= ~MSG_MORE;
 
                 tcp_rate_check_app_limited(sk);
+                bvec_set_page(&bvec, page[i], bytes, offset);
+                iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+try_page_again:
                 lock_sock(sk);
-                rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+                rv = tcp_sendmsg_locked(sk, &msg, size);
                 release_sock(sk);
 
                 if (rv > 0) {
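
The qib, usnic, and siw_mem hunks above are the same mechanical update: pin_user_pages() lost its unused vmas output parameter, so callers simply drop the trailing NULL. A minimal sketch of the new calling convention, assuming a hypothetical demo_pin_range() helper invented here for illustration (it is not part of any of these drivers):

#include <linux/mm.h>
#include <linux/sched.h>

/*
 * Long-term pin of @npages user pages starting at @start, using the
 * post-change signature pin_user_pages(start, nr_pages, gup_flags,
 * pages) -- the old fifth "struct vm_area_struct **vmas" argument is
 * gone. pin_user_pages() still requires mmap_lock held for read.
 */
static int demo_pin_range(unsigned long start, unsigned long npages,
                          struct page **pages)
{
        int pinned;

        mmap_read_lock(current->mm);
        pinned = pin_user_pages(start, npages,
                                FOLL_LONGTERM | FOLL_WRITE, pages);
        mmap_read_unlock(current->mm);

        /* Negative return (e.g. -EFAULT) means nothing was pinned. */
        if (pinned < 0)
                return pinned;

        /* Every successful pin must later be balanced with unpin_user_pages(). */
        return pinned;
}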
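The one-line rxe change reflects the rename of the single-argument kfree_rcu() form: without an rcu_head member to queue on, the free may block waiting for a grace period, so the API now spells that out as kfree_rcu_mightsleep(). A short sketch contrasting the two forms, using a made-up demo_obj type:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_obj {
        struct rcu_head rcu;    /* needed by the two-argument form */
        int payload;
};

/* Two-argument form: queues on @rcu, never sleeps, safe in atomic context. */
static void demo_free_atomic(struct demo_obj *obj)
{
        kfree_rcu(obj, rcu);
}

/*
 * One-argument form, renamed to make the blocking behavior explicit:
 * it may wait for a grace period, so it is only valid in sleepable
 * context -- which is what the rxe hunk above relies on.
 */
static void demo_free_sleepable(struct demo_obj *obj)
{
        kfree_rcu_mightsleep(obj);
}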
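The siw_qp_tx conversion replaces the removed do_tcp_sendpages() with tcp_sendmsg_locked() plus MSG_SPLICE_PAGES: the page is described by a bio_vec, attached to the message via iov_iter_bvec(), and the TCP layer splices it into the stream instead of copying. A condensed sketch of that pattern, with the hypothetical helper demo_splice_page() standing in for the driver's per-page loop:

#include <linux/bvec.h>
#include <linux/socket.h>
#include <linux/uio.h>
#include <net/sock.h>
#include <net/tcp.h>

/*
 * Splice one page fragment into a connected TCP socket the way the
 * siw hunk above does. @more keeps MSG_MORE set when further data
 * will follow, mirroring the driver clearing MSG_MORE on the last
 * fragment.
 */
static int demo_splice_page(struct sock *sk, struct page *page,
                            int offset, size_t len, bool more)
{
        struct bio_vec bvec;
        struct msghdr msg = {
                .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
        };
        int rv;

        if (more)
                msg.msg_flags |= MSG_MORE;

        /* Describe the fragment and hand it to the message iterator. */
        bvec_set_page(&bvec, page, len, offset);
        iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);

        lock_sock(sk);
        rv = tcp_sendmsg_locked(sk, &msg, len); /* bytes queued, or -errno */
        release_sock(sk);

        return rv;
}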