diff options
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx5/odp.c | 96 |
4 files changed, 102 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 45afe1d9e125..614b2acbc621 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5829,8 +5829,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); cleanup_srcu_struct(&dev->mr_srcu); - drain_workqueue(dev->advise_mr_wq); - destroy_workqueue(dev->advise_mr_wq); } kfree(dev->port); } @@ -5885,18 +5883,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->memic.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - dev->advise_mr_wq = - alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); - if (!dev->advise_mr_wq) { - err = -ENOMEM; - goto err_mp; - } - err = init_srcu_struct(&dev->mr_srcu); - if (err) { - destroy_workqueue(dev->advise_mr_wq); + if (err) goto err_mp; - } } return 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f9817284c7a3..4a617d78eae1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -589,6 +589,7 @@ struct mlx5_ib_mr { atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; + atomic_t num_pending_prefetch; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -929,7 +930,6 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; u32 null_mkey; - struct workqueue_struct *advise_mr_wq; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 705a79cd21da..c85f00255884 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1339,8 +1339,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { mr->live = 1; + atomic_set(&mr->num_pending_prefetch, 0); + } return &mr->ibmr; error: @@ -1576,8 +1578,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) if (is_odp_mr(mr)) { struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - /* Prevent new page faults from succeeding */ + /* Prevent new page faults and + * prefetch requests from succeeding + */ mr->live = 0; + + /* dequeue pending prefetch requests for the mr */ + if (atomic_read(&mr->num_pending_prefetch)) + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_pending_prefetch)); + /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 335fd0c6ea2a..a1346978702f 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -535,6 +535,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); + atomic_set(&imr->num_pending_prefetch, 0); return imr; } @@ -1660,24 +1661,91 @@ struct prefetch_mr_work { struct ib_sge sg_list[0]; }; +static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, + struct ib_sge *sg_list, u32 num_sge, + u32 from) +{ + u32 i; + int srcu_key; + + srcu_key = srcu_read_lock(&dev->mr_srcu); + + for (i = from; i < num_sge; ++i) { + struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mr *mr; + + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(sg_list[i].lkey)); + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + atomic_dec(&mr->num_pending_prefetch); + } + + srcu_read_unlock(&dev->mr_srcu, srcu_key); +} + +static bool num_pending_prefetch_inc(struct ib_pd *pd, + struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + bool ret = true; + u32 i; + + for (i = 0; i < num_sge; ++i) { + struct mlx5_core_mkey *mmkey; + struct mlx5_ib_mr *mr; + + mmkey = __mlx5_mr_lookup(dev->mdev, + mlx5_base_mkey(sg_list[i].lkey)); + if (!mmkey || mmkey->key != sg_list[i].lkey) { + ret = false; + break; + } + + if (mmkey->type != MLX5_MKEY_MR) { + ret = false; + break; + } + + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + + if (mr->ibmr.pd != pd) { + ret = false; + break; + } + + if (!mr->live) { + ret = false; + break; + } + + atomic_inc(&mr->num_pending_prefetch); + } + + if (!ret) + num_pending_prefetch_dec(dev, sg_list, i, 0); + + return ret; +} + static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - int i; + u32 i; + int ret = 0; for (i = 0; i < num_sge; ++i) { struct ib_sge *sg = &sg_list[i]; int bytes_committed = 0; - int ret; ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, sg->length, &bytes_committed, NULL, pf_flags); if (ret < 0) - return ret; + break; } - return 0; + + return ret < 0 ? ret : 0; } static void mlx5_ib_prefetch_mr_work(struct work_struct *work) @@ -1690,7 +1758,8 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work) w->num_sge); ib_device_put(&w->dev->ib_dev); } - put_device(&w->dev->ib_dev.dev); + + num_pending_prefetch_dec(w->dev, w->sg_list, w->num_sge, 0); kfree(w); } @@ -1701,6 +1770,8 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; struct prefetch_mr_work *work; + bool valid_req; + int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1715,12 +1786,21 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - get_device(&dev->ib_dev.dev); work->dev = dev; work->pf_flags = pf_flags; work->num_sge = num_sge; INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - schedule_work(&work->work); - return 0; + + srcu_key = srcu_read_lock(&dev->mr_srcu); + + valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); + if (valid_req) + queue_work(system_unbound_wq, &work->work); + else + kfree(work); + + srcu_read_unlock(&dev->mr_srcu, srcu_key); + + return valid_req ? 0 : -EINVAL; } |