summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/mlx5/main.c13
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c14
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c96
4 files changed, 102 insertions, 23 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 45afe1d9e125..614b2acbc621 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5829,8 +5829,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
srcu_barrier(&dev->mr_srcu);
cleanup_srcu_struct(&dev->mr_srcu);
- drain_workqueue(dev->advise_mr_wq);
- destroy_workqueue(dev->advise_mr_wq);
}
kfree(dev->port);
}
@@ -5885,18 +5883,9 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->memic.dev = mdev;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- dev->advise_mr_wq =
- alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
- if (!dev->advise_mr_wq) {
- err = -ENOMEM;
- goto err_mp;
- }
-
err = init_srcu_struct(&dev->mr_srcu);
- if (err) {
- destroy_workqueue(dev->advise_mr_wq);
+ if (err)
goto err_mp;
- }
}
return 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index f9817284c7a3..4a617d78eae1 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -589,6 +589,7 @@ struct mlx5_ib_mr {
atomic_t num_leaf_free;
wait_queue_head_t q_leaf_free;
struct mlx5_async_work cb_work;
+ atomic_t num_pending_prefetch;
};
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
@@ -929,7 +930,6 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
u32 null_mkey;
- struct workqueue_struct *advise_mr_wq;
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 705a79cd21da..c85f00255884 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1339,8 +1339,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
mr->live = 1;
+ atomic_set(&mr->num_pending_prefetch, 0);
+ }
return &mr->ibmr;
error:
@@ -1576,8 +1578,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
if (is_odp_mr(mr)) {
struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
- /* Prevent new page faults from succeeding */
+ /* Prevent new page faults and
+ * prefetch requests from succeeding
+ */
mr->live = 0;
+
+ /* dequeue pending prefetch requests for the mr */
+ if (atomic_read(&mr->num_pending_prefetch))
+ flush_workqueue(system_unbound_wq);
+ WARN_ON(atomic_read(&mr->num_pending_prefetch));
+
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 335fd0c6ea2a..a1346978702f 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -535,6 +535,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->umem = umem;
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
+ atomic_set(&imr->num_pending_prefetch, 0);
return imr;
}
@@ -1660,24 +1661,91 @@ struct prefetch_mr_work {
struct ib_sge sg_list[0];
};
+static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
+ struct ib_sge *sg_list, u32 num_sge,
+ u32 from)
+{
+ u32 i;
+ int srcu_key;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+
+ for (i = from; i < num_sge; ++i) {
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mr *mr;
+
+ mmkey = __mlx5_mr_lookup(dev->mdev,
+ mlx5_base_mkey(sg_list[i].lkey));
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ atomic_dec(&mr->num_pending_prefetch);
+ }
+
+ srcu_read_unlock(&dev->mr_srcu, srcu_key);
+}
+
+static bool num_pending_prefetch_inc(struct ib_pd *pd,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ bool ret = true;
+ u32 i;
+
+ for (i = 0; i < num_sge; ++i) {
+ struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mr *mr;
+
+ mmkey = __mlx5_mr_lookup(dev->mdev,
+ mlx5_base_mkey(sg_list[i].lkey));
+ if (!mmkey || mmkey->key != sg_list[i].lkey) {
+ ret = false;
+ break;
+ }
+
+ if (mmkey->type != MLX5_MKEY_MR) {
+ ret = false;
+ break;
+ }
+
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+
+ if (mr->ibmr.pd != pd) {
+ ret = false;
+ break;
+ }
+
+ if (!mr->live) {
+ ret = false;
+ break;
+ }
+
+ atomic_inc(&mr->num_pending_prefetch);
+ }
+
+ if (!ret)
+ num_pending_prefetch_dec(dev, sg_list, i, 0);
+
+ return ret;
+}
+
static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags,
struct ib_sge *sg_list, u32 num_sge)
{
- int i;
+ u32 i;
+ int ret = 0;
for (i = 0; i < num_sge; ++i) {
struct ib_sge *sg = &sg_list[i];
int bytes_committed = 0;
- int ret;
ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr,
sg->length,
&bytes_committed, NULL,
pf_flags);
if (ret < 0)
- return ret;
+ break;
}
- return 0;
+
+ return ret < 0 ? ret : 0;
}
static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
@@ -1690,7 +1758,8 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
w->num_sge);
ib_device_put(&w->dev->ib_dev);
}
- put_device(&w->dev->ib_dev.dev);
+
+ num_pending_prefetch_dec(w->dev, w->sg_list, w->num_sge, 0);
kfree(w);
}
@@ -1701,6 +1770,8 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
struct mlx5_ib_dev *dev = to_mdev(pd->device);
u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
struct prefetch_mr_work *work;
+ bool valid_req;
+ int srcu_key;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
@@ -1715,12 +1786,21 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
- get_device(&dev->ib_dev.dev);
work->dev = dev;
work->pf_flags = pf_flags;
work->num_sge = num_sge;
INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
- schedule_work(&work->work);
- return 0;
+
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+
+ valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge);
+ if (valid_req)
+ queue_work(system_unbound_wq, &work->work);
+ else
+ kfree(work);
+
+ srcu_read_unlock(&dev->mr_srcu, srcu_key);
+
+ return valid_req ? 0 : -EINVAL;
}