summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/mmu_rb.c
diff options
context:
space:
mode:
authorBrendan Cunningham <bcunningham@cornelisnetworks.com>2023-05-19 12:32:16 -0400
committerJason Gunthorpe <jgg@nvidia.com>2023-06-01 14:38:00 -0300
commitc9358de193ecfb360c3ce75f27ce839ca0b0bc8c (patch)
treec7f4c09ca5a4b606f355e6a1409f13e5e425a11c /drivers/infiniband/hw/hfi1/mmu_rb.c
parent1cc625cecce9cbbe7898c88cc9e6c06021d7d8f0 (diff)
downloadlinux-stable-c9358de193ecfb360c3ce75f27ce839ca0b0bc8c.tar.gz
linux-stable-c9358de193ecfb360c3ce75f27ce839ca0b0bc8c.tar.bz2
linux-stable-c9358de193ecfb360c3ce75f27ce839ca0b0bc8c.zip
IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate
The hfi1 user SDMA pinned-page cache will leave a stale cache entry when the cache-entry's virtual address range is invalidated but that cache entry is in-use by an outstanding SDMA request. Subsequent user SDMA requests with buffers in or spanning the virtual address range of the stale cache entry will result in packets constructed from the wrong memory, the physical pages pointed to by the stale cache entry. To fix this, remove mmu_rb_node cache entries from the mmu_rb_handler cache independent of the cache entry's refcount. Add 'struct kref refcount' to struct mmu_rb_node and manage mmu_rb_node lifetime with kref_get() and kref_put(). mmu_rb_node.refcount makes sdma_mmu_node.refcount redundant. Remove 'atomic_t refcount' from struct sdma_mmu_node and change sdma_mmu_node code to use mmu_rb_node.refcount. Move the mmu_rb_handler destructor call after a wait-for-SDMA-request-completion call so mmu_rb_nodes that need mmu_rb_handler's workqueue to queue themselves up for destruction from an interrupt context may do so. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Fixes: 00cbce5cbf88 ("IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests") Link: https://lore.kernel.org/r/168451393605.3700681.13493776139032178861.stgit@awfm-02.cornelisnetworks.com Reviewed-by: Dean Luick <dean.luick@cornelisnetworks.com> Signed-off-by: Brendan Cunningham <bcunningham@cornelisnetworks.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/mmu_rb.c')
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c101
1 files changed, 63 insertions, 38 deletions
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 1cea8b0c78e0..a864423c256d 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -19,8 +19,7 @@ static int mmu_notifier_range_start(struct mmu_notifier *,
const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list);
+static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
@@ -106,7 +105,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
}
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ kref_put(&rbnode->refcount, release_immediate);
+ }
/* Now the mm may be freed. */
mmdrop(handler->mn.mm);
@@ -134,12 +137,6 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
}
__mmu_int_rb_insert(mnode, &handler->root);
list_add_tail(&mnode->list, &handler->lru_list);
-
- ret = handler->ops->insert(handler->ops_arg, mnode);
- if (ret) {
- __mmu_int_rb_remove(mnode, &handler->root);
- list_del(&mnode->list); /* remove from LRU list */
- }
mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
@@ -183,6 +180,48 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
+/*
+ * Must NOT call while holding mnode->handler->lock.
+ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
+ * spinlock.
+ */
+static void release_immediate(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
+}
+
+/* Caller must hold mnode->handler->lock */
+static void release_nolock(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ queue_work(mnode->handler->wq, &mnode->handler->del_work);
+}
+
+/*
+ * struct mmu_rb_node->refcount kref_put() callback.
+ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
+ * handler->del_work on handler->wq.
+ * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
+ * Acquires mmu_rb_node->handler->lock; do not call while already holding
+ * handler->lock.
+ */
+void hfi1_mmu_rb_release(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ struct mmu_rb_handler *handler = mnode->handler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ spin_unlock_irqrestore(&handler->lock, flags);
+ queue_work(handler->wq, &handler->del_work);
+}
+
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
struct mmu_rb_node *rbnode, *ptr;
@@ -197,6 +236,10 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
spin_lock_irqsave(&handler->lock, flags);
list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
+ /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
+ if (kref_read(&rbnode->refcount) > 1)
+ continue;
+
if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
&stop)) {
__mmu_int_rb_remove(rbnode, &handler->root);
@@ -209,7 +252,7 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
spin_unlock_irqrestore(&handler->lock, flags);
list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
- handler->ops->remove(handler->ops_arg, rbnode);
+ kref_put(&rbnode->refcount, release_immediate);
}
}
@@ -221,7 +264,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
struct rb_root_cached *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
- bool added = false;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
@@ -230,39 +272,17 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
ptr = __mmu_int_rb_iter_next(node, range->start,
range->end - 1);
trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
- if (handler->ops->invalidate(handler->ops_arg, node)) {
- __mmu_int_rb_remove(node, root);
- /* move from LRU list to delete list */
- list_move(&node->list, &handler->del_list);
- added = true;
- }
+ /* Remove from rb tree and lru_list. */
+ __mmu_int_rb_remove(node, root);
+ list_del_init(&node->list);
+ kref_put(&node->refcount, release_nolock);
}
spin_unlock_irqrestore(&handler->lock, flags);
- if (added)
- queue_work(handler->wq, &handler->del_work);
-
return 0;
}
/*
- * Call the remove function for the given handler and the list. This
- * is expected to be called with a delete list extracted from handler.
- * The caller should not be holding the handler lock.
- */
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list)
-{
- struct mmu_rb_node *node;
-
- while (!list_empty(del_list)) {
- node = list_first_entry(del_list, struct mmu_rb_node, list);
- list_del(&node->list);
- handler->ops->remove(handler->ops_arg, node);
- }
-}
-
-/*
* Work queue function to remove all nodes that have been queued up to
* be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
@@ -274,11 +294,16 @@ static void handle_remove(struct work_struct *work)
del_work);
struct list_head del_list;
unsigned long flags;
+ struct mmu_rb_node *node;
/* remove anything that is queued to get removed */
spin_lock_irqsave(&handler->lock, flags);
list_replace_init(&handler->del_list, &del_list);
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ node = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ handler->ops->remove(handler->ops_arg, node);
+ }
}