summaryrefslogtreecommitdiffstats
path: root/drivers/iommu/iommufd/iommufd_private.h
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2023-11-12 15:44:08 -0400
committerJason Gunthorpe <jgg@nvidia.com>2023-11-29 20:30:03 -0400
commit6f9c4d8c468c189d6dc470324bd52955f8aa0a10 (patch)
tree5d3a68ae2502020f0e976c7b5bca9b9f78b382c0 /drivers/iommu/iommufd/iommufd_private.h
parentbd7a282650b8beb57bc9d19bfcb714b1ccae843a (diff)
downloadlinux-stable-6f9c4d8c468c189d6dc470324bd52955f8aa0a10.tar.gz
linux-stable-6f9c4d8c468c189d6dc470324bd52955f8aa0a10.tar.bz2
linux-stable-6f9c4d8c468c189d6dc470324bd52955f8aa0a10.zip
iommufd: Do not UAF during iommufd_put_object()
The mixture of kernel and user space lifecycle objects continues to be complicated inside iommufd. The obj->destroy_rwsem is used to bring order to the kernel driver destruction sequence but it cannot be sequenced right with the other refcounts so we end up possibly UAF'ing: BUG: KASAN: slab-use-after-free in __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 Read of size 8 at addr ffff888073cde868 by task syz-executor934/6535 CPU: 1 PID: 6535 Comm: syz-executor934 Not tainted 6.6.0-rc7-syzkaller-00195-g2af9b20dbb39 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/09/2023 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 __up_read+0x627/0x750 kernel/locking/rwsem.c:1342 iommufd_put_object drivers/iommu/iommufd/iommufd_private.h:149 [inline] iommufd_vfio_ioas+0x46c/0x580 drivers/iommu/iommufd/vfio_compat.c:146 iommufd_fops_ioctl+0x347/0x4d0 drivers/iommu/iommufd/main.c:398 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __x64_sys_ioctl+0x18f/0x210 fs/ioctl.c:857 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd There are two races here, the more obvious one: CPU 0 CPU 1 iommufd_put_object() iommufd_destroy() refcount_dec(&obj->users) iommufd_object_remove() kfree() up_read(&obj->destroy_rwsem) // Boom And there is also perhaps some possibility that the rwsem could hit an issue: CPU 0 CPU 1 iommufd_put_object() iommufd_object_destroy_user() refcount_dec(&obj->users); down_write(&obj->destroy_rwsem) up_read(&obj->destroy_rwsem); atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); tmp = atomic_long_add_return_release() rwsem_try_write_lock() iommufd_object_remove() up_write(&obj->destroy_rwsem) kfree() clear_nonspinnable() // Boom Fix this by reorganizing this again so that two refcounts are used to keep track of things with a rule that users == 0 && shortterm_users == 0 means no other threads have that memory. Put a wait_queue in the iommufd_ctx object that is triggered when any sub object reaches a 0 shortterm_users. This allows the same wait for userspace ioctls to finish behavior that the rwsem was providing. This is weaker still than the prior versions: - There is no bias on shortterm_users so if some thread is waiting to destroy other threads can continue to get new read sides - If destruction fails, eg because of an active in-kernel user, then shortterm_users will have cycled to zero momentarily blocking new users - If userspace races destroy with other userspace operations they continue to get an EBUSY since we still can't intermix looking up an ID and sleeping for its unref In all cases these are things that userspace brings on itself, correct programs will not hit them. Fixes: 99f98a7c0d69 ("iommufd: IOMMUFD_DESTROY should not increase the refcount") Link: https://lore.kernel.org/all/2-v2-ca9e00171c5b+123-iommufd_syz4_jgg@nvidia.com/ Reported-by: syzbot+d31adfb277377ef8fcba@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000055ef9a0609336580@google.com Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/iommu/iommufd/iommufd_private.h')
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h67
1 files changed, 55 insertions, 12 deletions
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f918a22f0d48..abae041e256f 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -21,6 +21,7 @@ struct iommufd_ctx {
struct file *file;
struct xarray objects;
struct xarray groups;
+ wait_queue_head_t destroy_wait;
u8 account_mode;
/* Compatibility with VFIO no iommu */
@@ -135,7 +136,7 @@ enum iommufd_object_type {
/* Base struct for all objects with a userspace ID handle. */
struct iommufd_object {
- struct rw_semaphore destroy_rwsem;
+ refcount_t shortterm_users;
refcount_t users;
enum iommufd_object_type type;
unsigned int id;
@@ -143,10 +144,15 @@ struct iommufd_object {
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
- if (!down_read_trylock(&obj->destroy_rwsem))
+ if (!refcount_inc_not_zero(&obj->users))
return false;
- if (!refcount_inc_not_zero(&obj->users)) {
- up_read(&obj->destroy_rwsem);
+ if (!refcount_inc_not_zero(&obj->shortterm_users)) {
+ /*
+ * If the caller doesn't already have a ref on obj this must be
+ * called under the xa_lock. Otherwise the caller is holding a
+ * ref on users. Thus it cannot be one before this decrement.
+ */
+ refcount_dec(&obj->users);
return false;
}
return true;
@@ -157,8 +163,13 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
+ /*
+ * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
+ * a spurious !0 users with a 0 shortterm_users.
+ */
refcount_dec(&obj->users);
- up_read(&obj->destroy_rwsem);
+ if (refcount_dec_and_test(&obj->shortterm_users))
+ wake_up_interruptible_all(&ictx->destroy_wait);
}
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
@@ -166,17 +177,49 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
-void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj, bool allow_fail);
+
+enum {
+ REMOVE_WAIT_SHORTTERM = 1,
+};
+int iommufd_object_remove(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy, u32 id,
+ unsigned int flags);
+
+/*
+ * The caller holds a users refcount and wants to destroy the object. At this
+ * point the caller has no shortterm_users reference and at least the xarray
+ * will be holding one.
+ */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
- __iommufd_object_destroy_user(ictx, obj, false);
+ int ret;
+
+ ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+
+ /*
+ * If there is a bug and we couldn't destroy the object then we did put
+ * back the caller's users refcount and will eventually try to free it
+ * again during close.
+ */
+ WARN_ON(ret);
}
-static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj)
+
+/*
+ * The HWPT allocated by autodomains is used in possibly many devices and
+ * is automatically destroyed when its refcount reaches zero.
+ *
+ * If userspace uses the HWPT manually, even for a short term, then it will
+ * disrupt this refcounting and the auto-free in the kernel will not work.
+ * Userspace that tries to use the automatically allocated HWPT must be careful
+ * to ensure that it is consistently destroyed, eg by not racing accesses
+ * and by not attaching an automatic HWPT to a device manually.
+ */
+static inline void
+iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
{
- __iommufd_object_destroy_user(ictx, obj, true);
+ iommufd_object_remove(ictx, obj, obj->id, 0);
}
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
@@ -312,7 +355,7 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
lockdep_assert_not_held(&hwpt_paging->ioas->mutex);
if (hwpt_paging->auto_domain) {
- iommufd_object_deref_user(ictx, &hwpt->obj);
+ iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
return;
}
}