-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h       3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c   88
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_drm.c       3
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h             3
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c      2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c         2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_mn.c        157
-rw-r--r--  drivers/misc/sgi-gru/grufile.c              1
-rw-r--r--  drivers/misc/sgi-gru/grutables.h            2
-rw-r--r--  drivers/misc/sgi-gru/grutlbpurge.c         84
-rw-r--r--  include/linux/hmm.h                        12
-rw-r--r--  include/linux/mm_types.h                    6
-rw-r--r--  include/linux/mmu_notifier.h               35
-rw-r--r--  kernel/fork.c                               1
-rw-r--r--  mm/hmm.c                                  121
-rw-r--r--  mm/mmu_notifier.c                         215
17 files changed, 354 insertions, 383 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2e8b4238efd..abc233088201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1464,6 +1465,7 @@ static void __exit amdgpu_exit(void) amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); + mmu_notifier_synchronize(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3933fb6a371e..9450e20d1709 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -686,9 +686,6 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - /* Use for delayed freeing of kfd_process structure */ - struct rcu_head rcu; - unsigned int pasid; unsigned int doorbell_index; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8f1076c0c88a..e5e326f2f267 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -62,8 +62,8 @@ static struct workqueue_struct *kfd_restore_wq; static struct kfd_process *find_process(const struct task_struct *thread); static void kfd_process_ref_release(struct kref *ref); -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep); +static struct kfd_process *create_process(const struct task_struct *thread); +static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep); static void evict_process_worker(struct work_struct *work); static void restore_process_worker(struct work_struct *work); @@ -289,7 +289,15 @@ struct kfd_process *kfd_create_process(struct file *filep) if (process) { pr_debug("Process already found\n"); } else { - process = create_process(thread, filep); + process = create_process(thread); + if (IS_ERR(process)) + goto out; + + ret = kfd_process_init_cwsr_apu(process, filep); + if (ret) { + process = ERR_PTR(ret); + goto out; + } if (!procfs.kobj) goto out; @@ -478,11 +486,9 @@ static void kfd_process_ref_release(struct kref *ref) queue_work(kfd_process_wq, &p->release_work); } -static void kfd_process_destroy_delayed(struct rcu_head *rcu) +static void kfd_process_free_notifier(struct mmu_notifier *mn) { - struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); - - kfd_unref_process(p); + kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -534,12 +540,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, mutex_unlock(&p->mutex); - mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); - mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); + mmu_notifier_put(&p->mmu_notifier); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, + .free_notifier = kfd_process_free_notifier, }; static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) @@ -609,81 +615,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) return 0; } -static struct kfd_process *create_process(const struct task_struct *thread, - struct file *filep) +/* + * On return the 
kfd_process is fully operational and will be freed when the + * mm is released + */ +static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; int err = -ENOMEM; process = kzalloc(sizeof(*process), GFP_KERNEL); - if (!process) goto err_alloc_process; - process->pasid = kfd_pasid_alloc(); - if (process->pasid == 0) - goto err_alloc_pasid; - - if (kfd_alloc_process_doorbells(process) < 0) - goto err_alloc_doorbells; - kref_init(&process->ref); - mutex_init(&process->mutex); - process->mm = thread->mm; - - /* register notifier */ - process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = mmu_notifier_register(&process->mmu_notifier, process->mm); - if (err) - goto err_mmu_notifier; - - hash_add_rcu(kfd_processes_table, &process->kfd_processes, - (uintptr_t)process->mm); - process->lead_thread = thread->group_leader; - get_task_struct(process->lead_thread); - INIT_LIST_HEAD(&process->per_device_data); - + INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); + INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); + process->last_restore_timestamp = get_jiffies_64(); kfd_event_init_process(process); + process->is_32bit_user_mode = in_compat_syscall(); + + process->pasid = kfd_pasid_alloc(); + if (process->pasid == 0) + goto err_alloc_pasid; + + if (kfd_alloc_process_doorbells(process) < 0) + goto err_alloc_doorbells; err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; /* init process apertures*/ - process->is_32bit_user_mode = in_compat_syscall(); err = kfd_init_apertures(process); if (err != 0) goto err_init_apertures; - INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); - INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); - process->last_restore_timestamp = get_jiffies_64(); - - err = kfd_process_init_cwsr_apu(process, filep); + /* Must be last, have to use release destruction after this */ + process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; + err = mmu_notifier_register(&process->mmu_notifier, process->mm); if (err) - goto err_init_cwsr; + goto err_register_notifier; + + get_task_struct(process->lead_thread); + hash_add_rcu(kfd_processes_table, &process->kfd_processes, + (uintptr_t)process->mm); return process; -err_init_cwsr: +err_register_notifier: kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: - hash_del_rcu(&process->kfd_processes); - synchronize_rcu(); - mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); -err_mmu_notifier: - mutex_destroy(&process->mutex); kfd_free_process_doorbells(process); err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: + mutex_destroy(&process->mutex); kfree(process); err_alloc_process: return ERR_PTR(err); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 7c2fcaba42d6..a0e48a482452 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> @@ -1292,6 +1293,8 @@ nouveau_drm_exit(void) #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_unregister(&nouveau_platform_driver); #endif + if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)) + mmu_notifier_synchronize(); } module_init(nouveau_drm_init); diff --git 
a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 32808e50be12..918164f90b11 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2451,9 +2451,6 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; - - struct mutex mn_lock; - DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index dceb554e5674..788b1d8a80e6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1325,8 +1325,6 @@ int radeon_device_init(struct radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); - mutex_init(&rdev->mn_lock); - hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a6cbe11f79c6..b6535ac91fdb 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -35,6 +35,7 @@ #include <linux/module.h> #include <linux/pm_runtime.h> #include <linux/vga_switcheroo.h> +#include <linux/mmu_notifier.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> @@ -624,6 +625,7 @@ static void __exit radeon_exit(void) { pci_unregister_driver(pdriver); radeon_unregister_atpx_handler(); + mmu_notifier_synchronize(); } module_init(radeon_init); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 8c3871ed23a9..fc8254273a80 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -37,17 +37,8 @@ #include "radeon.h" struct radeon_mn { - /* constant after initialisation */ - struct radeon_device *rdev; - struct mm_struct *mm; struct mmu_notifier mn; - /* only used on destruction */ - struct work_struct work; - - /* protected by rdev->mn_lock */ - struct hlist_node node; - /* objects protected by lock */ struct mutex lock; struct rb_root_cached objects; @@ -59,55 +50,6 @@ struct radeon_mn_node { }; /** - * radeon_mn_destroy - destroy the rmn - * - * @work: previously sheduled work item - * - * Lazy destroys the notifier from a work item - */ -static void radeon_mn_destroy(struct work_struct *work) -{ - struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); - struct radeon_device *rdev = rmn->rdev; - struct radeon_mn_node *node, *next_node; - struct radeon_bo *bo, *next_bo; - - mutex_lock(&rdev->mn_lock); - mutex_lock(&rmn->lock); - hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { - - interval_tree_remove(&node->it, &rmn->objects); - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); - } - kfree(node); - } - mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); - mmu_notifier_unregister(&rmn->mn, rmn->mm); - kfree(rmn); -} - -/** - * radeon_mn_release - callback to notify about mm destruction - * - * @mn: our notifier - * @mn: the mm this callback is about - * - * Shedule a work item to lazy destroy our notifier. 
- */ -static void radeon_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); - INIT_WORK(&rmn->work, radeon_mn_destroy); - schedule_work(&rmn->work); -} - -/** * radeon_mn_invalidate_range_start - callback to notify about mm change * * @mn: our notifier @@ -183,65 +125,44 @@ out_unlock: return ret; } -static const struct mmu_notifier_ops radeon_mn_ops = { - .release = radeon_mn_release, - .invalidate_range_start = radeon_mn_invalidate_range_start, -}; +static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct mmu_notifier_range range = { + .mm = mm, + .start = 0, + .end = ULONG_MAX, + .flags = 0, + .event = MMU_NOTIFY_UNMAP, + }; + + radeon_mn_invalidate_range_start(mn, &range); +} -/** - * radeon_mn_get - create notifier context - * - * @rdev: radeon device pointer - * - * Creates a notifier context for current->mm. - */ -static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; struct radeon_mn *rmn; - int r; - - if (down_write_killable(&mm->mmap_sem)) - return ERR_PTR(-EINTR); - - mutex_lock(&rdev->mn_lock); - - hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) - goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); - goto release_locks; - } + if (!rmn) + return ERR_PTR(-ENOMEM); - rmn->rdev = rdev; - rmn->mm = mm; - rmn->mn.ops = &radeon_mn_ops; mutex_init(&rmn->lock); rmn->objects = RB_ROOT_CACHED; - - r = __mmu_notifier_register(&rmn->mn, mm); - if (r) - goto free_rmn; - - hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); - -release_locks: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - - return rmn; - -free_rmn: - mutex_unlock(&rdev->mn_lock); - up_write(&mm->mmap_sem); - kfree(rmn); + return &rmn->mn; +} - return ERR_PTR(r); +static void radeon_mn_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct radeon_mn, mn)); } +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, + .alloc_notifier = radeon_mn_alloc_notifier, + .free_notifier = radeon_mn_free_notifier, +}; + /** * radeon_mn_register - register a BO for notifier updates * @@ -254,15 +175,16 @@ free_rmn: int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) { unsigned long end = addr + radeon_bo_size(bo) - 1; - struct radeon_device *rdev = bo->rdev; + struct mmu_notifier *mn; struct radeon_mn *rmn; struct radeon_mn_node *node = NULL; struct list_head bos; struct interval_tree_node *it; - rmn = radeon_mn_get(rdev); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + mn = mmu_notifier_get(&radeon_mn_ops, current->mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + rmn = container_of(mn, struct radeon_mn, mn); INIT_LIST_HEAD(&bos); @@ -309,22 +231,13 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) */ void radeon_mn_unregister(struct radeon_bo *bo) { - struct radeon_device *rdev = bo->rdev; - struct radeon_mn *rmn; + struct radeon_mn *rmn = bo->mn; struct list_head *head; - mutex_lock(&rdev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { - mutex_unlock(&rdev->mn_lock); - return; - } - mutex_lock(&rmn->lock); /* save the next list entry for later */ head = bo->mn_list.next; - bo->mn = NULL; list_del(&bo->mn_list); if (list_empty(head)) { @@ -335,5 +248,7 @@ void 
radeon_mn_unregister(struct radeon_bo *bo) } mutex_unlock(&rmn->lock); - mutex_unlock(&rdev->mn_lock); + + mmu_notifier_put(&rmn->mn); + bo->mn = NULL; } diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index a2a142ae087b..9d042310214f 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -573,6 +573,7 @@ static void __exit gru_exit(void) gru_free_tables(); misc_deregister(&gru_miscdev); gru_proc_exit(); + mmu_notifier_synchronize(); } static const struct file_operations gru_fops = { diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 438191c22057..a7e44b2eb413 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -307,10 +307,8 @@ struct gru_mm_tracker { /* pack to reduce size */ struct gru_mm_struct { struct mmu_notifier ms_notifier; - atomic_t ms_refcnt; spinlock_t ms_asid_lock; /* protects ASID assignment */ atomic_t ms_range_active;/* num range_invals active */ - char ms_released; wait_queue_head_t ms_wait_queue; DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 59ba0adf23ce..10921cd2608d 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -235,83 +235,47 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn, gms, range->start, range->end); } -static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +static struct mmu_notifier *gru_alloc_notifier(struct mm_struct *mm) { - struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, - ms_notifier); + struct gru_mm_struct *gms; + + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + init_waitqueue_head(&gms->ms_wait_queue); - gms->ms_released = 1; - gru_dbg(grudev, "gms %p\n", gms); + return &gms->ms_notifier; } +static void gru_free_notifier(struct mmu_notifier *mn) +{ + kfree(container_of(mn, struct gru_mm_struct, ms_notifier)); + STAT(gms_free); +} static const struct mmu_notifier_ops gru_mmuops = { .invalidate_range_start = gru_invalidate_range_start, .invalidate_range_end = gru_invalidate_range_end, - .release = gru_release, + .alloc_notifier = gru_alloc_notifier, + .free_notifier = gru_free_notifier, }; -/* Move this to the basic mmu_notifier file. But for now... 
*/ -static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, - const struct mmu_notifier_ops *ops) -{ - struct mmu_notifier *mn, *gru_mn = NULL; - - if (mm->mmu_notifier_mm) { - rcu_read_lock(); - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, - hlist) - if (mn->ops == ops) { - gru_mn = mn; - break; - } - rcu_read_unlock(); - } - return gru_mn; -} - struct gru_mm_struct *gru_register_mmu_notifier(void) { - struct gru_mm_struct *gms; struct mmu_notifier *mn; - int err; - - mn = mmu_find_ops(current->mm, &gru_mmuops); - if (mn) { - gms = container_of(mn, struct gru_mm_struct, ms_notifier); - atomic_inc(&gms->ms_refcnt); - } else { - gms = kzalloc(sizeof(*gms), GFP_KERNEL); - if (!gms) - return ERR_PTR(-ENOMEM); - STAT(gms_alloc); - spin_lock_init(&gms->ms_asid_lock); - gms->ms_notifier.ops = &gru_mmuops; - atomic_set(&gms->ms_refcnt, 1); - init_waitqueue_head(&gms->ms_wait_queue); - err = __mmu_notifier_register(&gms->ms_notifier, current->mm); - if (err) - goto error; - } - if (gms) - gru_dbg(grudev, "gms %p, refcnt %d\n", gms, - atomic_read(&gms->ms_refcnt)); - return gms; -error: - kfree(gms); - return ERR_PTR(err); + + mn = mmu_notifier_get_locked(&gru_mmuops, current->mm); + if (IS_ERR(mn)) + return ERR_CAST(mn); + + return container_of(mn, struct gru_mm_struct, ms_notifier); } void gru_drop_mmu_notifier(struct gru_mm_struct *gms) { - gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, - atomic_read(&gms->ms_refcnt), gms->ms_released); - if (atomic_dec_return(&gms->ms_refcnt) == 0) { - if (!gms->ms_released) - mmu_notifier_unregister(&gms->ms_notifier, current->mm); - kfree(gms); - STAT(gms_free); - } + mmu_notifier_put(&gms->ms_notifier); } /* diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 51e18fbb8953..3fec513b9c00 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -84,15 +84,12 @@ * @notifiers: count of active mmu notifiers */ struct hmm { - struct mm_struct *mm; - struct kref kref; + struct mmu_notifier mmu_notifier; spinlock_t ranges_lock; struct list_head ranges; struct list_head mirrors; - struct mmu_notifier mmu_notifier; struct rw_semaphore mirrors_sem; wait_queue_head_t wq; - struct rcu_head rcu; long notifiers; }; @@ -409,13 +406,6 @@ long hmm_range_dma_unmap(struct hmm_range *range, */ #define HMM_RANGE_DEFAULT_TIMEOUT 1000 -/* Below are for HMM internal use only! Not to be used by device driver! 
*/ -static inline void hmm_mm_init(struct mm_struct *mm) -{ - mm->hmm = NULL; -} -#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ -static inline void hmm_mm_init(struct mm_struct *mm) {} #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ #endif /* LINUX_HMM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3a37a89eb7a7..525d25d93330 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -25,7 +25,6 @@ struct address_space; struct mem_cgroup; -struct hmm; /* * Each physical page in the system has a struct page associated with @@ -502,11 +501,6 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; - -#ifdef CONFIG_HMM_MIRROR - /* HMM needs to track a few things per mm */ - struct hmm *hmm; -#endif } __randomize_layout; /* diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b6c004bd9f6a..31aa971315a1 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -211,6 +211,19 @@ struct mmu_notifier_ops { */ void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end); + + /* + * These callbacks are used with the get/put interface to manage the + * lifetime of the mmu_notifier memory. alloc_notifier() returns a new + * notifier for use with the mm. + * + * free_notifier() is only called after the mmu_notifier has been + * fully put, calls to any ops callback are prevented and no ops + * callbacks are currently running. It is called from a SRCU callback + * and cannot sleep. + */ + struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm); + void (*free_notifier)(struct mmu_notifier *mn); }; /* @@ -227,6 +240,9 @@ struct mmu_notifier_ops { struct mmu_notifier { struct hlist_node hlist; const struct mmu_notifier_ops *ops; + struct mm_struct *mm; + struct rcu_head rcu; + unsigned int users; }; static inline int mm_has_notifiers(struct mm_struct *mm) @@ -234,6 +250,21 @@ static inline int mm_has_notifiers(struct mm_struct *mm) return unlikely(mm->mmu_notifier_mm); } +struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, + struct mm_struct *mm); +static inline struct mmu_notifier * +mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm) +{ + struct mmu_notifier *ret; + + down_write(&mm->mmap_sem); + ret = mmu_notifier_get_locked(ops, mm); + up_write(&mm->mmap_sem); + return ret; +} +void mmu_notifier_put(struct mmu_notifier *mn); +void mmu_notifier_synchronize(void); + extern int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm); extern int __mmu_notifier_register(struct mmu_notifier *mn, @@ -581,6 +612,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pudp_huge_clear_flush_notify pudp_huge_clear_flush #define set_pte_at_notify set_pte_at +static inline void mmu_notifier_synchronize(void) +{ +} + #endif /* CONFIG_MMU_NOTIFIER */ #endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/kernel/fork.c b/kernel/fork.c index d8ae0f1b4148..bd4a0762f12f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1007,7 +1007,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_owner(mm, p); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); - hmm_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; @@ -26,101 +26,37 @@ #include <linux/mmu_notifier.h> #include <linux/memory_hotplug.h> -static const struct mmu_notifier_ops 
hmm_mmu_notifier_ops; - -/** - * hmm_get_or_create - register HMM against an mm (HMM internal) - * - * @mm: mm struct to attach to - * Return: an HMM object, either by referencing the existing - * (per-process) object, or by creating a new one. - * - * This is not intended to be used directly by device drivers. If mm already - * has an HMM struct then it get a reference on it and returns it. Otherwise - * it allocates an HMM struct, initializes it, associate it with the mm and - * returns it. - */ -static struct hmm *hmm_get_or_create(struct mm_struct *mm) +static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm) { struct hmm *hmm; - lockdep_assert_held_write(&mm->mmap_sem); - - /* Abuse the page_table_lock to also protect mm->hmm. */ - spin_lock(&mm->page_table_lock); - hmm = mm->hmm; - if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref)) - goto out_unlock; - spin_unlock(&mm->page_table_lock); - - hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); + hmm = kzalloc(sizeof(*hmm), GFP_KERNEL); if (!hmm) - return NULL; + return ERR_PTR(-ENOMEM); + init_waitqueue_head(&hmm->wq); INIT_LIST_HEAD(&hmm->mirrors); init_rwsem(&hmm->mirrors_sem); - hmm->mmu_notifier.ops = NULL; INIT_LIST_HEAD(&hmm->ranges); spin_lock_init(&hmm->ranges_lock); - kref_init(&hmm->kref); hmm->notifiers = 0; - hmm->mm = mm; - - hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; - if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { - kfree(hmm); - return NULL; - } - - mmgrab(hmm->mm); - - /* - * We hold the exclusive mmap_sem here so we know that mm->hmm is - * still NULL or 0 kref, and is safe to update. - */ - spin_lock(&mm->page_table_lock); - mm->hmm = hmm; - -out_unlock: - spin_unlock(&mm->page_table_lock); - return hmm; + return &hmm->mmu_notifier; } -static void hmm_free_rcu(struct rcu_head *rcu) +static void hmm_free_notifier(struct mmu_notifier *mn) { - struct hmm *hmm = container_of(rcu, struct hmm, rcu); + struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); - mmdrop(hmm->mm); + WARN_ON(!list_empty(&hmm->ranges)); + WARN_ON(!list_empty(&hmm->mirrors)); kfree(hmm); } -static void hmm_free(struct kref *kref) -{ - struct hmm *hmm = container_of(kref, struct hmm, kref); - - spin_lock(&hmm->mm->page_table_lock); - if (hmm->mm->hmm == hmm) - hmm->mm->hmm = NULL; - spin_unlock(&hmm->mm->page_table_lock); - - mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm); - mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu); -} - -static inline void hmm_put(struct hmm *hmm) -{ - kref_put(&hmm->kref, hmm_free); -} - static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); struct hmm_mirror *mirror; - /* Bail out if hmm is in the process of being freed */ - if (!kref_get_unless_zero(&hmm->kref)) - return; - /* * Since hmm_range_register() holds the mmget() lock hmm_release() is * prevented as long as a range exists. 
@@ -137,8 +73,6 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) mirror->ops->release(mirror); } up_read(&hmm->mirrors_sem); - - hmm_put(hmm); } static void notifiers_decrement(struct hmm *hmm) @@ -169,9 +103,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, unsigned long flags; int ret = 0; - if (!kref_get_unless_zero(&hmm->kref)) - return 0; - spin_lock_irqsave(&hmm->ranges_lock, flags); hmm->notifiers++; list_for_each_entry(range, &hmm->ranges, list) { @@ -206,7 +137,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn, out: if (ret) notifiers_decrement(hmm); - hmm_put(hmm); return ret; } @@ -215,17 +145,15 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn, { struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier); - if (!kref_get_unless_zero(&hmm->kref)) - return; - notifiers_decrement(hmm); - hmm_put(hmm); } static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { .release = hmm_release, .invalidate_range_start = hmm_invalidate_range_start, .invalidate_range_end = hmm_invalidate_range_end, + .alloc_notifier = hmm_alloc_notifier, + .free_notifier = hmm_free_notifier, }; /* @@ -237,18 +165,27 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { * * To start mirroring a process address space, the device driver must register * an HMM mirror struct. + * + * The caller cannot unregister the hmm_mirror while any ranges are + * registered. + * + * Callers using this function must put a call to mmu_notifier_synchronize() + * in their module exit functions. */ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) { + struct mmu_notifier *mn; + lockdep_assert_held_write(&mm->mmap_sem); /* Sanity check */ if (!mm || !mirror || !mirror->ops) return -EINVAL; - mirror->hmm = hmm_get_or_create(mm); - if (!mirror->hmm) - return -ENOMEM; + mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + mirror->hmm = container_of(mn, struct hmm, mmu_notifier); down_write(&mirror->hmm->mirrors_sem); list_add(&mirror->list, &mirror->hmm->mirrors); @@ -272,7 +209,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror) down_write(&hmm->mirrors_sem); list_del(&mirror->list); up_write(&hmm->mirrors_sem); - hmm_put(hmm); + mmu_notifier_put(&hmm->mmu_notifier); } EXPORT_SYMBOL(hmm_mirror_unregister); @@ -854,14 +791,13 @@ int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror) return -EINVAL; /* Prevent hmm_release() from running while the range is valid */ - if (!mmget_not_zero(hmm->mm)) + if (!mmget_not_zero(hmm->mmu_notifier.mm)) return -EFAULT; /* Initialize range to track CPU page table updates. */ spin_lock_irqsave(&hmm->ranges_lock, flags); range->hmm = hmm; - kref_get(&hmm->kref); list_add(&range->list, &hmm->ranges); /* @@ -893,8 +829,7 @@ void hmm_range_unregister(struct hmm_range *range) spin_unlock_irqrestore(&hmm->ranges_lock, flags); /* Drop reference taken by hmm_range_register() */ - mmput(hmm->mm); - hmm_put(hmm); + mmput(hmm->mmu_notifier.mm); /* * The range is now invalid and the ref on the hmm is dropped, so @@ -944,14 +879,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags) struct mm_walk mm_walk; int ret; - lockdep_assert_held(&hmm->mm->mmap_sem); + lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem); do { /* If range is no longer valid force retry. 
*/ if (!range->valid) return -EBUSY; - vma = find_vma(hmm->mm, start); + vma = find_vma(hmm->mmu_notifier.mm, start); if (vma == NULL || (vma->vm_flags & device_vma)) return -EFAULT; diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index b5670620aea0..9e92ec8006fc 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -236,33 +236,41 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm, } EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range); -static int do_mmu_notifier_register(struct mmu_notifier *mn, - struct mm_struct *mm, - int take_mmap_sem) +/* + * Same as mmu_notifier_register but here the caller must hold the + * mmap_sem in write mode. + */ +int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) { - struct mmu_notifier_mm *mmu_notifier_mm; + struct mmu_notifier_mm *mmu_notifier_mm = NULL; int ret; + lockdep_assert_held_write(&mm->mmap_sem); BUG_ON(atomic_read(&mm->mm_users) <= 0); - ret = -ENOMEM; - mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); - if (unlikely(!mmu_notifier_mm)) - goto out; + mn->mm = mm; + mn->users = 1; - if (take_mmap_sem) - down_write(&mm->mmap_sem); - ret = mm_take_all_locks(mm); - if (unlikely(ret)) - goto out_clean; + if (!mm->mmu_notifier_mm) { + /* + * kmalloc cannot be called under mm_take_all_locks(), but we + * know that mm->mmu_notifier_mm can't change while we hold + * the write side of the mmap_sem. + */ + mmu_notifier_mm = + kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); + if (!mmu_notifier_mm) + return -ENOMEM; - if (!mm_has_notifiers(mm)) { INIT_HLIST_HEAD(&mmu_notifier_mm->list); spin_lock_init(&mmu_notifier_mm->lock); - - mm->mmu_notifier_mm = mmu_notifier_mm; - mmu_notifier_mm = NULL; } + + ret = mm_take_all_locks(mm); + if (unlikely(ret)) + goto out_clean; + + /* Pairs with the mmdrop in mmu_notifier_unregister_* */ mmgrab(mm); /* @@ -273,48 +281,118 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn, * We can't race against any other mmu notifier method either * thanks to mm_take_all_locks(). */ + if (mmu_notifier_mm) + mm->mmu_notifier_mm = mmu_notifier_mm; + spin_lock(&mm->mmu_notifier_mm->lock); hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_mm->list); spin_unlock(&mm->mmu_notifier_mm->lock); mm_drop_all_locks(mm); + BUG_ON(atomic_read(&mm->mm_users) <= 0); + return 0; + out_clean: - if (take_mmap_sem) - up_write(&mm->mmap_sem); kfree(mmu_notifier_mm); -out: - BUG_ON(atomic_read(&mm->mm_users) <= 0); return ret; } +EXPORT_SYMBOL_GPL(__mmu_notifier_register); -/* +/** + * mmu_notifier_register - Register a notifier on a mm + * @mn: The notifier to attach + * @mm: The mm to attach the notifier to + * * Must not hold mmap_sem nor any other VM related lock when calling * this registration function. Must also ensure mm_users can't go down * to zero while this runs to avoid races with mmu_notifier_release, * so mm has to be current->mm or the mm should be pinned safely such * as with get_task_mm(). If the mm is not current->mm, the mm_users * pin should be released by calling mmput after mmu_notifier_register - * returns. mmu_notifier_unregister must be always called to - * unregister the notifier. mm_count is automatically pinned to allow - * mmu_notifier_unregister to safely run at any time later, before or - * after exit_mmap. ->release will always be called before exit_mmap - * frees the pages. + * returns. + * + * mmu_notifier_unregister() or mmu_notifier_put() must be always called to + * unregister the notifier. 
+ * + * While the caller has a mmu_notifier get the mn->mm pointer will remain + * valid, and can be converted to an active mm pointer via mmget_not_zero(). */ int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) { - return do_mmu_notifier_register(mn, mm, 1); + int ret; + + down_write(&mm->mmap_sem); + ret = __mmu_notifier_register(mn, mm); + up_write(&mm->mmap_sem); + return ret; } EXPORT_SYMBOL_GPL(mmu_notifier_register); -/* - * Same as mmu_notifier_register but here the caller must hold the - * mmap_sem in write mode. +static struct mmu_notifier * +find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops) +{ + struct mmu_notifier *mn; + + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops != ops) + continue; + + if (likely(mn->users != UINT_MAX)) + mn->users++; + else + mn = ERR_PTR(-EOVERFLOW); + spin_unlock(&mm->mmu_notifier_mm->lock); + return mn; + } + spin_unlock(&mm->mmu_notifier_mm->lock); + return NULL; +} + +/** + * mmu_notifier_get_locked - Return the single struct mmu_notifier for + * the mm & ops + * @ops: The operations struct being subscribe with + * @mm : The mm to attach notifiers too + * + * This function either allocates a new mmu_notifier via + * ops->alloc_notifier(), or returns an already existing notifier on the + * list. The value of the ops pointer is used to determine when two notifiers + * are the same. + * + * Each call to mmu_notifier_get() must be paired with a call to + * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem. + * + * While the caller has a mmu_notifier get the mm pointer will remain valid, + * and can be converted to an active mm pointer via mmget_not_zero(). */ -int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, + struct mm_struct *mm) { - return do_mmu_notifier_register(mn, mm, 0); + struct mmu_notifier *mn; + int ret; + + lockdep_assert_held_write(&mm->mmap_sem); + + if (mm->mmu_notifier_mm) { + mn = find_get_mmu_notifier(mm, ops); + if (mn) + return mn; + } + + mn = ops->alloc_notifier(mm); + if (IS_ERR(mn)) + return mn; + mn->ops = ops; + ret = __mmu_notifier_register(mn, mm); + if (ret) + goto out_free; + return mn; +out_free: + mn->ops->free_notifier(mn); + return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(__mmu_notifier_register); +EXPORT_SYMBOL_GPL(mmu_notifier_get_locked); /* this is called after the last mmu_notifier_unregister() returned */ void __mmu_notifier_mm_destroy(struct mm_struct *mm) @@ -394,6 +472,75 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, } EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); +static void mmu_notifier_free_rcu(struct rcu_head *rcu) +{ + struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu); + struct mm_struct *mm = mn->mm; + + mn->ops->free_notifier(mn); + /* Pairs with the get in __mmu_notifier_register() */ + mmdrop(mm); +} + +/** + * mmu_notifier_put - Release the reference on the notifier + * @mn: The notifier to act on + * + * This function must be paired with each mmu_notifier_get(), it releases the + * reference obtained by the get. If this is the last reference then process + * to free the notifier will be run asynchronously. + * + * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release + * when the mm_struct is destroyed. 
Instead free_notifier is always called to + * release any resources held by the user. + * + * As ops->release is not guaranteed to be called, the user must ensure that + * all sptes are dropped, and no new sptes can be established before + * mmu_notifier_put() is called. + * + * This function can be called from the ops->release callback, however the + * caller must still ensure it is called pairwise with mmu_notifier_get(). + * + * Modules calling this function must call mmu_notifier_synchronize() in + * their __exit functions to ensure the async work is completed. + */ +void mmu_notifier_put(struct mmu_notifier *mn) +{ + struct mm_struct *mm = mn->mm; + + spin_lock(&mm->mmu_notifier_mm->lock); + if (WARN_ON(!mn->users) || --mn->users) + goto out_unlock; + hlist_del_init_rcu(&mn->hlist); + spin_unlock(&mm->mmu_notifier_mm->lock); + + call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu); + return; + +out_unlock: + spin_unlock(&mm->mmu_notifier_mm->lock); +} +EXPORT_SYMBOL_GPL(mmu_notifier_put); + +/** + * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed + * + * This function ensures that all outstanding async SRU work from + * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops + * associated with an unused mmu_notifier will no longer be called. + * + * Before using the caller must ensure that all of its mmu_notifiers have been + * fully released via mmu_notifier_put(). + * + * Modules using the mmu_notifier_put() API should call this in their __exit + * function to avoid module unloading races. + */ +void mmu_notifier_synchronize(void) +{ + synchronize_srcu(&srcu); +} +EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); + bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range) { |