Diffstat (limited to 'mm/userfaultfd.c')
 mm/userfaultfd.c | 66 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 17 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3c3539c573e7..defa5109cc62 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -56,17 +56,16 @@ struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
#ifdef CONFIG_PER_VMA_LOCK
/*
- * lock_vma() - Lookup and lock vma corresponding to @address.
+ * uffd_lock_vma() - Lookup and lock vma corresponding to @address.
* @mm: mm to search vma in.
* @address: address that the vma should contain.
*
- * Should be called without holding mmap_lock. vma should be unlocked after use
- * with unlock_vma().
+ * Should be called without holding mmap_lock.
*
* Return: A locked vma containing @address, -ENOENT if no vma is found, or
* -ENOMEM if anon_vma couldn't be allocated.
*/
-static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
unsigned long address)
{
struct vm_area_struct *vma;
@@ -74,9 +73,8 @@ static struct vm_area_struct *lock_vma(struct mm_struct *mm,
vma = lock_vma_under_rcu(mm, address);
if (vma) {
/*
- * lock_vma_under_rcu() only checks anon_vma for private
- * anonymous mappings. But we need to ensure it is assigned in
- * private file-backed vmas as well.
+ * We know we're going to need to use anon_vma, so check
+ * that early.
*/
if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
vma_end_read(vma);
@@ -107,7 +105,7 @@ static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
{
struct vm_area_struct *dst_vma;
- dst_vma = lock_vma(dst_mm, dst_start);
+ dst_vma = uffd_lock_vma(dst_mm, dst_start);
if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
return dst_vma;
@@ -180,9 +178,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
- bool page_in_cache = page_mapping(page);
spinlock_t *ptl;
- struct folio *folio;
+ struct folio *folio = page_folio(page);
+ bool page_in_cache = folio_mapping(folio);
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
_dst_pte = pte_mkdirty(_dst_pte);
@@ -212,7 +210,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
if (!pte_none_mostly(ptep_get(dst_pte)))
goto out_unlock;
- folio = page_folio(page);
if (page_in_cache) {
/* Usually, cache pages are already added to LRU */
if (newly_allocated)
@@ -316,6 +313,38 @@ out_release:
goto out;
}
+static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd,
+ struct vm_area_struct *dst_vma,
+ unsigned long dst_addr)
+{
+ struct folio *folio;
+ int ret = -ENOMEM;
+
+ folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr);
+ if (!folio)
+ return ret;
+
+ if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL))
+ goto out_put;
+
+ /*
+ * The memory barrier inside __folio_mark_uptodate makes sure that
+ * zeroing out the folio become visible before mapping the page
+ * using set_pte_at(). See do_anonymous_page().
+ */
+ __folio_mark_uptodate(folio);
+
+ ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
+ &folio->page, true, 0);
+ if (ret)
+ goto out_put;
+
+ return 0;
+out_put:
+ folio_put(folio);
+ return ret;
+}
+
static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr)
@@ -324,6 +353,9 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
spinlock_t *ptl;
int ret;
+ if (mm_forbids_zeropage(dst_vma->vm_mm))
+ return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr);
+
_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
dst_vma->vm_page_prot));
ret = -EAGAIN;
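Side note: the hunk above is the kernel side of the UFFDIO_ZEROPAGE fill path. When mm_forbids_zeropage() is true for the target mm (e.g. on s390 hosts running certain KVM guests), the shared zero page cannot be mapped, so the new mfill_atomic_pte_zeroed_folio() helper allocates and installs a freshly zeroed folio instead. As a rough, hypothetical userspace sketch of driving this path (assuming `uffd` was obtained from userfaultfd(2) and the `addr`/`len` range was already registered with UFFDIO_REGISTER; error handling trimmed):

/* Hypothetical usage sketch: zero-fill part of a registered uffd range. */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int fill_with_zeros(int uffd, unsigned long addr, unsigned long len)
{
	struct uffdio_zeropage zp = {
		.range = { .start = addr, .len = len },
		.mode  = 0,		/* or UFFDIO_ZEROPAGE_MODE_DONTWAKE */
	};

	if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1) {
		perror("UFFDIO_ZEROPAGE");
		return -1;
	}
	/* zp.zeropage reports how many bytes were actually zero-filled. */
	printf("zero-filled %lld bytes\n", (long long)zp.zeropage);
	return 0;
}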
@@ -1026,7 +1058,7 @@ static int move_present_pte(struct mm_struct *mm,
}
folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+	src_folio->index = linear_page_index(dst_vma, dst_addr);
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
@@ -1402,7 +1434,7 @@ static int uffd_move_lock(struct mm_struct *mm,
struct vm_area_struct *vma;
int err;
- vma = lock_vma(mm, dst_start);
+ vma = uffd_lock_vma(mm, dst_start);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1417,7 +1449,7 @@ static int uffd_move_lock(struct mm_struct *mm,
}
/*
- * Using lock_vma() to get src_vma can lead to following deadlock:
+ * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
*
* Thread1 Thread2
* ------- -------
@@ -1439,7 +1471,7 @@ static int uffd_move_lock(struct mm_struct *mm,
err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
if (!err) {
/*
- * See comment in lock_vma() as to why not using
+ * See comment in uffd_lock_vma() as to why not using
* vma_start_read() here.
*/
down_read(&(*dst_vmap)->vm_lock->lock);
@@ -1662,9 +1694,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
/* Check if we can move the pmd without splitting it. */
if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
!pmd_none(dst_pmdval)) {
- struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
+ struct folio *folio = pmd_folio(*src_pmd);
- if (!folio || (!is_huge_zero_page(&folio->page) &&
+ if (!folio || (!is_huge_zero_folio(folio) &&
!PageAnonExclusive(&folio->page))) {
spin_unlock(ptl);
err = -EBUSY;
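Side note: the last hunks touch the UFFDIO_MOVE path (move_pages() / uffd_move_lock()), which remaps existing anonymous pages from a source range into the registered destination range instead of copying them; the -EBUSY above is returned when a huge PMD's folio is not exclusively owned and the move cannot proceed without splitting. As a rough, hypothetical sketch of the userspace side (again assuming `uffd` from userfaultfd(2) and a destination range registered with UFFDIO_REGISTER_MODE_MISSING; field names from <linux/userfaultfd.h>):

/* Hypothetical usage sketch: move already-faulted anonymous memory. */
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int move_range(int uffd, unsigned long dst, unsigned long src,
		      unsigned long len)
{
	struct uffdio_move mv = {
		.dst  = dst,
		.src  = src,
		.len  = len,
		.mode = 0,	/* or UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES */
	};

	if (ioctl(uffd, UFFDIO_MOVE, &mv) == -1) {
		perror("UFFDIO_MOVE");
		return -1;
	}
	/* mv.move reports how many bytes were actually moved. */
	printf("moved %lld bytes\n", (long long)mv.move);
	return 0;
}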