summary | refs | log | tree | commit | diff | stats
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- mm/gup.c 108
1 files changed, 66 insertions, 42 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 44c2658cc128..2f8a2d89fde1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
goto no_page;
page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
@@ -811,7 +811,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
struct follow_page_context *ctx)
{
pgd_t *pgd;
- struct page *page;
struct mm_struct *mm = vma->vm_mm;
ctx->page_mask = 0;
@@ -820,16 +819,10 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
* Call hugetlb_follow_page_mask for hugetlb vmas as it will use
* special hugetlb page table walking code. This eliminates the
* need to check for hugetlb entries in the general walking code.
- *
- * hugetlb_follow_page_mask is only for follow_page() handling here.
- * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
*/
- if (is_vm_hugetlb_page(vma)) {
- page = hugetlb_follow_page_mask(vma, address, flags);
- if (!page)
- page = no_page_table(vma, flags);
- return page;
- }
+ if (is_vm_hugetlb_page(vma))
+ return hugetlb_follow_page_mask(vma, address, flags,
+ &ctx->page_mask);
pgd = pgd_offset(mm, address);
@@ -851,6 +844,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL;
+ /*
+ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+ * to fail on PROT_NONE-mapped pages.
+ */
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -1211,7 +1208,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
- pages ? &pages[i] : NULL);
+ pages ? &page : NULL);
if (ret)
goto out;
ctx.page_mask = 0;
@@ -1225,22 +1222,6 @@ static long __get_user_pages(struct mm_struct *mm,
ret = check_vma_flags(vma, gup_flags);
if (ret)
goto out;
-
- if (is_vm_hugetlb_page(vma)) {
- i = follow_hugetlb_page(mm, vma, pages,
- &start, &nr_pages, i,
- gup_flags, locked);
- if (!*locked) {
- /*
- * We've got a VM_FAULT_RETRY
- * and we've lost mmap_lock.
- * We must stop here.
- */
- BUG_ON(gup_flags & FOLL_NOWAIT);
- goto out;
- }
- continue;
- }
}
retry:
/*
@@ -1281,22 +1262,58 @@ retry:
ret = PTR_ERR(page);
goto out;
}
-
- goto next_page;
} else if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
- if (pages) {
- pages[i] = page;
- flush_anon_page(vma, page, start);
- flush_dcache_page(page);
- ctx.page_mask = 0;
- }
next_page:
page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
if (page_increm > nr_pages)
page_increm = nr_pages;
+
+ if (pages) {
+ struct page *subpage;
+ unsigned int j;
+
+ /*
+ * This must be a large folio (and doesn't need to
+ * be the whole folio; it can be part of it), do
+ * the refcount work for all the subpages too.
+ *
+ * NOTE: here the page may not be the head page
+ * e.g. when start addr is not thp-size aligned.
+ * try_grab_folio() should have taken care of tail
+ * pages.
+ */
+ if (page_increm > 1) {
+ struct folio *folio;
+
+ /*
+ * Since we already hold refcount on the
+ * large folio, this should never fail.
+ */
+ folio = try_grab_folio(page, page_increm - 1,
+ foll_flags);
+ if (WARN_ON_ONCE(!folio)) {
+ /*
+ * Release the 1st page ref if the
+ * folio is problematic, fail hard.
+ */
+ gup_put_folio(page_folio(page), 1,
+ foll_flags);
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ for (j = 0; j < page_increm; j++) {
+ subpage = nth_page(page, j);
+ pages[i + j] = subpage;
+ flush_anon_page(vma, subpage, start + j * PAGE_SIZE);
+ flush_dcache_page(subpage);
+ }
+ }
+
i += page_increm;
start += page_increm * PAGE_SIZE;
nr_pages -= page_increm;
@@ -2551,7 +2568,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
struct page *page;
struct folio *folio;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ /*
+ * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+ * pte_access_permitted() better should reject these pages
+ * either way: otherwise, GUP-fast might succeed in
+ * cases where ordinary GUP would fail due to VMA access
+ * permissions.
+ */
+ if (pte_protnone(pte))
goto pte_unmap;
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2576,7 +2600,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
if (!folio)
goto pte_unmap;
- if (unlikely(page_is_secretmem(page))) {
+ if (unlikely(folio_is_secretmem(folio))) {
gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
@@ -2970,8 +2994,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
- if (pmd_protnone(pmd) &&
- !gup_can_follow_protnone(flags))
+ /* See gup_pte_range() */
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3175,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT |
- FOLL_PCI_P2PDMA)))
+ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
return -EINVAL;
if (gup_flags & FOLL_PIN)