 mm/khugepaged.c | 38 +++++++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 40d43eccdee8..d5650541083a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1476,7 +1476,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
struct page *hpage;
pte_t *start_pte, *pte;
pmd_t *pmd, pgt_pmd;
- spinlock_t *pml, *ptl;
+ spinlock_t *pml = NULL, *ptl;
int nr_ptes = 0, result = SCAN_FAIL;
int i;
@@ -1572,9 +1572,25 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
haddr, haddr + HPAGE_PMD_SIZE);
mmu_notifier_invalidate_range_start(&range);
notified = true;
- start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+ /*
+ * pmd_lock covers a wider range than ptl, and (if split from mm's
+ * page_table_lock) ptl nests inside pml. The less time we hold pml,
+ * the better; but userfaultfd's mfill_atomic_pte() on a private VMA
+ * inserts a valid as-if-COWed PTE without even looking up page cache.
+ * So page lock of hpage does not protect from it, so we must not drop
+ * ptl before pgt_pmd is removed, so uffd private needs pml taken now.
+ */
+ if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
+ pml = pmd_lock(mm, pmd);
+
+ start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
if (!start_pte) /* mmap_lock + page lock should prevent this */
goto abort;
+ if (!pml)
+ spin_lock(ptl);
+ else if (ptl != pml)
+ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
/* step 2: clear page table and adjust rmap */
for (i = 0, addr = haddr, pte = start_pte;
@@ -1608,7 +1624,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
nr_ptes++;
}
- pte_unmap_unlock(start_pte, ptl);
+ pte_unmap(start_pte);
+ if (!pml)
+ spin_unlock(ptl);
/* step 3: set proper refcount and mm_counters. */
if (nr_ptes) {
@@ -1616,12 +1634,12 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes);
}
- /* step 4: remove page table */
-
- /* Huge page lock is still held, so page table must remain empty */
- pml = pmd_lock(mm, pmd);
- if (ptl != pml)
- spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+ /* step 4: remove empty page table */
+ if (!pml) {
+ pml = pmd_lock(mm, pmd);
+ if (ptl != pml)
+ spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+ }
pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
pmdp_get_lockless_sync();
if (ptl != pml)
@@ -1648,6 +1666,8 @@ abort:
}
if (start_pte)
pte_unmap_unlock(start_pte, ptl);
+ if (pml && pml != ptl)
+ spin_unlock(pml);
if (notified)
mmu_notifier_invalidate_range_end(&range);
drop_hpage:
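
Taken together, the hunks above rearrange the function's lock flow around two cases. Below is a condensed, illustrative sketch of that flow, not the kernel source: the function name locking_shape_sketch is invented, error handling is reduced to a single label, and the step 2-3 PTE-clearing loop and the step 4 flush are elided into comments; only the lock/unlock ordering is meant to mirror the patched collapse_pte_mapped_thp().

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/userfaultfd_k.h>

static void locking_shape_sketch(struct mm_struct *mm,
				 struct vm_area_struct *vma,
				 pmd_t *pmd, unsigned long haddr)
{
	spinlock_t *pml = NULL, *ptl;
	pte_t *start_pte;

	/*
	 * uffd on a private VMA can instantiate PTEs without taking the
	 * page lock of hpage, so that path takes the pmd lock up front
	 * and keeps ptl held until the page table is detached; all other
	 * paths defer pml to step 4 to shorten the hold time.
	 */
	if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
		pml = pmd_lock(mm, pmd);

	start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
	if (!start_pte)
		goto out;
	if (!pml)
		spin_lock(ptl);			/* ptl alone for steps 2-3 */
	else if (ptl != pml)
		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);

	/* ... steps 2-3: clear PTEs, adjust rmap and mm_counters ... */

	pte_unmap(start_pte);
	if (!pml)
		spin_unlock(ptl);  /* empty table kept safe by page lock */

	/* step 4: detach the empty page table under pml, ptl nested */
	if (!pml) {
		pml = pmd_lock(mm, pmd);
		if (ptl != pml)
			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
	}
	/* ... pmdp_collapse_flush(vma, haddr, pmd) happens here ... */
	if (ptl != pml)
		spin_unlock(ptl);
	spin_unlock(pml);
out:
	return;
}

The asymmetry is the point of the patch: in the common case pml is held only across the final pmdp_collapse_flush(), while in the uffd-private case it brackets the whole teardown, since mfill_atomic_pte() could otherwise repopulate the table between ptl being dropped and pgt_pmd being removed.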