 mm/khugepaged.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 40d43eccdee8..d5650541083a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1476,7 +1476,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	struct page *hpage;
 	pte_t *start_pte, *pte;
 	pmd_t *pmd, pgt_pmd;
-	spinlock_t *pml, *ptl;
+	spinlock_t *pml = NULL, *ptl;
 	int nr_ptes = 0, result = SCAN_FAIL;
 	int i;
 
@@ -1572,9 +1572,25 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 				haddr, haddr + HPAGE_PMD_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
 	notified = true;
-	start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+	/*
+	 * pmd_lock covers a wider range than ptl, and (if split from mm's
+	 * page_table_lock) ptl nests inside pml. The less time we hold pml,
+	 * the better; but userfaultfd's mfill_atomic_pte() on a private VMA
+	 * inserts a valid as-if-COWed PTE without even looking up page cache.
+	 * So page lock of hpage does not protect from it, so we must not drop
+	 * ptl before pgt_pmd is removed, so uffd private needs pml taken now.
+	 */
+	if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
+		pml = pmd_lock(mm, pmd);
+
+	start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
 	if (!start_pte)		/* mmap_lock + page lock should prevent this */
 		goto abort;
+	if (!pml)
+		spin_lock(ptl);
+	else if (ptl != pml)
+		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
 	/* step 2: clear page table and adjust rmap */
 	for (i = 0, addr = haddr, pte = start_pte;
@@ -1608,7 +1624,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 		nr_ptes++;
 	}
 
-	pte_unmap_unlock(start_pte, ptl);
+	pte_unmap(start_pte);
+	if (!pml)
+		spin_unlock(ptl);
 
 	/* step 3: set proper refcount and mm_counters. */
 	if (nr_ptes) {
@@ -1616,12 +1634,12 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 		add_mm_counter(mm, mm_counter_file(hpage), -nr_ptes);
 	}
 
-	/* step 4: remove page table */
-
-	/* Huge page lock is still held, so page table must remain empty */
-	pml = pmd_lock(mm, pmd);
-	if (ptl != pml)
-		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+	/* step 4: remove empty page table */
+	if (!pml) {
+		pml = pmd_lock(mm, pmd);
+		if (ptl != pml)
+			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+	}
 	pgt_pmd = pmdp_collapse_flush(vma, haddr, pmd);
 	pmdp_get_lockless_sync();
 	if (ptl != pml)
@@ -1648,6 +1666,8 @@ abort:
 	}
 	if (start_pte)
 		pte_unmap_unlock(start_pte, ptl);
+	if (pml && pml != ptl)
+		spin_unlock(pml);
 	if (notified)
 		mmu_notifier_invalidate_range_end(&range);
 drop_hpage:
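The acquire/release ordering above is easier to see stripped of the kernel context. Below is a minimal userspace sketch of the same pattern, with pthread mutexes standing in for the kernel's spinlocks; the names collapse_like, need_pml_early, coarse and fine are hypothetical illustrations, not kernel API. It models the three shapes the patch must handle: the common path (ptl alone for steps 2-3, then pml for step 4), the uffd-private path (pml taken up front and held throughout), and the unsplit-lock case where ptl aliases pml.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Stand-ins for the pmd-level lock (pml) and pte-level lock (ptl).
	 * When page table locks are not split, both pointers alias one
	 * lock, mirroring the ptl == pml case in the patch.
	 */
	static pthread_mutex_t coarse = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t fine   = PTHREAD_MUTEX_INITIALIZER;

	static void collapse_like(bool need_pml_early, bool split_locks)
	{
		pthread_mutex_t *pml = NULL;
		pthread_mutex_t *ptl = split_locks ? &fine : &coarse;

		/*
		 * Outer lock first, but only when it must be held across
		 * the whole clear-and-remove sequence (the uffd-private
		 * case in the patch).
		 */
		if (need_pml_early)
			pml = &coarse;
		if (pml)
			pthread_mutex_lock(pml);

		/*
		 * Inner lock always taken; skip it if it aliases the outer
		 * lock.  (The kernel patch uses spin_lock_nested() here to
		 * tell lockdep the nesting is intentional.)
		 */
		if (!pml || ptl != pml)
			pthread_mutex_lock(ptl);

		/* ... step 2: clear entries under ptl ... */

		/* Without the early outer lock, drop ptl as soon as possible. */
		if (!pml) {
			pthread_mutex_unlock(ptl);
			/* ... step 3: adjust counters, no lock held ... */
			pml = &coarse;
			pthread_mutex_lock(pml);
			if (ptl != pml)
				pthread_mutex_lock(ptl);
		}

		/* ... step 4: detach the empty table under both locks ... */

		if (ptl != pml)
			pthread_mutex_unlock(ptl);
		pthread_mutex_unlock(pml);
	}

	int main(void)
	{
		collapse_like(false, true);  /* common case: ptl, then pml */
		collapse_like(true,  true);  /* uffd-private: pml held throughout */
		collapse_like(true,  false); /* unsplit locks: ptl aliases pml */
		puts("lock ordering exercised");
		return 0;
	}

The asymmetry is the point of the patch: taking pml early only for userfaultfd-armed private VMAs keeps the common path holding the wider pmd lock just briefly for step 4, while the uffd case keeps ptl (nested inside pml) held until pgt_pmd is detached, closing the window in which mfill_atomic_pte() could install a PTE into a page table that is about to be freed.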