Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 371
1 files changed, 216 insertions, 155 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 97839f5c8c30..ac20b2a6a0c3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1983,167 +1983,91 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 }
 
 /*
- * This routine handles present pages, when users try to write
- * to a shared page. It is done by copying the page to a new address
- * and decrementing the shared-page counter for the old page.
- *
- * Note that this routine assumes that the protection checks have been
- * done by the caller (the low-level page fault routine in most cases).
- * Thus we can safely just mark it writable once we've done any necessary
- * COW.
+ * Handle write page faults for pages that can be reused in the current vma
  *
- * We also mark the page dirty at this point even though the page will
- * change only once the write actually happens. This avoids a few races,
- * and potentially makes it more efficient.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), with pte both mapped and locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * This can happen either due to the mapping being with the VM_SHARED flag,
+ * or due to us being the last reference standing to the page. In either
+ * case, all we need to do here is to mark the page as writable and update
+ * any related book-keeping.
  */
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		spinlock_t *ptl, pte_t orig_pte)
+static inline int wp_page_reuse(struct mm_struct *mm,
+			struct vm_area_struct *vma, unsigned long address,
+			pte_t *page_table, spinlock_t *ptl, pte_t orig_pte,
+			struct page *page, int page_mkwrite,
+			int dirty_shared)
 	__releases(ptl)
 {
-	struct page *old_page, *new_page = NULL;
 	pte_t entry;
-	int ret = 0;
-	int page_mkwrite = 0;
-	bool dirty_shared = false;
-	unsigned long mmun_start = 0;	/* For mmu_notifiers */
-	unsigned long mmun_end = 0;	/* For mmu_notifiers */
-	struct mem_cgroup *memcg;
-
-	old_page = vm_normal_page(vma, address, orig_pte);
-	if (!old_page) {
-		/*
-		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
-		 * VM_PFNMAP VMA.
-		 *
-		 * We should not cow pages in a shared writeable mapping.
-		 * Just mark the pages writable as we can't do any dirty
-		 * accounting on raw pfn maps.
-		 */
-		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
-				     (VM_WRITE|VM_SHARED))
-			goto reuse;
-		goto gotten;
-	}
-
 	/*
-	 * Take out anonymous pages first, anonymous shared vmas are
-	 * not dirty accountable.
+	 * Clear the pages cpupid information as the existing
+	 * information potentially belongs to a now completely
+	 * unrelated process.
 	 */
-	if (PageAnon(old_page) && !PageKsm(old_page)) {
-		if (!trylock_page(old_page)) {
-			page_cache_get(old_page);
-			pte_unmap_unlock(page_table, ptl);
-			lock_page(old_page);
-			page_table = pte_offset_map_lock(mm, pmd, address,
-							 &ptl);
-			if (!pte_same(*page_table, orig_pte)) {
-				unlock_page(old_page);
-				goto unlock;
-			}
-			page_cache_release(old_page);
-		}
-		if (reuse_swap_page(old_page)) {
-			/*
-			 * The page is all ours. Move it to our anon_vma so
-			 * the rmap code will not search our parent or siblings.
-			 * Protected against the rmap code by the page lock.
-			 */
-			page_move_anon_rmap(old_page, vma, address);
-			unlock_page(old_page);
-			goto reuse;
-		}
-		unlock_page(old_page);
-	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
-					(VM_WRITE|VM_SHARED))) {
-		page_cache_get(old_page);
-		/*
-		 * Only catch write-faults on shared writable pages,
-		 * read-only shared pages can get COWed by
-		 * get_user_pages(.write=1, .force=1).
-		 */
-		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
-			int tmp;
-
-			pte_unmap_unlock(page_table, ptl);
-			tmp = do_page_mkwrite(vma, old_page, address);
-			if (unlikely(!tmp || (tmp &
-					(VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
-				page_cache_release(old_page);
-				return tmp;
-			}
-			/*
-			 * Since we dropped the lock we need to revalidate
-			 * the PTE as someone else may have changed it. If
-			 * they did, we just return, as we can count on the
-			 * MMU to tell us if they didn't also make it writable.
-			 */
-			page_table = pte_offset_map_lock(mm, pmd, address,
-							 &ptl);
-			if (!pte_same(*page_table, orig_pte)) {
-				unlock_page(old_page);
-				goto unlock;
-			}
-			page_mkwrite = 1;
-		}
-
-		dirty_shared = true;
-
-reuse:
-		/*
-		 * Clear the pages cpupid information as the existing
-		 * information potentially belongs to a now completely
-		 * unrelated process.
-		 */
-		if (old_page)
-			page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
-
-		flush_cache_page(vma, address, pte_pfn(orig_pte));
-		entry = pte_mkyoung(orig_pte);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		if (ptep_set_access_flags(vma, address, page_table, entry,1))
-			update_mmu_cache(vma, address, page_table);
-		pte_unmap_unlock(page_table, ptl);
-		ret |= VM_FAULT_WRITE;
+	if (page)
+		page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1);
 
-		if (dirty_shared) {
-			struct address_space *mapping;
-			int dirtied;
+	flush_cache_page(vma, address, pte_pfn(orig_pte));
+	entry = pte_mkyoung(orig_pte);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (ptep_set_access_flags(vma, address, page_table, entry, 1))
+		update_mmu_cache(vma, address, page_table);
+	pte_unmap_unlock(page_table, ptl);
 
-			if (!page_mkwrite)
-				lock_page(old_page);
+	if (dirty_shared) {
+		struct address_space *mapping;
+		int dirtied;
 
-			dirtied = set_page_dirty(old_page);
-			VM_BUG_ON_PAGE(PageAnon(old_page), old_page);
-			mapping = old_page->mapping;
-			unlock_page(old_page);
-			page_cache_release(old_page);
+		if (!page_mkwrite)
+			lock_page(page);
 
-			if ((dirtied || page_mkwrite) && mapping) {
-				/*
-				 * Some device drivers do not set page.mapping
-				 * but still dirty their pages
-				 */
-				balance_dirty_pages_ratelimited(mapping);
-			}
+		dirtied = set_page_dirty(page);
+		VM_BUG_ON_PAGE(PageAnon(page), page);
+		mapping = page->mapping;
+		unlock_page(page);
+		page_cache_release(page);
 
-			if (!page_mkwrite)
-				file_update_time(vma->vm_file);
+		if ((dirtied || page_mkwrite) && mapping) {
+			/*
+			 * Some device drivers do not set page.mapping
+			 * but still dirty their pages
+			 */
+			balance_dirty_pages_ratelimited(mapping);
 		}
 
-		return ret;
+		if (!page_mkwrite)
+			file_update_time(vma->vm_file);
 	}
 
-	/*
-	 * Ok, we need to copy. Oh, well..
-	 */
-	page_cache_get(old_page);
-gotten:
-	pte_unmap_unlock(page_table, ptl);
+	return VM_FAULT_WRITE;
+}
+
+/*
+ * Handle the case of a page which we actually need to copy to a new page.
+ *
+ * Called with mmap_sem locked and the old page referenced, but
+ * without the ptl held.
+ *
+ * High level logic flow:
+ *
+ * - Allocate a page, copy the content of the old page to the new one.
+ * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc.
+ * - Take the PTL. If the pte changed, bail out and release the allocated page
+ * - If the pte is still the way we remember it, update the page table and all
+ *   relevant references. This includes dropping the reference the page-table
+ *   held to the old page, as well as updating the rmap.
+ * - In any case, unlock the PTL and drop the reference we took to the old page.
+ */
+static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, pte_t *page_table, pmd_t *pmd,
+			pte_t orig_pte, struct page *old_page)
+{
+	struct page *new_page = NULL;
+	spinlock_t *ptl = NULL;
+	pte_t entry;
+	int page_copied = 0;
+	const unsigned long mmun_start = address & PAGE_MASK;	/* For mmu_notifiers */
+	const unsigned long mmun_end = mmun_start + PAGE_SIZE;	/* For mmu_notifiers */
+	struct mem_cgroup *memcg;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
@@ -2163,8 +2087,6 @@ gotten:
 	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
 		goto oom_free_new;
 
-	mmun_start = address & PAGE_MASK;
-	mmun_end = mmun_start + PAGE_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	/*
@@ -2177,8 +2099,9 @@ gotten:
 			dec_mm_counter_fast(mm, MM_FILEPAGES);
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		}
-	} else
+	} else {
 		inc_mm_counter_fast(mm, MM_ANONPAGES);
+	}
 	flush_cache_page(vma, address, pte_pfn(orig_pte));
 	entry = mk_pte(new_page, vma->vm_page_prot);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2227,29 +2150,29 @@ gotten:
 
 		/* Free the old page.. */
 		new_page = old_page;
-		ret |= VM_FAULT_WRITE;
-	} else
+		page_copied = 1;
+	} else {
 		mem_cgroup_cancel_charge(new_page, memcg);
+	}
 
 	if (new_page)
 		page_cache_release(new_page);
-unlock:
+
 	pte_unmap_unlock(page_table, ptl);
-	if (mmun_end > mmun_start)
-		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
 		 * Don't let another task, with possibly unlocked vma,
 		 * keep the mlocked page.
 		 */
-		if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) {
+		if (page_copied && (vma->vm_flags & VM_LOCKED)) {
 			lock_page(old_page);	/* LRU manipulation */
 			munlock_vma_page(old_page);
 			unlock_page(old_page);
 		}
 		page_cache_release(old_page);
 	}
-	return ret;
+	return page_copied ? VM_FAULT_WRITE : 0;
 oom_free_new:
 	page_cache_release(new_page);
 oom:
@@ -2258,6 +2181,144 @@ oom:
 	return VM_FAULT_OOM;
 }
 
+static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
+			  unsigned long address, pte_t *page_table,
+			  pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte,
+			  struct page *old_page)
+	__releases(ptl)
+{
+	int page_mkwrite = 0;
+
+	page_cache_get(old_page);
+
+	/*
+	 * Only catch write-faults on shared writable pages,
+	 * read-only shared pages can get COWed by
+	 * get_user_pages(.write=1, .force=1).
+	 */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+		int tmp;
+
+		pte_unmap_unlock(page_table, ptl);
+		tmp = do_page_mkwrite(vma, old_page, address);
+		if (unlikely(!tmp || (tmp &
+				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+			page_cache_release(old_page);
+			return tmp;
+		}
+		/*
+		 * Since we dropped the lock we need to revalidate
+		 * the PTE as someone else may have changed it. If
+		 * they did, we just return, as we can count on the
+		 * MMU to tell us if they didn't also make it writable.
+		 */
+		page_table = pte_offset_map_lock(mm, pmd, address,
+						 &ptl);
+		if (!pte_same(*page_table, orig_pte)) {
+			unlock_page(old_page);
+			pte_unmap_unlock(page_table, ptl);
+			page_cache_release(old_page);
+			return 0;
+		}
+		page_mkwrite = 1;
+	}
+
+	return wp_page_reuse(mm, vma, address, page_table, ptl,
+			     orig_pte, old_page, page_mkwrite, 1);
+}
+
+/*
+ * This routine handles present pages, when users try to write
+ * to a shared page. It is done by copying the page to a new address
+ * and decrementing the shared-page counter for the old page.
+ *
+ * Note that this routine assumes that the protection checks have been
+ * done by the caller (the low-level page fault routine in most cases).
+ * Thus we can safely just mark it writable once we've done any necessary
+ * COW.
+ *
+ * We also mark the page dirty at this point even though the page will
+ * change only once the write actually happens. This avoids a few races,
+ * and potentially makes it more efficient.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), with pte both mapped and locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		spinlock_t *ptl, pte_t orig_pte)
+	__releases(ptl)
+{
+	struct page *old_page;
+
+	old_page = vm_normal_page(vma, address, orig_pte);
+	if (!old_page) {
+		/*
+		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+		 * VM_PFNMAP VMA.
+		 *
+		 * We should not cow pages in a shared writeable mapping.
+		 * Just mark the pages writable as we can't do any dirty
+		 * accounting on raw pfn maps.
+		 */
+		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+				     (VM_WRITE|VM_SHARED))
+			return wp_page_reuse(mm, vma, address, page_table, ptl,
+					     orig_pte, old_page, 0, 0);
+
+		pte_unmap_unlock(page_table, ptl);
+		return wp_page_copy(mm, vma, address, page_table, pmd,
+				    orig_pte, old_page);
+	}
+
+	/*
+	 * Take out anonymous pages first, anonymous shared vmas are
+	 * not dirty accountable.
+	 */
+	if (PageAnon(old_page) && !PageKsm(old_page)) {
+		if (!trylock_page(old_page)) {
+			page_cache_get(old_page);
+			pte_unmap_unlock(page_table, ptl);
+			lock_page(old_page);
+			page_table = pte_offset_map_lock(mm, pmd, address,
+							 &ptl);
+			if (!pte_same(*page_table, orig_pte)) {
+				unlock_page(old_page);
+				pte_unmap_unlock(page_table, ptl);
+				page_cache_release(old_page);
+				return 0;
+			}
+			page_cache_release(old_page);
+		}
+		if (reuse_swap_page(old_page)) {
+			/*
+			 * The page is all ours. Move it to our anon_vma so
+			 * the rmap code will not search our parent or siblings.
+			 * Protected against the rmap code by the page lock.
+			 */
+			page_move_anon_rmap(old_page, vma, address);
+			unlock_page(old_page);
+			return wp_page_reuse(mm, vma, address, page_table, ptl,
					     orig_pte, old_page, 0, 0);
+		}
+		unlock_page(old_page);
+	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
+		return wp_page_shared(mm, vma, address, page_table, pmd,
+				      ptl, orig_pte, old_page);
+	}
+
+	/*
+	 * Ok, we need to copy. Oh, well..
+	 */
+	page_cache_get(old_page);
+
+	pte_unmap_unlock(page_table, ptl);
+	return wp_page_copy(mm, vma, address, page_table, pmd,
+			    orig_pte, old_page);
+}
+
 static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 		unsigned long start_addr, unsigned long end_addr,
 		struct zap_details *details)
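For orientation, the dispatch performed by the refactored do_wp_page() in the last hunk can be summarized as the sketch below. This is a condensed paraphrase of the diff above, not the kernel source: the page lock/trylock dance, the extra page references, and the PTE revalidation after dropping the lock are elided, and do_wp_page_outline is a hypothetical name used only for illustration.

static int do_wp_page_outline(struct mm_struct *mm, struct vm_area_struct *vma,
			      unsigned long address, pte_t *page_table,
			      pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte)
{
	struct page *old_page = vm_normal_page(vma, address, orig_pte);

	if (!old_page) {
		/* Raw PFN mapping: reuse if shared and writable, else copy. */
		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
		    (VM_WRITE|VM_SHARED))
			return wp_page_reuse(mm, vma, address, page_table, ptl,
					     orig_pte, old_page, 0, 0);
		pte_unmap_unlock(page_table, ptl);
		return wp_page_copy(mm, vma, address, page_table, pmd,
				    orig_pte, old_page);
	}

	if (PageAnon(old_page) && !PageKsm(old_page)) {
		/* The real code takes the page lock around this check. */
		if (reuse_swap_page(old_page))
			/* Last reference standing: write the page in place. */
			return wp_page_reuse(mm, vma, address, page_table, ptl,
					     orig_pte, old_page, 0, 0);
	} else if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
		   (VM_WRITE|VM_SHARED)) {
		/* Shared writable file page: ->page_mkwrite, then reuse. */
		return wp_page_shared(mm, vma, address, page_table, pmd,
				      ptl, orig_pte, old_page);
	}

	/* Everything else is copy-on-write. */
	pte_unmap_unlock(page_table, ptl);
	return wp_page_copy(mm, vma, address, page_table, pmd,
			    orig_pte, old_page);
}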