diff options
author | Dave Airlie <airlied@redhat.com> | 2015-04-20 11:32:26 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2015-04-20 13:05:20 +1000 |
commit | 2c33ce009ca2389dbf0535d0672214d09738e35e (patch) | |
tree | 6186a6458c3c160385d794a23eaf07c786a9e61b /mm/memory.c | |
parent | cec32a47010647e8b0603726ebb75b990a4057a4 (diff) | |
parent | 09d51602cf84a1264946711dd4ea0dddbac599a1 (diff) | |
download | linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.gz linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.bz2 linux-stable-2c33ce009ca2389dbf0535d0672214d09738e35e.zip |
Merge Linus master into drm-next
The merge is clean, but the arm build fails afterwards,
due to API changes in the regulator tree.
I've included the patch into the merge to fix the build.
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 419 |
1 files changed, 257 insertions, 162 deletions
diff --git a/mm/memory.c b/mm/memory.c index 97839f5c8c30..22e037e3364e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -690,12 +690,11 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, /* * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y */ - if (vma->vm_ops) - printk(KERN_ALERT "vma->vm_ops->fault: %pSR\n", - vma->vm_ops->fault); - if (vma->vm_file) - printk(KERN_ALERT "vma->vm_file->f_op->mmap: %pSR\n", - vma->vm_file->f_op->mmap); + pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n", + vma->vm_file, + vma->vm_ops ? vma->vm_ops->fault : NULL, + vma->vm_file ? vma->vm_file->f_op->mmap : NULL, + mapping ? mapping->a_ops->readpage : NULL); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } @@ -1983,167 +1982,91 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, } /* - * This routine handles present pages, when users try to write - * to a shared page. It is done by copying the page to a new address - * and decrementing the shared-page counter for the old page. + * Handle write page faults for pages that can be reused in the current vma * - * Note that this routine assumes that the protection checks have been - * done by the caller (the low-level page fault routine in most cases). - * Thus we can safely just mark it writable once we've done any necessary - * COW. - * - * We also mark the page dirty at this point even though the page will - * change only once the write actually happens. This avoids a few races, - * and potentially makes it more efficient. - * - * We enter with non-exclusive mmap_sem (to exclude vma changes, - * but allow concurrent faults), with pte both mapped and locked. - * We return with mmap_sem still held, but pte unmapped and unlocked. + * This can happen either due to the mapping being with the VM_SHARED flag, + * or due to us being the last reference standing to the page. In either + * case, all we need to do here is to mark the page as writable and update + * any related book-keeping. */ -static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pte_t *page_table, pmd_t *pmd, - spinlock_t *ptl, pte_t orig_pte) +static inline int wp_page_reuse(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, + struct page *page, int page_mkwrite, + int dirty_shared) __releases(ptl) { - struct page *old_page, *new_page = NULL; pte_t entry; - int ret = 0; - int page_mkwrite = 0; - bool dirty_shared = false; - unsigned long mmun_start = 0; /* For mmu_notifiers */ - unsigned long mmun_end = 0; /* For mmu_notifiers */ - struct mem_cgroup *memcg; - - old_page = vm_normal_page(vma, address, orig_pte); - if (!old_page) { - /* - * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a - * VM_PFNMAP VMA. - * - * We should not cow pages in a shared writeable mapping. - * Just mark the pages writable as we can't do any dirty - * accounting on raw pfn maps. - */ - if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == - (VM_WRITE|VM_SHARED)) - goto reuse; - goto gotten; - } - /* - * Take out anonymous pages first, anonymous shared vmas are - * not dirty accountable. + * Clear the pages cpupid information as the existing + * information potentially belongs to a now completely + * unrelated process. */ - if (PageAnon(old_page) && !PageKsm(old_page)) { - if (!trylock_page(old_page)) { - page_cache_get(old_page); - pte_unmap_unlock(page_table, ptl); - lock_page(old_page); - page_table = pte_offset_map_lock(mm, pmd, address, - &ptl); - if (!pte_same(*page_table, orig_pte)) { - unlock_page(old_page); - goto unlock; - } - page_cache_release(old_page); - } - if (reuse_swap_page(old_page)) { - /* - * The page is all ours. Move it to our anon_vma so - * the rmap code will not search our parent or siblings. - * Protected against the rmap code by the page lock. - */ - page_move_anon_rmap(old_page, vma, address); - unlock_page(old_page); - goto reuse; - } - unlock_page(old_page); - } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == - (VM_WRITE|VM_SHARED))) { - page_cache_get(old_page); - /* - * Only catch write-faults on shared writable pages, - * read-only shared pages can get COWed by - * get_user_pages(.write=1, .force=1). - */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) { - int tmp; - - pte_unmap_unlock(page_table, ptl); - tmp = do_page_mkwrite(vma, old_page, address); - if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { - page_cache_release(old_page); - return tmp; - } - /* - * Since we dropped the lock we need to revalidate - * the PTE as someone else may have changed it. If - * they did, we just return, as we can count on the - * MMU to tell us if they didn't also make it writable. - */ - page_table = pte_offset_map_lock(mm, pmd, address, - &ptl); - if (!pte_same(*page_table, orig_pte)) { - unlock_page(old_page); - goto unlock; - } - page_mkwrite = 1; - } - - dirty_shared = true; - -reuse: - /* - * Clear the pages cpupid information as the existing - * information potentially belongs to a now completely - * unrelated process. - */ - if (old_page) - page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1); + if (page) + page_cpupid_xchg_last(page, (1 << LAST_CPUPID_SHIFT) - 1); - flush_cache_page(vma, address, pte_pfn(orig_pte)); - entry = pte_mkyoung(orig_pte); - entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (ptep_set_access_flags(vma, address, page_table, entry,1)) - update_mmu_cache(vma, address, page_table); - pte_unmap_unlock(page_table, ptl); - ret |= VM_FAULT_WRITE; + flush_cache_page(vma, address, pte_pfn(orig_pte)); + entry = pte_mkyoung(orig_pte); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (ptep_set_access_flags(vma, address, page_table, entry, 1)) + update_mmu_cache(vma, address, page_table); + pte_unmap_unlock(page_table, ptl); - if (dirty_shared) { - struct address_space *mapping; - int dirtied; + if (dirty_shared) { + struct address_space *mapping; + int dirtied; - if (!page_mkwrite) - lock_page(old_page); + if (!page_mkwrite) + lock_page(page); - dirtied = set_page_dirty(old_page); - VM_BUG_ON_PAGE(PageAnon(old_page), old_page); - mapping = old_page->mapping; - unlock_page(old_page); - page_cache_release(old_page); + dirtied = set_page_dirty(page); + VM_BUG_ON_PAGE(PageAnon(page), page); + mapping = page->mapping; + unlock_page(page); + page_cache_release(page); - if ((dirtied || page_mkwrite) && mapping) { - /* - * Some device drivers do not set page.mapping - * but still dirty their pages - */ - balance_dirty_pages_ratelimited(mapping); - } - - if (!page_mkwrite) - file_update_time(vma->vm_file); + if ((dirtied || page_mkwrite) && mapping) { + /* + * Some device drivers do not set page.mapping + * but still dirty their pages + */ + balance_dirty_pages_ratelimited(mapping); } - return ret; + if (!page_mkwrite) + file_update_time(vma->vm_file); } - /* - * Ok, we need to copy. Oh, well.. - */ - page_cache_get(old_page); -gotten: - pte_unmap_unlock(page_table, ptl); + return VM_FAULT_WRITE; +} + +/* + * Handle the case of a page which we actually need to copy to a new page. + * + * Called with mmap_sem locked and the old page referenced, but + * without the ptl held. + * + * High level logic flow: + * + * - Allocate a page, copy the content of the old page to the new one. + * - Handle book keeping and accounting - cgroups, mmu-notifiers, etc. + * - Take the PTL. If the pte changed, bail out and release the allocated page + * - If the pte is still the way we remember it, update the page table and all + * relevant references. This includes dropping the reference the page-table + * held to the old page, as well as updating the rmap. + * - In any case, unlock the PTL and drop the reference we took to the old page. + */ +static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + pte_t orig_pte, struct page *old_page) +{ + struct page *new_page = NULL; + spinlock_t *ptl = NULL; + pte_t entry; + int page_copied = 0; + const unsigned long mmun_start = address & PAGE_MASK; /* For mmu_notifiers */ + const unsigned long mmun_end = mmun_start + PAGE_SIZE; /* For mmu_notifiers */ + struct mem_cgroup *memcg; if (unlikely(anon_vma_prepare(vma))) goto oom; @@ -2163,8 +2086,6 @@ gotten: if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) goto oom_free_new; - mmun_start = address & PAGE_MASK; - mmun_end = mmun_start + PAGE_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); /* @@ -2177,8 +2098,9 @@ gotten: dec_mm_counter_fast(mm, MM_FILEPAGES); inc_mm_counter_fast(mm, MM_ANONPAGES); } - } else + } else { inc_mm_counter_fast(mm, MM_ANONPAGES); + } flush_cache_page(vma, address, pte_pfn(orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -2227,29 +2149,29 @@ gotten: /* Free the old page.. */ new_page = old_page; - ret |= VM_FAULT_WRITE; - } else + page_copied = 1; + } else { mem_cgroup_cancel_charge(new_page, memcg); + } if (new_page) page_cache_release(new_page); -unlock: + pte_unmap_unlock(page_table, ptl); - if (mmun_end > mmun_start) - mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); if (old_page) { /* * Don't let another task, with possibly unlocked vma, * keep the mlocked page. */ - if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) { + if (page_copied && (vma->vm_flags & VM_LOCKED)) { lock_page(old_page); /* LRU manipulation */ munlock_vma_page(old_page); unlock_page(old_page); } page_cache_release(old_page); } - return ret; + return page_copied ? VM_FAULT_WRITE : 0; oom_free_new: page_cache_release(new_page); oom: @@ -2258,6 +2180,179 @@ oom: return VM_FAULT_OOM; } +/* + * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED + * mapping + */ +static int wp_pfn_shared(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + pte_t *page_table, spinlock_t *ptl, pte_t orig_pte, + pmd_t *pmd) +{ + if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { + struct vm_fault vmf = { + .page = NULL, + .pgoff = linear_page_index(vma, address), + .virtual_address = (void __user *)(address & PAGE_MASK), + .flags = FAULT_FLAG_WRITE | FAULT_FLAG_MKWRITE, + }; + int ret; + + pte_unmap_unlock(page_table, ptl); + ret = vma->vm_ops->pfn_mkwrite(vma, &vmf); + if (ret & VM_FAULT_ERROR) + return ret; + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); + /* + * We might have raced with another page fault while we + * released the pte_offset_map_lock. + */ + if (!pte_same(*page_table, orig_pte)) { + pte_unmap_unlock(page_table, ptl); + return 0; + } + } + return wp_page_reuse(mm, vma, address, page_table, ptl, orig_pte, + NULL, 0, 0); +} + +static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, + pmd_t *pmd, spinlock_t *ptl, pte_t orig_pte, + struct page *old_page) + __releases(ptl) +{ + int page_mkwrite = 0; + + page_cache_get(old_page); + + /* + * Only catch write-faults on shared writable pages, + * read-only shared pages can get COWed by + * get_user_pages(.write=1, .force=1). + */ + if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + int tmp; + + pte_unmap_unlock(page_table, ptl); + tmp = do_page_mkwrite(vma, old_page, address); + if (unlikely(!tmp || (tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + page_cache_release(old_page); + return tmp; + } + /* + * Since we dropped the lock we need to revalidate + * the PTE as someone else may have changed it. If + * they did, we just return, as we can count on the + * MMU to tell us if they didn't also make it writable. + */ + page_table = pte_offset_map_lock(mm, pmd, address, + &ptl); + if (!pte_same(*page_table, orig_pte)) { + unlock_page(old_page); + pte_unmap_unlock(page_table, ptl); + page_cache_release(old_page); + return 0; + } + page_mkwrite = 1; + } + + return wp_page_reuse(mm, vma, address, page_table, ptl, + orig_pte, old_page, page_mkwrite, 1); +} + +/* + * This routine handles present pages, when users try to write + * to a shared page. It is done by copying the page to a new address + * and decrementing the shared-page counter for the old page. + * + * Note that this routine assumes that the protection checks have been + * done by the caller (the low-level page fault routine in most cases). + * Thus we can safely just mark it writable once we've done any necessary + * COW. + * + * We also mark the page dirty at this point even though the page will + * change only once the write actually happens. This avoids a few races, + * and potentially makes it more efficient. + * + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults), with pte both mapped and locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. + */ +static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, + spinlock_t *ptl, pte_t orig_pte) + __releases(ptl) +{ + struct page *old_page; + + old_page = vm_normal_page(vma, address, orig_pte); + if (!old_page) { + /* + * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a + * VM_PFNMAP VMA. + * + * We should not cow pages in a shared writeable mapping. + * Just mark the pages writable and/or call ops->pfn_mkwrite. + */ + if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED)) + return wp_pfn_shared(mm, vma, address, page_table, ptl, + orig_pte, pmd); + + pte_unmap_unlock(page_table, ptl); + return wp_page_copy(mm, vma, address, page_table, pmd, + orig_pte, old_page); + } + + /* + * Take out anonymous pages first, anonymous shared vmas are + * not dirty accountable. + */ + if (PageAnon(old_page) && !PageKsm(old_page)) { + if (!trylock_page(old_page)) { + page_cache_get(old_page); + pte_unmap_unlock(page_table, ptl); + lock_page(old_page); + page_table = pte_offset_map_lock(mm, pmd, address, + &ptl); + if (!pte_same(*page_table, orig_pte)) { + unlock_page(old_page); + pte_unmap_unlock(page_table, ptl); + page_cache_release(old_page); + return 0; + } + page_cache_release(old_page); + } + if (reuse_swap_page(old_page)) { + /* + * The page is all ours. Move it to our anon_vma so + * the rmap code will not search our parent or siblings. + * Protected against the rmap code by the page lock. + */ + page_move_anon_rmap(old_page, vma, address); + unlock_page(old_page); + return wp_page_reuse(mm, vma, address, page_table, ptl, + orig_pte, old_page, 0, 0); + } + unlock_page(old_page); + } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED))) { + return wp_page_shared(mm, vma, address, page_table, pmd, + ptl, orig_pte, old_page); + } + + /* + * Ok, we need to copy. Oh, well.. + */ + page_cache_get(old_page); + + pte_unmap_unlock(page_table, ptl); + return wp_page_copy(mm, vma, address, page_table, pmd, + orig_pte, old_page); +} + static void unmap_mapping_range_vma(struct vm_area_struct *vma, unsigned long start_addr, unsigned long end_addr, struct zap_details *details) @@ -2784,7 +2879,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, struct vm_fault vmf; int off; - nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT; + nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; start_addr = max(address & mask, vma->vm_start); |