diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 16 | ||||
-rw-r--r-- | mm/mmap.c | 11 | ||||
-rw-r--r-- | mm/page_alloc.c | 57 | ||||
-rw-r--r-- | mm/page_cgroup.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 149 | ||||
-rw-r--r-- | mm/swap.c | 3 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
7 files changed, 144 insertions, 96 deletions
diff --git a/mm/memory.c b/mm/memory.c index 27f425378112..61e66f026563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1412,9 +1412,8 @@ no_page_table: static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_stack_continue(vma->vm_prev, addr); + return stack_guard_page_start(vma, addr) || + stack_guard_page_end(vma, addr+PAGE_SIZE); } /** @@ -1551,12 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } - /* - * For mlock, just skip the stack guard page. - */ - if ((gup_flags & FOLL_MLOCK) && stack_guard_page(vma, start)) - goto next_page; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1573,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ret; unsigned int fault_flags = 0; + /* For mlock, just skip the stack guard page. */ + if (foll_flags & FOLL_MLOCK) { + if (stack_guard_page(vma, start)) + goto next_page; + } if (foll_flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) diff --git a/mm/mmap.c b/mm/mmap.c index e27e0cf0de03..772140c53ab1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1767,10 +1767,13 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) size = address - vma->vm_start; grow = (address - vma->vm_end) >> PAGE_SHIFT; - error = acct_stack_growth(vma, size, grow); - if (!error) { - vma->vm_end = address; - perf_event_mmap(vma); + error = -ENOMEM; + if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { + error = acct_stack_growth(vma, size, grow); + if (!error) { + vma->vm_end = address; + perf_event_mmap(vma); + } } } vma_unlock_anon_vma(vma); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f8a97b9a350..3f8bce264df6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +{ + if (addr) { + unsigned long alloc_end = addr + (PAGE_SIZE << order); + unsigned long used = addr + PAGE_ALIGN(size); + + split_page(virt_to_page((void *)addr), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } + return (void *)addr; +} + /** * alloc_pages_exact - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate @@ -2336,22 +2351,33 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long addr; addr = __get_free_pages(gfp_mask, order); - if (addr) { - unsigned long alloc_end = addr + (PAGE_SIZE << order); - unsigned long used = addr + PAGE_ALIGN(size); - - split_page(virt_to_page((void *)addr), order); - while (used < alloc_end) { - free_page(used); - used += PAGE_SIZE; - } - } - - return (void *)addr; + return make_alloc_exact(addr, order, size); } EXPORT_SYMBOL(alloc_pages_exact); /** + * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * pages on a node. + * @nid: the preferred node ID where memory should be allocated + * @size: the number of bytes to allocate + * @gfp_mask: GFP flags for the allocation + * + * Like alloc_pages_exact(), but try to allocate on node nid first before falling + * back. + * Note this is not alloc_pages_exact_node() which allocates on a specific node, + * but is not exact. + */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +{ + unsigned order = get_order(size); + struct page *p = alloc_pages_node(nid, gfp_mask, order); + if (!p) + return NULL; + return make_alloc_exact((unsigned long)page_address(p), order, size); +} +EXPORT_SYMBOL(alloc_pages_exact_nid); + +/** * free_pages_exact - release memory allocated via alloc_pages_exact() * @virt: the value returned by alloc_pages_exact. * @size: size of allocation, same value as passed to alloc_pages_exact(). @@ -3564,7 +3590,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); + alloc_bootmem_node_nopanic(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory @@ -4141,7 +4167,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; if (usemapsize) - zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, + usemapsize); } #else static inline void setup_usemap(struct pglist_data *pgdat, @@ -4307,7 +4334,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); if (!map) - map = alloc_bootmem_node(pgdat, size); + map = alloc_bootmem_node_nopanic(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } #ifndef CONFIG_NEED_MULTIPLE_NODES diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 99055010cece..2daadc322ba6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -134,7 +134,7 @@ static void *__init_refok alloc_page_cgroup(size_t size, int nid) { void *addr = NULL; - addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN); + addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN); if (addr) return addr; diff --git a/mm/shmem.c b/mm/shmem.c index 8fa27e4e582a..dfc7069102ee 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) { - struct inode *inode; + struct address_space *mapping; unsigned long idx; unsigned long size; unsigned long limit; @@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > SHMEM_NR_DIRECT) size = SHMEM_NR_DIRECT; offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) + if (offset >= 0) { + shmem_swp_balance_unmap(); goto found; + } if (!info->i_indirect) goto lost2; @@ -914,11 +916,11 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s if (size > ENTRIES_PER_PAGE) size = ENTRIES_PER_PAGE; offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); if (offset >= 0) { shmem_dir_unmap(dir); goto found; } + shmem_swp_unmap(ptr); } } lost1: @@ -928,8 +930,7 @@ lost2: return 0; found: idx += offset; - inode = igrab(&info->vfs_inode); - spin_unlock(&info->lock); + ptr += offset; /* * Move _head_ to start search for next from here. @@ -940,37 +941,18 @@ found: */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); - mutex_unlock(&shmem_swaplist_mutex); - error = 1; - if (!inode) - goto out; /* - * Charge page using GFP_KERNEL while we can wait. - * Charged back to the user(not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. + * We rely on shmem_swaplist_mutex, not only to protect the swaplist, + * but also to hold up shmem_evict_inode(): so inode cannot be freed + * beneath us (pagelock doesn't help until the page is in pagecache). */ - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); - if (error) - goto out; - error = radix_tree_preload(GFP_KERNEL); - if (error) { - mem_cgroup_uncharge_cache_page(page); - goto out; - } - error = 1; - - spin_lock(&info->lock); - ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) { - error = add_to_page_cache_locked(page, inode->i_mapping, - idx, GFP_NOWAIT); - /* does mem_cgroup_uncharge_cache_page on error */ - } else /* we must compensate for our precharge above */ - mem_cgroup_uncharge_cache_page(page); + mapping = info->vfs_inode.i_mapping; + error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + /* which does mem_cgroup_uncharge_cache_page on error */ if (error == -EEXIST) { - struct page *filepage = find_get_page(inode->i_mapping, idx); + struct page *filepage = find_get_page(mapping, idx); error = 1; if (filepage) { /* @@ -990,14 +972,8 @@ found: swap_free(entry); error = 1; /* not an error, but entry was found */ } - if (ptr) - shmem_swp_unmap(ptr); + shmem_swp_unmap(ptr); spin_unlock(&info->lock); - radix_tree_preload_end(); -out: - unlock_page(page); - page_cache_release(page); - iput(inode); /* allows for NULL */ return error; } @@ -1009,6 +985,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page) struct list_head *p, *next; struct shmem_inode_info *info; int found = 0; + int error; + + /* + * Charge page using GFP_KERNEL while we can wait, before taking + * the shmem_swaplist_mutex which might hold up shmem_writepage(). + * Charged back to the user (not to caller) when swap account is used. + * add_to_page_cache() will be called with GFP_NOWAIT. + */ + error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); + if (error) + goto out; + /* + * Try to preload while we can wait, to not make a habit of + * draining atomic reserves; but don't latch on to this cpu, + * it's okay if sometimes we get rescheduled after this. + */ + error = radix_tree_preload(GFP_KERNEL); + if (error) + goto uncharge; + radix_tree_preload_end(); mutex_lock(&shmem_swaplist_mutex); list_for_each_safe(p, next, &shmem_swaplist) { @@ -1016,17 +1012,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page) found = shmem_unuse_inode(info, entry, page); cond_resched(); if (found) - goto out; + break; } mutex_unlock(&shmem_swaplist_mutex); - /* - * Can some race bring us here? We've been holding page lock, - * so I think not; but would rather try again later than BUG() - */ + +uncharge: + if (!found) + mem_cgroup_uncharge_cache_page(page); + if (found < 0) + error = found; +out: unlock_page(page); page_cache_release(page); -out: - return (found < 0) ? found : 0; + return error; } /* @@ -1064,7 +1062,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) else swap.val = 0; + /* + * Add inode to shmem_unuse()'s list of swapped-out inodes, + * if it's not already there. Do it now because we cannot take + * mutex while holding spinlock, and must do so before the page + * is moved to swap cache, when its pagelock no longer protects + * the inode from eviction. But don't unlock the mutex until + * we've taken the spinlock, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under both locks. + */ + if (swap.val) { + mutex_lock(&shmem_swaplist_mutex); + if (list_empty(&info->swaplist)) + list_add_tail(&info->swaplist, &shmem_swaplist); + } + spin_lock(&info->lock); + if (swap.val) + mutex_unlock(&shmem_swaplist_mutex); + if (index >= info->next_index) { BUG_ON(!(info->flags & SHMEM_TRUNCATE)); goto unlock; @@ -1084,21 +1100,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); - if (list_empty(&info->swaplist)) - inode = igrab(inode); - else - inode = NULL; spin_unlock(&info->lock); swap_shmem_alloc(swap); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); - if (inode) { - mutex_lock(&shmem_swaplist_mutex); - /* move instead of add in case we're racing */ - list_move_tail(&info->swaplist, &shmem_swaplist); - mutex_unlock(&shmem_swaplist_mutex); - iput(inode); - } return 0; } @@ -1400,20 +1405,14 @@ repeat: if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + shmem_acct_block(info->flags)) + goto nospace; percpu_counter_inc(&sbinfo->used_blocks); spin_lock(&inode->i_lock); inode->i_blocks += BLOCKS_PER_PAGE; spin_unlock(&inode->i_lock); - } else if (shmem_acct_block(info->flags)) { - spin_unlock(&info->lock); - error = -ENOSPC; - goto failed; - } + } else if (shmem_acct_block(info->flags)) + goto nospace; if (!filepage) { int ret; @@ -1493,6 +1492,24 @@ done: error = 0; goto out; +nospace: + /* + * Perhaps the page was brought in from swap between find_lock_page + * and taking info->lock? We allow for that at add_to_page_cache_lru, + * but must also avoid reporting a spurious ENOSPC while working on a + * full tmpfs. (When filepage has been passed in to shmem_getpage, it + * is already in page cache, which prevents this race from occurring.) + */ + if (!filepage) { + struct page *page = find_get_page(mapping, idx); + if (page) { + spin_unlock(&info->lock); + page_cache_release(page); + goto repeat; + } + } + spin_unlock(&info->lock); + error = -ENOSPC; failed: if (*pagep != filepage) { unlock_page(filepage); diff --git a/mm/swap.c b/mm/swap.c index a448db377cb0..5602f1a1b1e7 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -396,6 +396,9 @@ static void lru_deactivate_fn(struct page *page, void *arg) if (!PageLRU(page)) return; + if (PageUnevictable(page)) + return; + /* Some processes are using the page */ if (page_mapped(page)) return; diff --git a/mm/vmscan.c b/mm/vmscan.c index f6b435c80079..8bfd45050a61 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -937,7 +937,7 @@ keep_lumpy: * back off and wait for congestion to clear because further reclaim * will encounter the same problem */ - if (nr_dirty == nr_congested && nr_dirty != 0) + if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc)) zone_set_flag(zone, ZONE_CONGESTED); free_page_list(&free_pages); |