diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 93 |
1 files changed, 59 insertions, 34 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 2aec4241b42a..eb3dd37ccd7c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -46,6 +46,7 @@ #include <linux/oom.h> #include <linux/prefetch.h> #include <linux/printk.h> +#include <linux/dax.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -106,8 +107,6 @@ struct scan_control { unsigned long nr_reclaimed; }; -#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) - #ifdef ARCH_HAS_PREFETCH #define prefetch_prev_lru_page(_page, _base, _field) \ do { \ @@ -197,11 +196,13 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) unsigned long nr; nr = zone_page_state(zone, NR_ACTIVE_FILE) + - zone_page_state(zone, NR_INACTIVE_FILE); + zone_page_state(zone, NR_INACTIVE_FILE) + + zone_page_state(zone, NR_ISOLATED_FILE); if (get_nr_swap_pages() > 0) nr += zone_page_state(zone, NR_ACTIVE_ANON) + - zone_page_state(zone, NR_INACTIVE_ANON); + zone_page_state(zone, NR_INACTIVE_ANON) + + zone_page_state(zone, NR_ISOLATED_ANON); return nr; } @@ -411,7 +412,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct shrinker *shrinker; unsigned long freed = 0; - if (memcg && !memcg_kmem_is_active(memcg)) + if (memcg && !memcg_kmem_online(memcg)) return 0; if (nr_scanned == 0) @@ -594,7 +595,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, /* synchronous write or broken a_ops? */ ClearPageReclaim(page); } - trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); + trace_mm_vmscan_writepage(page); inc_zone_page_state(page, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } @@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * inode reclaim needs to empty out the radix tree or * the nodes are lost. Don't plant shadows behind its * back. + * + * We also don't store shadows for DAX mappings because the + * only page cache pages found in these are zero pages + * covering holes, and because we don't want to mix DAX + * exceptional entries and shadow exceptional entries in the + * same page_tree. */ if (reclaimed && page_is_file_cache(page) && - !mapping_exiting(mapping)) + !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow, memcg); spin_unlock_irqrestore(&mapping->tree_lock, flags); @@ -906,6 +913,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, int may_enter_fs; enum page_references references = PAGEREF_RECLAIM_CLEAN; bool dirty, writeback; + bool lazyfree = false; + int ret = SWAP_SUCCESS; cond_resched(); @@ -1049,6 +1058,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep_locked; if (!add_to_swap(page, page_list)) goto activate_locked; + lazyfree = true; may_enter_fs = 1; /* Adding to swap updated mapping */ @@ -1060,14 +1070,17 @@ static unsigned long shrink_page_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page) && mapping) { - switch (try_to_unmap(page, - ttu_flags|TTU_BATCH_FLUSH)) { + switch (ret = try_to_unmap(page, lazyfree ? + (ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) : + (ttu_flags | TTU_BATCH_FLUSH))) { case SWAP_FAIL: goto activate_locked; case SWAP_AGAIN: goto keep_locked; case SWAP_MLOCK: goto cull_mlocked; + case SWAP_LZFREE: + goto lazyfree; case SWAP_SUCCESS: ; /* try to free the page below */ } @@ -1174,6 +1187,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, } } +lazyfree: if (!mapping || !__remove_mapping(mapping, page, true)) goto keep_locked; @@ -1184,8 +1198,11 @@ static unsigned long shrink_page_list(struct list_head *page_list, * we obviously don't have to worry about waking up a process * waiting on the page lock, because there are no references. */ - __clear_page_locked(page); + __ClearPageLocked(page); free_it: + if (ret == SWAP_LZFREE) + count_vm_event(PGLAZYFREED); + nr_reclaimed++; /* @@ -1204,7 +1221,7 @@ cull_mlocked: activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ - if (PageSwapCache(page) && vm_swap_full()) + if (PageSwapCache(page) && mem_cgroup_swap_full(page)) try_to_free_swap(page); VM_BUG_ON_PAGE(PageActive(page), page); SetPageActive(page); @@ -1426,6 +1443,7 @@ int isolate_lru_page(struct page *page) int ret = -EBUSY; VM_BUG_ON_PAGE(!page_count(page), page); + VM_BUG_ON_PAGE(PageTail(page), page); if (PageLRU(page)) { struct zone *zone = page_zone(page); @@ -1691,11 +1709,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, current_may_throttle()) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); - trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, - zone_idx(zone), - nr_scanned, nr_reclaimed, - sc->priority, - trace_shrink_flags(file)); + trace_mm_vmscan_lru_shrink_inactive(zone, nr_scanned, nr_reclaimed, + sc->priority, file); return nr_reclaimed; } @@ -1958,10 +1973,11 @@ enum scan_balance { * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan */ -static void get_scan_count(struct lruvec *lruvec, int swappiness, +static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg, struct scan_control *sc, unsigned long *nr, unsigned long *lru_pages) { + int swappiness = mem_cgroup_swappiness(memcg); struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2]; u64 denominator = 0; /* gcc */ @@ -1988,14 +2004,14 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness, if (current_is_kswapd()) { if (!zone_reclaimable(zone)) force_scan = true; - if (!mem_cgroup_lruvec_online(lruvec)) + if (!mem_cgroup_online(memcg)) force_scan = true; } if (!global_reclaim(sc)) force_scan = true; /* If we have no swap space, do not bother scanning anon pages. */ - if (!sc->may_swap || (get_nr_swap_pages() <= 0)) { + if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { scan_balance = SCAN_FILE; goto out; } @@ -2046,10 +2062,16 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness, } /* - * There is enough inactive page cache, do not reclaim - * anything from the anonymous working set right now. + * If there is enough inactive page cache, i.e. if the size of the + * inactive list is greater than that of the active list *and* the + * inactive list actually has some pages to scan on this priority, we + * do not reclaim anything from the anonymous working set right now. + * Without the second condition we could end up never scanning an + * lruvec even if it has plenty of old anonymous pages unless the + * system is under heavy pressure. */ - if (!inactive_file_is_low(lruvec)) { + if (!inactive_file_is_low(lruvec) && + get_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) { scan_balance = SCAN_FILE; goto out; } @@ -2179,9 +2201,10 @@ static inline void init_tlb_ubc(void) /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_lruvec(struct lruvec *lruvec, int swappiness, - struct scan_control *sc, unsigned long *lru_pages) +static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg, + struct scan_control *sc, unsigned long *lru_pages) { + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -2191,7 +2214,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness, struct blk_plug plug; bool scan_adjusted; - get_scan_count(lruvec, swappiness, sc, nr, lru_pages); + get_scan_count(lruvec, memcg, sc, nr, lru_pages); /* Record the original scan target for proportional adjustments later */ memcpy(targets, nr, sizeof(nr)); @@ -2393,9 +2416,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, memcg = mem_cgroup_iter(root, NULL, &reclaim); do { unsigned long lru_pages; + unsigned long reclaimed; unsigned long scanned; - struct lruvec *lruvec; - int swappiness; if (mem_cgroup_low(root, memcg)) { if (!sc->may_thrash) @@ -2403,11 +2425,10 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, mem_cgroup_events(memcg, MEMCG_LOW, 1); } - lruvec = mem_cgroup_zone_lruvec(zone, memcg); - swappiness = mem_cgroup_swappiness(memcg); + reclaimed = sc->nr_reclaimed; scanned = sc->nr_scanned; - shrink_lruvec(lruvec, swappiness, sc, &lru_pages); + shrink_zone_memcg(zone, memcg, sc, &lru_pages); zone_lru_pages += lru_pages; if (memcg && is_classzone) @@ -2415,6 +2436,11 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, memcg, sc->nr_scanned - scanned, lru_pages); + /* Record the group's reclaim efficiency */ + vmpressure(sc->gfp_mask, memcg, false, + sc->nr_scanned - scanned, + sc->nr_reclaimed - reclaimed); + /* * Direct reclaim and kswapd have to scan all memory * cgroups to fulfill the overall scan target for the @@ -2446,7 +2472,8 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, reclaim_state->reclaimed_slab = 0; } - vmpressure(sc->gfp_mask, sc->target_mem_cgroup, + /* Record the subtree's reclaim efficiency */ + vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, sc->nr_scanned - nr_scanned, sc->nr_reclaimed - nr_reclaimed); @@ -2871,8 +2898,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, }; - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); - int swappiness = mem_cgroup_swappiness(memcg); unsigned long lru_pages; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2889,7 +2914,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_lruvec(lruvec, swappiness, &sc, &lru_pages); + shrink_zone_memcg(zone, memcg, &sc, &lru_pages); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |