Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 234
1 file changed, 116 insertions, 118 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 532a2a750952..7bb23ff229b6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -234,22 +234,39 @@ bool pgdat_reclaimable(struct pglist_data *pgdat)
 		pgdat_reclaimable_pages(pgdat) * 6;
 }
 
-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
+/**
+ * lruvec_lru_size - Returns the number of pages on the given LRU list.
+ * @lruvec: lru vector
+ * @lru: lru to use
+ * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
+ */
+unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
 {
+	unsigned long lru_size;
+	int zid;
+
 	if (!mem_cgroup_disabled())
-		return mem_cgroup_get_lru_size(lruvec, lru);
+		lru_size = mem_cgroup_get_lru_size(lruvec, lru);
+	else
+		lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
 
-	return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
-}
+	for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid];
+		unsigned long size;
 
-unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
-				   int zone_idx)
-{
-	if (!mem_cgroup_disabled())
-		return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
+		if (!managed_zone(zone))
+			continue;
+
+		if (!mem_cgroup_disabled())
+			size = mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+		else
+			size = zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zid],
+				       NR_ZONE_LRU_BASE + lru);
+		lru_size -= min(size, lru_size);
+	}
+
+	return lru_size;
 
-	return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
-			       NR_ZONE_LRU_BASE + lru);
 }
 
 /*
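
Aside (illustration, not part of the diff): the reworked lruvec_lru_size() starts from the node-wide count and subtracts the per-zone counts of every zone above zone_idx, so passing MAX_NR_ZONES yields the whole list. A minimal userspace model of that subtraction; the zone names and counts below are invented:

    #include <stdio.h>

    #define MAX_NR_ZONES 4  /* pretend: DMA, DMA32, NORMAL, MOVABLE */

    /* invented per-zone page counts for one LRU list */
    static unsigned long zone_lru_pages[MAX_NR_ZONES] = { 100, 2000, 30000, 4000 };

    /* pages of the LRU eligible for zones 0..zone_idx; MAX_NR_ZONES means the whole list */
    static unsigned long lru_size_model(int zone_idx)
    {
        unsigned long lru_size = 0;
        int zid;

        for (zid = 0; zid < MAX_NR_ZONES; zid++)
            lru_size += zone_lru_pages[zid];          /* node-wide total */

        for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
            unsigned long size = zone_lru_pages[zid]; /* ineligible zone */

            lru_size -= size < lru_size ? size : lru_size;
        }
        return lru_size;
    }

    int main(void)
    {
        printf("whole list:   %lu\n", lru_size_model(MAX_NR_ZONES)); /* 36100 */
        printf("up to zone 1: %lu\n", lru_size_model(1));            /* 2100 */
        return 0;
    }
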
@@ -912,6 +929,17 @@ static void page_check_dirty_writeback(struct page *page,
 		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
+struct reclaim_stat {
+	unsigned nr_dirty;
+	unsigned nr_unqueued_dirty;
+	unsigned nr_congested;
+	unsigned nr_writeback;
+	unsigned nr_immediate;
+	unsigned nr_activate;
+	unsigned nr_ref_keep;
+	unsigned nr_unmap_fail;
+};
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -919,22 +947,20 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				      struct pglist_data *pgdat,
 				      struct scan_control *sc,
 				      enum ttu_flags ttu_flags,
-				      unsigned long *ret_nr_dirty,
-				      unsigned long *ret_nr_unqueued_dirty,
-				      unsigned long *ret_nr_congested,
-				      unsigned long *ret_nr_writeback,
-				      unsigned long *ret_nr_immediate,
+				      struct reclaim_stat *stat,
 				      bool force_reclaim)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
-	unsigned long nr_unqueued_dirty = 0;
-	unsigned long nr_dirty = 0;
-	unsigned long nr_congested = 0;
-	unsigned long nr_reclaimed = 0;
-	unsigned long nr_writeback = 0;
-	unsigned long nr_immediate = 0;
+	unsigned nr_unqueued_dirty = 0;
+	unsigned nr_dirty = 0;
+	unsigned nr_congested = 0;
+	unsigned nr_reclaimed = 0;
+	unsigned nr_writeback = 0;
+	unsigned nr_immediate = 0;
+	unsigned nr_ref_keep = 0;
+	unsigned nr_unmap_fail = 0;
 
 	cond_resched();
 
@@ -1073,6 +1099,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
 		case PAGEREF_KEEP:
+			nr_ref_keep++;
 			goto keep_locked;
 		case PAGEREF_RECLAIM:
 		case PAGEREF_RECLAIM_CLEAN:
@@ -1110,6 +1137,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			(ttu_flags | TTU_BATCH_FLUSH | TTU_LZFREE) :
 			(ttu_flags | TTU_BATCH_FLUSH))) {
 		case SWAP_FAIL:
+			nr_unmap_fail++;
 			goto activate_locked;
 		case SWAP_AGAIN:
 			goto keep_locked;
@@ -1276,11 +1304,16 @@ keep:
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
 
-	*ret_nr_dirty += nr_dirty;
-	*ret_nr_congested += nr_congested;
-	*ret_nr_unqueued_dirty += nr_unqueued_dirty;
-	*ret_nr_writeback += nr_writeback;
-	*ret_nr_immediate += nr_immediate;
+	if (stat) {
+		stat->nr_dirty = nr_dirty;
+		stat->nr_congested = nr_congested;
+		stat->nr_unqueued_dirty = nr_unqueued_dirty;
+		stat->nr_writeback = nr_writeback;
+		stat->nr_immediate = nr_immediate;
+		stat->nr_activate = pgactivate;
+		stat->nr_ref_keep = nr_ref_keep;
+		stat->nr_unmap_fail = nr_unmap_fail;
+	}
 	return nr_reclaimed;
 }
 
@@ -1292,7 +1325,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 		.priority = DEF_PRIORITY,
 		.may_unmap = 1,
 	};
-	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+	unsigned long ret;
 	struct page *page, *next;
 	LIST_HEAD(clean_pages);
 
@@ -1305,8 +1338,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 	}
 
 	ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
-			TTU_UNMAP|TTU_IGNORE_ACCESS,
-			&dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+			TTU_UNMAP|TTU_IGNORE_ACCESS, NULL, true);
 	list_splice(&clean_pages, page_list);
 	mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
 	return ret;
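
Aside (illustration, not part of the diff): folding the five unsigned long * out-parameters into one optional struct keeps call sites short and lets callers that do not need the numbers pass NULL, as reclaim_clean_pages_from_list() now does. A small userspace sketch of that calling convention; the names are made up, not the kernel API:

    #include <stdio.h>
    #include <stddef.h>

    struct scan_stat {          /* modeled after struct reclaim_stat */
        unsigned nr_dirty;
        unsigned nr_writeback;
    };

    /* fills *stat only when the caller supplied one */
    static unsigned long scan_items(unsigned long n, struct scan_stat *stat)
    {
        unsigned nr_dirty = 0, nr_writeback = 0;
        unsigned long reclaimed = 0;
        unsigned long i;

        for (i = 0; i < n; i++) {
            if (i % 3 == 0)
                nr_dirty++;     /* pretend every third item is dirty */
            else
                reclaimed++;
        }

        if (stat) {             /* optional, like the NULL in the diff */
            stat->nr_dirty = nr_dirty;
            stat->nr_writeback = nr_writeback;
        }
        return reclaimed;
    }

    int main(void)
    {
        struct scan_stat stat;
        unsigned long reclaimed;

        scan_items(10, NULL);                /* caller that does not need stats */
        reclaimed = scan_items(10, &stat);   /* caller that does */
        printf("reclaimed %lu, dirty %u\n", reclaimed, stat.nr_dirty);
        return 0;
    }
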
@@ -1437,6 +1469,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	unsigned long nr_taken = 0;
 	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
 	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
+	unsigned long skipped = 0, total_skipped = 0;
 	unsigned long scan, nr_pages;
 	LIST_HEAD(pages_skipped);
 
@@ -1488,14 +1521,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	 */
 	if (!list_empty(&pages_skipped)) {
 		int zid;
-		unsigned long total_skipped = 0;
 
 		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
 			if (!nr_skipped[zid])
 				continue;
 
 			__count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
-			total_skipped += nr_skipped[zid];
+			skipped += nr_skipped[zid];
 		}
 
 		/*
@@ -1503,13 +1535,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		 * close to unreclaimable. If the LRU list is empty, account
 		 * skipped pages as a full scan.
 		 */
-		scan += list_empty(src) ? total_skipped : total_skipped >> 2;
+		total_skipped = list_empty(src) ? skipped : skipped >> 2;
 
 		list_splice(&pages_skipped, src);
 	}
-	*nr_scanned = scan;
-	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
-				    nr_taken, mode, is_file_lru(lru));
+	*nr_scanned = scan + total_skipped;
+	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
+				    scan, skipped, nr_taken, mode, lru);
 	update_lru_sizes(lruvec, lru, nr_zone_taken);
 	return nr_taken;
 }
@@ -1669,30 +1701,6 @@ static int current_may_throttle(void)
 		bdi_write_congested(current->backing_dev_info);
 }
 
-static bool inactive_reclaimable_pages(struct lruvec *lruvec,
-					struct scan_control *sc, enum lru_list lru)
-{
-	int zid;
-	struct zone *zone;
-	int file = is_file_lru(lru);
-	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-
-	if (!global_reclaim(sc))
-		return true;
-
-	for (zid = sc->reclaim_idx; zid >= 0; zid--) {
-		zone = &pgdat->node_zones[zid];
-		if (!managed_zone(zone))
-			continue;
-
-		if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
-				LRU_FILE * file) >= SWAP_CLUSTER_MAX)
-			return true;
-	}
-
-	return false;
-}
-
 /*
  * shrink_inactive_list() is a helper for shrink_node().  It returns the number
  * of reclaimed pages
@@ -1705,19 +1713,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	unsigned long nr_scanned;
 	unsigned long nr_reclaimed = 0;
 	unsigned long nr_taken;
-	unsigned long nr_dirty = 0;
-	unsigned long nr_congested = 0;
-	unsigned long nr_unqueued_dirty = 0;
-	unsigned long nr_writeback = 0;
-	unsigned long nr_immediate = 0;
+	struct reclaim_stat stat = {};
 	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
 
-	if (!inactive_reclaimable_pages(lruvec, sc, lru))
-		return 0;
-
 	while (unlikely(too_many_isolated(pgdat, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
 
@@ -1754,9 +1755,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		return 0;
 
 	nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP,
-				&nr_dirty, &nr_unqueued_dirty, &nr_congested,
-				&nr_writeback, &nr_immediate,
-				false);
+				&stat, false);
 
 	spin_lock_irq(&pgdat->lru_lock);
 
@@ -1790,7 +1789,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	 * of pages under pages flagged for immediate reclaim and stall if any
 	 * are encountered in the nr_immediate check below.
 	 */
-	if (nr_writeback && nr_writeback == nr_taken)
+	if (stat.nr_writeback && stat.nr_writeback == nr_taken)
 		set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
 	/*
@@ -1802,7 +1801,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * Tag a zone as congested if all the dirty pages scanned were
 		 * backed by a congested BDI and wait_iff_congested will stall.
 		 */
-		if (nr_dirty && nr_dirty == nr_congested)
+		if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
 			set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
 		/*
@@ -1811,7 +1810,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
 		 * reclaim context.
 		 */
-		if (nr_unqueued_dirty == nr_taken)
+		if (stat.nr_unqueued_dirty == nr_taken)
 			set_bit(PGDAT_DIRTY, &pgdat->flags);
 
 		/*
@@ -1820,7 +1819,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * that pages are cycling through the LRU faster than
 		 * they are written so also forcibly stall.
 		 */
-		if (nr_immediate && current_may_throttle())
+		if (stat.nr_immediate && current_may_throttle())
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 
@@ -1835,6 +1834,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
 			nr_scanned, nr_reclaimed,
+			stat.nr_dirty, stat.nr_writeback,
+			stat.nr_congested, stat.nr_immediate,
+			stat.nr_activate, stat.nr_ref_keep,
+			stat.nr_unmap_fail,
 			sc->priority, file);
 	return nr_reclaimed;
 }
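
Aside (illustration, not part of the diff): with the counters gathered in struct reclaim_stat, the stall heuristics in shrink_inactive_list() read as plain comparisons against nr_taken. A rough userspace model of those checks; the flag names are invented, the comparisons mirror the diff:

    #include <stdio.h>

    struct reclaim_stat {
        unsigned nr_dirty;
        unsigned nr_unqueued_dirty;
        unsigned nr_congested;
        unsigned nr_writeback;
        unsigned nr_immediate;
    };

    enum { F_WRITEBACK = 1, F_CONGESTED = 2, F_DIRTY = 4, F_THROTTLE = 8 };

    /* which pgdat flags / stalls the heuristics would pick for one batch */
    static int classify(const struct reclaim_stat *stat, unsigned nr_taken)
    {
        int flags = 0;

        if (stat->nr_writeback && stat->nr_writeback == nr_taken)
            flags |= F_WRITEBACK;   /* everything isolated was under writeback */
        if (stat->nr_dirty && stat->nr_dirty == stat->nr_congested)
            flags |= F_CONGESTED;   /* all dirty pages sat on a congested BDI */
        if (stat->nr_unqueued_dirty == nr_taken)
            flags |= F_DIRTY;       /* dirty pages not yet queued for writeback */
        if (stat->nr_immediate)
            flags |= F_THROTTLE;    /* pages cycling faster than they are written */

        return flags;
    }

    int main(void)
    {
        struct reclaim_stat stat = { .nr_dirty = 32, .nr_congested = 32 };

        printf("flags: %d\n", classify(&stat, 32)); /* 2 = F_CONGESTED only */
        return 0;
    }
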
@@ -1855,17 +1858,19 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  *
  * The downside is that we have to touch page->_refcount against each page.
  * But we had to alter page->flags anyway.
+ *
+ * Returns the number of pages moved to the given lru.
  */
-static void move_active_pages_to_lru(struct lruvec *lruvec,
+static unsigned move_active_pages_to_lru(struct lruvec *lruvec,
 				     struct list_head *list,
 				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-	unsigned long pgmoved = 0;
 	struct page *page;
 	int nr_pages;
+	int nr_moved = 0;
 
 	while (!list_empty(list)) {
 		page = lru_to_page(list);
@@ -1877,7 +1882,6 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 		nr_pages = hpage_nr_pages(page);
 		update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
 		list_move(&page->lru, &lruvec->lists[lru]);
-		pgmoved += nr_pages;
 
 		if (put_page_testzero(page)) {
 			__ClearPageLRU(page);
@@ -1891,11 +1895,15 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 				spin_lock_irq(&pgdat->lru_lock);
 			} else
 				list_add(&page->lru, pages_to_free);
+		} else {
+			nr_moved += nr_pages;
 		}
 	}
 
 	if (!is_active_lru(lru))
-		__count_vm_events(PGDEACTIVATE, pgmoved);
+		__count_vm_events(PGDEACTIVATE, nr_moved);
+
+	return nr_moved;
 }
 
 static void shrink_active_list(unsigned long nr_to_scan,
@@ -1911,7 +1919,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
-	unsigned long nr_rotated = 0;
+	unsigned nr_deactivate, nr_activate;
+	unsigned nr_rotated = 0;
 	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@@ -1989,13 +1998,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
-	move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
+	nr_activate = move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+	nr_deactivate = move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&pgdat->lru_lock);
 
 	mem_cgroup_uncharge_list(&l_hold);
 	free_hot_cold_page_list(&l_hold, true);
+	trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
+			nr_deactivate, nr_rotated, sc->priority, file);
 }
 
 /*
@@ -2025,14 +2036,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
  *    10TB     320        32GB
  */
 static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
-						struct scan_control *sc)
+						struct scan_control *sc, bool trace)
 {
 	unsigned long inactive_ratio;
-	unsigned long inactive;
-	unsigned long active;
+	unsigned long inactive, active;
+	enum lru_list inactive_lru = file * LRU_FILE;
+	enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
 	unsigned long gb;
-	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-	int zid;
 
 	/*
 	 * If we don't have swap space, anonymous page deactivation
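
Aside (illustration, not part of the diff): move_active_pages_to_lru() now returns how many pages actually stayed on the target list, so pages that lose their last reference while being moved are freed rather than counted. A tiny userspace sketch of that counting pattern with an invented page structure:

    #include <stdio.h>

    struct fake_page {
        int refcount;
        int nr_pages;   /* 1 for a base page, >1 for a huge page */
    };

    /* returns pages moved; a page whose refcount drops to zero is freed instead */
    static unsigned move_pages(struct fake_page *pages, int n)
    {
        unsigned nr_moved = 0;
        int i;

        for (i = 0; i < n; i++) {
            if (--pages[i].refcount == 0)
                continue;                   /* would go to the pages_to_free list */
            nr_moved += pages[i].nr_pages;  /* stayed on the target LRU */
        }
        return nr_moved;
    }

    int main(void)
    {
        struct fake_page pages[] = { {2, 1}, {1, 1}, {2, 512} };

        printf("moved %u pages\n", move_pages(pages, 3)); /* 513: one page is freed */
        return 0;
    }
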
@@ -2041,27 +2051,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 	if (!file && !total_swap_pages)
 		return false;
 
-	inactive = lruvec_lru_size(lruvec, file * LRU_FILE);
-	active = lruvec_lru_size(lruvec, file * LRU_FILE + LRU_ACTIVE);
-
-	/*
-	 * For zone-constrained allocations, it is necessary to check if
-	 * deactivations are required for lowmem to be reclaimed. This
-	 * calculates the inactive/active pages available in eligible zones.
-	 */
-	for (zid = sc->reclaim_idx + 1; zid < MAX_NR_ZONES; zid++) {
-		struct zone *zone = &pgdat->node_zones[zid];
-		unsigned long inactive_zone, active_zone;
-
-		if (!managed_zone(zone))
-			continue;
-
-		inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
-		active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
-
-		inactive -= min(inactive, inactive_zone);
-		active -= min(active, active_zone);
-	}
+	inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
+	active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
 
 	gb = (inactive + active) >> (30 - PAGE_SHIFT);
 	if (gb)
 		inactive_ratio = int_sqrt(10 * gb);
@@ -2069,6 +2060,13 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
 	else
 		inactive_ratio = 1;
 
+	if (trace)
+		trace_mm_vmscan_inactive_list_is_low(lruvec_pgdat(lruvec)->node_id,
+				sc->reclaim_idx,
+				lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
+				lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
+				inactive_ratio, file);
+
 	return inactive * inactive_ratio < active;
 }
 
@@ -2076,7 +2074,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 				 struct lruvec *lruvec, struct scan_control *sc)
 {
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(lruvec, is_file_lru(lru), sc))
+		if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
 			shrink_active_list(nr_to_scan, lruvec, sc, lru);
 		return 0;
 	}
@@ -2207,8 +2205,8 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * lruvec even if it has plenty of old anonymous pages unless the
 	 * system is under heavy pressure.
 	 */
-	if (!inactive_list_is_low(lruvec, true, sc) &&
-	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE) >> sc->priority) {
+	if (!inactive_list_is_low(lruvec, true, sc, false) &&
+	    lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -2234,10 +2232,10 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
 	 * anon in [0], file in [1]
 	 */
 
-	anon  = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON) +
-		lruvec_lru_size(lruvec, LRU_INACTIVE_ANON);
-	file  = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
-		lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
+	anon  = lruvec_lru_size(lruvec, LRU_ACTIVE_ANON, MAX_NR_ZONES) +
+		lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, MAX_NR_ZONES);
+	file  = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES) +
+		lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES);
 
 	spin_lock_irq(&pgdat->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
@@ -2275,7 +2273,7 @@ out:
 			unsigned long size;
 			unsigned long scan;
 
-			size = lruvec_lru_size(lruvec, lru);
+			size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
 			scan = size >> sc->priority;
 
 			if (!scan && pass && force_scan)
@@ -2432,7 +2430,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_list_is_low(lruvec, false, sc))
+	if (inactive_list_is_low(lruvec, false, sc, true))
 		shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);
 }
 
@@ -3082,7 +3080,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
-		if (inactive_list_is_low(lruvec, false, sc))
+		if (inactive_list_is_low(lruvec, false, sc, true))
 			shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON);
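
Aside (illustration, not part of the diff): inactive_list_is_low() now sizes both lists with lruvec_lru_size(..., sc->reclaim_idx) and keeps the square-root ratio shown in the table above (10TB of pages gives a ratio of 320, leaving roughly 32GB inactive). A standalone model of that check, with a naive integer square root standing in for the kernel's int_sqrt():

    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* naive integer square root, standing in for the kernel's int_sqrt() */
    static unsigned long isqrt(unsigned long x)
    {
        unsigned long r = 0;

        while ((r + 1) * (r + 1) <= x)
            r++;
        return r;
    }

    /* mirrors the gb-based ratio: deactivate only while inactive * ratio < active */
    static int inactive_is_low(unsigned long inactive, unsigned long active)
    {
        unsigned long gb = (inactive + active) >> (30 - PAGE_SHIFT);
        unsigned long ratio = gb ? isqrt(10 * gb) : 1;

        return inactive * ratio < active;
    }

    int main(void)
    {
        /* 10TB of pages total: ratio is isqrt(10 * 10240) = 320, so ~32GB may stay inactive */
        unsigned long total = 10UL << (40 - PAGE_SHIFT);
        unsigned long inactive = total / 321;

        printf("ratio example: low=%d\n", inactive_is_low(inactive, total - inactive));
        return 0;
    }
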