From a9dd0a83104c01269ea36a9b4ec42b51edf85427 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 28 Jul 2016 15:46:02 -0700 Subject: mm, vmscan: make shrink_node decisions more node-centric Earlier patches focused on having direct reclaim and kswapd use data that is node-centric for reclaiming but shrink_node() itself still uses too much zone information. This patch removes unnecessary zone-based information with the most important decision being whether to continue reclaim or not. Some memcg APIs are adjusted as a result even though memcg itself still uses some zone information. [mgorman@techsingularity.net: optimization] Link: http://lkml.kernel.org/r/1468588165-12461-2-git-send-email-mgorman@techsingularity.net Link: http://lkml.kernel.org/r/1467970510-21195-14-git-send-email-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Hillf Danton Acked-by: Johannes Weiner Cc: Joonsoo Kim Cc: Minchan Kim Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 61 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 26 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index b7a276f4b1b0..46f7a71ed13b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2224,12 +2224,13 @@ static inline void init_tlb_ubc(void) #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */ /* - * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. + * This is a basic per-node page freer. Used by both kswapd and direct reclaim. */ -static void shrink_zone_memcg(struct zone *zone, struct mem_cgroup *memcg, +static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg, struct scan_control *sc, unsigned long *lru_pages) { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct zone *zone = &pgdat->node_zones[sc->reclaim_idx]; + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg); unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -2362,13 +2363,14 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct zone *zone, +static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; + int z; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -2402,21 +2404,29 @@ static inline bool should_continue_reclaim(struct zone *zone, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE); + inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); if (get_nr_swap_pages() > 0) - inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON); + inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(zone, sc->order, 0, 0)) { - case COMPACT_PARTIAL: - case COMPACT_CONTINUE: - return false; - default: - return true; + for (z = 0; z <= sc->reclaim_idx; z++) { + struct zone *zone = &pgdat->node_zones[z]; + if (!populated_zone(zone)) + continue; + + switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { + case COMPACT_PARTIAL: + case COMPACT_CONTINUE: + return false; + default: + /* check next zone */ + ; + } } + return true; } static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, @@ -2425,15 +2435,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, struct reclaim_state *reclaim_state = current->reclaim_state; unsigned long nr_reclaimed, nr_scanned; bool reclaimable = false; - struct zone *zone = &pgdat->node_zones[classzone_idx]; do { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, + .zone = &pgdat->node_zones[classzone_idx], .priority = sc->priority, }; - unsigned long zone_lru_pages = 0; + unsigned long node_lru_pages = 0; struct mem_cgroup *memcg; nr_reclaimed = sc->nr_reclaimed; @@ -2454,11 +2463,11 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, reclaimed = sc->nr_reclaimed; scanned = sc->nr_scanned; - shrink_zone_memcg(zone, memcg, sc, &lru_pages); - zone_lru_pages += lru_pages; + shrink_node_memcg(pgdat, memcg, sc, &lru_pages); + node_lru_pages += lru_pages; if (!global_reclaim(sc)) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), + shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->nr_scanned - scanned, lru_pages); @@ -2470,7 +2479,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, /* * Direct reclaim and kswapd have to scan all memory * cgroups to fulfill the overall scan target for the - * zone. + * node. * * Limit reclaim, on the other hand, only cares about * nr_to_reclaim pages to be reclaimed and it will @@ -2489,9 +2498,9 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, * the eligible LRU pages were scanned. */ if (global_reclaim(sc)) - shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL, + shrink_slab(sc->gfp_mask, pgdat->node_id, NULL, sc->nr_scanned - nr_scanned, - zone_lru_pages); + node_lru_pages); if (reclaim_state) { sc->nr_reclaimed += reclaim_state->reclaimed_slab; @@ -2506,7 +2515,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc, if (sc->nr_reclaimed - nr_reclaimed) reclaimable = true; - } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, + } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); return reclaimable; @@ -2906,7 +2915,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_MEMCG -unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, +unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, struct zone *zone, unsigned long *nr_scanned) @@ -2931,11 +2940,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. - * if we don't reclaim here, the shrink_zone from balance_pgdat + * if we don't reclaim here, the shrink_node from balance_pgdat * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_zone_memcg(zone, memcg, &sc, &lru_pages); + shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2994,7 +3003,7 @@ static void age_active_anon(struct pglist_data *pgdat, memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg); if (inactive_list_is_low(lruvec, false)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, -- cgit v1.2.3