Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b620d74b0f66..3783e45bfc92 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6736,6 +6736,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
 	 * meet watermarks.
 	 */
 	for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) {
+		enum zone_stat_item item;
 		unsigned long free_pages;
 
 		if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
@@ -6746,11 +6747,33 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
 		/*
 		 * In defrag_mode, watermarks must be met in whole
 		 * blocks to avoid polluting allocator fallbacks.
+		 *
+		 * However, kswapd usually cannot accomplish this on
+		 * its own and needs kcompactd support. Once it's
+		 * reclaimed a compaction gap, and kswapd_shrink_node
+		 * has dropped order, simply ensure there are enough
+		 * base pages for compaction, wake kcompactd & sleep.
 		 */
-		if (defrag_mode)
-			free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
+		if (defrag_mode && order)
+			item = NR_FREE_PAGES_BLOCKS;
 		else
-			free_pages = zone_page_state(zone, NR_FREE_PAGES);
+			item = NR_FREE_PAGES;
+
+		/*
+		 * When there is a high number of CPUs in the system,
+		 * the cumulative error from the vmstat per-cpu cache
+		 * can blur the line between the watermarks. In that
+		 * case, be safe and get an accurate snapshot.
+		 *
+		 * TODO: NR_FREE_PAGES_BLOCKS moves in steps of
+		 * pageblock_nr_pages, while the vmstat pcp threshold
+		 * is limited to 125. On many configurations that
+		 * counter won't actually be per-cpu cached. But keep
+		 * things simple for now; revisit when somebody cares.
+		 */
+		free_pages = zone_page_state(zone, item);
+		if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark)
+			free_pages = zone_page_state_snapshot(zone, item);
 
 		if (__zone_watermark_ok(zone, order, mark, highest_zoneidx,
 					0, free_pages))
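The mechanism the patch leans on is the split between the cheap vmstat read and the drift-corrected one. Below is a minimal userspace sketch of that pattern, assuming a simplified counter model: struct counter, cheap_read(), snapshot_read(), read_free_pages(), NCPUS and all numeric values are hypothetical stand-ins for zone->vm_stat, zone_page_state(), zone_page_state_snapshot(), zone->percpu_drift_mark and the real per-cpu machinery.

/*
 * Hypothetical userspace model of the two vmstat read paths.
 * Not kernel code: every name and number here is made up for
 * illustration.
 */
#include <stdio.h>

#define NCPUS 8

struct counter {
	long global;		/* models zone->vm_stat[item]            */
	long pcp[NCPUS];	/* models per-cpu deltas not yet folded in */
};

/* Cheap read: global value only; may lag by up to NCPUS * threshold. */
static long cheap_read(const struct counter *c)
{
	return c->global;
}

/* Accurate read: fold in every CPU's pending delta. */
static long snapshot_read(const struct counter *c)
{
	long v = c->global;

	for (int cpu = 0; cpu < NCPUS; cpu++)
		v += c->pcp[cpu];
	return v;
}

/*
 * Mirrors the patched pgdat_balanced() logic: trust the cheap read
 * unless it falls below the drift mark, i.e. close enough to the
 * watermark that accumulated per-cpu error could flip the comparison.
 */
static long read_free_pages(const struct counter *c, long drift_mark)
{
	long free = cheap_read(c);

	if (drift_mark && free < drift_mark)
		free = snapshot_read(c);
	return free;
}

int main(void)
{
	/* 1000 pages globally, plus 40 freed pages still cached per CPU. */
	struct counter free_pages = { .global = 1000 };

	for (int cpu = 0; cpu < NCPUS; cpu++)
		free_pages.pcp[cpu] = 40;

	long drift_mark = 1500;	/* snapshot whenever below this */

	/* Cheap read says 1000; the drift-corrected read says 1320. */
	printf("cheap:     %ld\n", cheap_read(&free_pages));
	printf("corrected: %ld\n", read_free_pages(&free_pages, drift_mark));
	return 0;
}

On the design point the TODO hints at: per-cpu caching only pays off when individual updates usually stay under the pcp threshold. On a typical x86-64 configuration (4 KiB pages, 2 MiB pageblocks), pageblock_nr_pages is 512, well above the maximum threshold of 125 cited in the comment, so every whole-block update overflows the per-cpu delta and is folded into the global counter immediately; if so, the snapshot path is mostly redundant for NR_FREE_PAGES_BLOCKS, but harmless.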