diff options
author | Mel Gorman <mgorman@techsingularity.net> | 2016-07-28 15:47:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 16:07:41 -0700 |
commit | bca6759258dbef378bcf5b872177bcd2259ceb68 (patch) | |
tree | 17b2a1307ab70e4fb6f8f2cf4b535240b2433d5d /mm/page-writeback.c | |
parent | e2ecc8a79ed49f7838b4fdf352c4c48cec9424ac (diff) | |
download | linux-bca6759258dbef378bcf5b872177bcd2259ceb68.tar.gz linux-bca6759258dbef378bcf5b872177bcd2259ceb68.tar.bz2 linux-bca6759258dbef378bcf5b872177bcd2259ceb68.zip |
mm, vmstat: remove zone and node double accounting by approximating retries
The number of LRU pages, dirty pages and writeback pages must be
accounted for on both zones and nodes because of the reclaim retry
logic, compaction retry logic and highmem calculations all depending on
per-zone stats.
Many lowmem allocations are immune from OOM kill due to a check in
__alloc_pages_may_oom for (ac->high_zoneidx < ZONE_NORMAL) since commit
03668b3ceb0c ("oom: avoid oom killer for lowmem allocations"). The
exception is costly high-order allocations or allocations that cannot
fail. If the __alloc_pages_may_oom avoids OOM-kill for low-order lowmem
allocations then it would fall through to __alloc_pages_direct_compact.
This patch will blindly retry reclaim for zone-constrained allocations
in should_reclaim_retry up to MAX_RECLAIM_RETRIES. This is not ideal
but without per-zone stats there are not many alternatives. The impact
it that zone-constrained allocations may delay before considering the
OOM killer.
As there is no guarantee enough memory can ever be freed to satisfy
compaction, this patch avoids retrying compaction for zone-contrained
allocations.
In combination, that means that the per-node stats can be used when
deciding whether to continue reclaim using a rough approximation. While
it is possible this will make the wrong decision on occasion, it will
not infinite loop as the number of reclaim attempts is capped by
MAX_RECLAIM_RETRIES.
The final step is calculating the number of dirtyable highmem pages. As
those calculations only care about the global count of file pages in
highmem. This patch uses a global counter used instead of per-zone
stats as it is sufficient.
In combination, this allows the per-zone LRU and dirty state counters to
be removed.
[mgorman@techsingularity.net: fix acct_highmem_file_pages()]
Link: http://lkml.kernel.org/r/1468853426-12858-4-git-send-email-mgorman@techsingularity.netLink: http://lkml.kernel.org/r/1467970510-21195-35-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Suggested by: Michal Hocko <mhocko@kernel.org>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r-- | mm/page-writeback.c | 13 |
1 files changed, 5 insertions, 8 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c02aa603f5a..0bca2376bd42 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -299,6 +299,9 @@ static unsigned long node_dirtyable_memory(struct pglist_data *pgdat) return nr_pages; } +#ifdef CONFIG_HIGHMEM +atomic_t highmem_file_pages; +#endif static unsigned long highmem_dirtyable_memory(unsigned long total) { @@ -306,18 +309,17 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) int node; unsigned long x = 0; int i; + unsigned long dirtyable = atomic_read(&highmem_file_pages); for_each_node_state(node, N_HIGH_MEMORY) { for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) { struct zone *z; - unsigned long dirtyable; if (!is_highmem_idx(i)) continue; z = &NODE_DATA(node)->node_zones[i]; - dirtyable = zone_page_state(z, NR_FREE_PAGES) + - zone_page_state(z, NR_ZONE_LRU_FILE); + dirtyable += zone_page_state(z, NR_FREE_PAGES); /* watch for underflows */ dirtyable -= min(dirtyable, high_wmark_pages(z)); @@ -2460,7 +2462,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); __inc_node_page_state(page, NR_FILE_DIRTY); - __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); __inc_node_page_state(page, NR_DIRTIED); __inc_wb_stat(wb, WB_RECLAIMABLE); __inc_wb_stat(wb, WB_DIRTIED); @@ -2482,7 +2483,6 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, if (mapping_cap_account_dirty(mapping)) { mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); dec_node_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); task_io_account_cancelled_write(PAGE_SIZE); } @@ -2739,7 +2739,6 @@ int clear_page_dirty_for_io(struct page *page) if (TestClearPageDirty(page)) { mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); dec_node_page_state(page, NR_FILE_DIRTY); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } @@ -2786,7 +2785,6 @@ int test_clear_page_writeback(struct page *page) if (ret) { mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); dec_node_page_state(page, NR_WRITEBACK); - dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } unlock_page_memcg(page); @@ -2841,7 +2839,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write) if (!ret) { mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); inc_node_page_state(page, NR_WRITEBACK); - inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); } unlock_page_memcg(page); return ret; |