From a5c6d6509342785bef53bf9508e1842b303f1878 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Apr 2018 16:23:09 -0700 Subject: mm, page_alloc: extend kernelcore and movablecore for percent Both kernelcore= and movablecore= can be used to define the amount of ZONE_NORMAL and ZONE_MOVABLE on a system, respectively. This requires the system memory capacity to be known when specifying the command line, however. This introduces the ability to define both kernelcore= and movablecore= as a percentage of total system memory. This is convenient for systems software that wants to define the amount of ZONE_MOVABLE, for example, as a proportion of a system's memory rather than a hardcoded byte value. To define the percentage, the final character of the parameter should be a '%'. mhocko: "why is anyone using these options nowadays?" rientjes: : : Fragmentation of non-__GFP_MOVABLE pages due to low on memory : situations can pollute most pageblocks on the system, as much as 1GB of : slab being fragmented over 128GB of memory, for example. When the : amount of kernel memory is well bounded for certain systems, it is : better to aggressively reclaim from existing MIGRATE_UNMOVABLE : pageblocks rather than eagerly fall back to others. : : We have additional patches that help with this fragmentation if you're : interested, specifically kcompactd compaction of MIGRATE_UNMOVABLE : pageblocks triggered by fallback of non-__GFP_MOVABLE allocations and : draining of pcp lists back to the zone free area to prevent stranding. 
[rientjes@google.com: updates] Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802131700160.71590@chino.kir.corp.google.com Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1802121622470.179479@chino.kir.corp.google.com Signed-off-by: David Rientjes Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Cc: Jonathan Corbet Cc: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5ffe4c4121bd..7993021a1f6e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1840,30 +1840,29 @@ keepinitrd [HW,ARM] kernelcore= [KNL,X86,IA-64,PPC] - Format: nn[KMGTPE] | "mirror" - This parameter - specifies the amount of memory usable by the kernel - for non-movable allocations. The requested amount is - spread evenly throughout all nodes in the system. The - remaining memory in each node is used for Movable - pages. In the event, a node is too small to have both - kernelcore and Movable pages, kernelcore pages will - take priority and other nodes will have a larger number - of Movable pages. The Movable zone is used for the - allocation of pages that may be reclaimed or moved - by the page migration subsystem. This means that - HugeTLB pages may not be allocated from this zone. - Note that allocations like PTEs-from-HighMem still - use the HighMem zone if it exists, and the Normal + Format: nn[KMGTPE] | nn% | "mirror" + This parameter specifies the amount of memory usable by + the kernel for non-movable allocations. The requested + amount is spread evenly throughout all nodes in the + system as ZONE_NORMAL. The remaining memory is used for + movable memory in its own zone, ZONE_MOVABLE. 
In the + event, a node is too small to have both ZONE_NORMAL and + ZONE_MOVABLE, kernelcore memory will take priority and + other nodes will have a larger ZONE_MOVABLE. + + ZONE_MOVABLE is used for the allocation of pages that + may be reclaimed or moved by the page migration + subsystem. Note that allocations like PTEs-from-HighMem + still use the HighMem zone if it exists, and the Normal zone if it does not. - Instead of specifying the amount of memory (nn[KMGTPE]), - you can specify "mirror" option. In case "mirror" + It is possible to specify the exact amount of memory in + the form of "nn[KMGTPE]", a percentage of total system + memory in the form of "nn%", or "mirror". If "mirror" option is specified, mirrored (reliable) memory is used for non-movable allocations and remaining memory is used - for Movable pages. nn[KMGTPE] and "mirror" are exclusive, - so you can NOT specify nn[KMGTPE] and "mirror" at the same - time. + for Movable pages. "nn[KMGTPE]", "nn%", and "mirror" + are exclusive, so you cannot specify multiple forms. kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port. Format: [,poll interval] @@ -2377,13 +2376,14 @@ mousedev.yres= [MOUSE] Vertical screen resolution, used for devices reporting absolute coordinates, such as tablets - movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter - is similar to kernelcore except it specifies the - amount of memory used for migratable allocations. - If both kernelcore and movablecore is specified, - then kernelcore will be at *least* the specified - value but may be more. If movablecore on its own - is specified, the administrator must be careful + movablecore= [KNL,X86,IA-64,PPC] + Format: nn[KMGTPE] | nn% + This parameter is the complement to kernelcore=, it + specifies the amount of memory used for migratable + allocations. If both kernelcore and movablecore is + specified, then kernelcore will be at *least* the + specified value but may be more. 
If movablecore on its + own is specified, the administrator must be careful that the amount of memory usable for all allocations is not too small. -- cgit v1.2.3 From 5ecd9d403ad081ed2de7b118c1e96124d4e0ba6c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Apr 2018 16:25:16 -0700 Subject: mm, page_alloc: wakeup kcompactd even if kswapd cannot free more memory Kswapd will not wake up if per-zone watermarks are not failing or if too many previous attempts at background reclaim have failed. This can be true if there is a lot of free memory available. For high-order allocations, kswapd is responsible for waking up kcompactd for background compaction. If the zone is not below its watermarks or reclaim has recently failed (lots of free memory, nothing left to reclaim), kcompactd does not get woken up. When __GFP_DIRECT_RECLAIM is not allowed, allow kcompactd to still be woken up even if kswapd will not reclaim. This allows high-order allocations, such as thp, to still trigger background compaction even when the zone has an abundance of free memory. 
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803111659420.209721@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index ba976805853a..66bfd8396877 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -111,7 +111,7 @@ my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flag my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; -my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)'; +my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*) gfp_flags=([A-Z_|]*)'; my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) classzone_idx=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_skipped=([0-9]*) nr_taken=([0-9]*) lru=([a-z_]*)'; my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) nr_dirty=([0-9]*) nr_writeback=([0-9]*) nr_congested=([0-9]*) nr_immediate=([0-9]*) nr_activate=([0-9]*) nr_ref_keep=([0-9]*) nr_unmap_fail=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)'; my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; @@ -201,7 +201,7 @@ $regex_kswapd_sleep = generate_traceevent_regex( $regex_wakeup_kswapd = generate_traceevent_regex( "vmscan/mm_vmscan_wakeup_kswapd", $regex_wakeup_kswapd_default, - "nid", "zid", "order"); + "nid", "zid", "order", "gfp_flags"); $regex_lru_isolate = 
generate_traceevent_regex( "vmscan/mm_vmscan_lru_isolate", $regex_lru_isolate_default, -- cgit v1.2.3