summaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2017-07-10 15:48:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-10 16:32:31 -0700
commit4db9b2efe94967be34e3b136a93251a3c1736dd5 (patch)
treebff61c2620df913698bf2617ce6acb293e3fe817 /mm/hugetlb.c
parent7f252f277b66854c61d3abdd4c196d6dc64fa333 (diff)
downloadlinux-4db9b2efe94967be34e3b136a93251a3c1736dd5.tar.gz
linux-4db9b2efe94967be34e3b136a93251a3c1736dd5.tar.bz2
linux-4db9b2efe94967be34e3b136a93251a3c1736dd5.zip
hugetlb, memory_hotplug: prefer to use reserved pages for migration
new_node_page will try to use the origin's next NUMA node as the migration destination for hugetlb pages. If such a node doesn't have any preallocated pool it falls back to __alloc_buddy_huge_page_no_mpol to allocate a surplus page instead. This is quite subotpimal for any configuration when hugetlb pages are no distributed to all NUMA nodes evenly. Say we have a hotplugable node 4 and spare hugetlb pages are node 0 /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node3/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node4/hugepages/hugepages-2048kB/nr_hugepages:10000 /sys/devices/system/node/node5/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node6/hugepages/hugepages-2048kB/nr_hugepages:0 /sys/devices/system/node/node7/hugepages/hugepages-2048kB/nr_hugepages:0 Now we consume the whole pool on node 4 and try to offline this node. All the allocated pages should be moved to node0 which has enough preallocated pages to hold them. With the current implementation offlining very likely fails because hugetlb allocations during runtime are much less reliable. Fix this by reusing the nodemask which excludes migration source and try to find a first node which has a page in the preallocated pool first and fall back to __alloc_buddy_huge_page_no_mpol only when the whole pool is consumed. [akpm@linux-foundation.org: remove bogus arg from alloc_huge_page_nodemask() stub] Link: http://lkml.kernel.org/r/20170608074553.22152-3-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: zhong jiang <zhongjiang@huawei.com> Cc: Joonsoo Kim <js1304@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c27
1 files changed, 27 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 761a669d0b62..01c11ceb47d6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1723,6 +1723,33 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
return page;
}
+struct page *alloc_huge_page_nodemask(struct hstate *h, const nodemask_t *nmask)
+{
+ struct page *page = NULL;
+ int node;
+
+ spin_lock(&hugetlb_lock);
+ if (h->free_huge_pages - h->resv_huge_pages > 0) {
+ for_each_node_mask(node, *nmask) {
+ page = dequeue_huge_page_node_exact(h, node);
+ if (page)
+ break;
+ }
+ }
+ spin_unlock(&hugetlb_lock);
+ if (page)
+ return page;
+
+ /* No reservations, try to overcommit */
+ for_each_node_mask(node, *nmask) {
+ page = __alloc_buddy_huge_page_no_mpol(h, node);
+ if (page)
+ return page;
+ }
+
+ return NULL;
+}
+
/*
* Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.