summaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
author Baolin Wang <baolin.wang@linux.alibaba.com> 2021-11-05 13:41:46 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2021-11-06 13:30:39 -0700
commit 38e719ab26735aa2c5d9d422fc4b741cbd36e700 (patch)
tree 0259e625d5c1c364b3f42a4936ee8f94b9fd1506 /mm/hugetlb.c
parent 12b613206474cea36671d6e3a7be7d1db7eb8741 (diff)
downloadlinux-stable-38e719ab26735aa2c5d9d422fc4b741cbd36e700.tar.gz
linux-stable-38e719ab26735aa2c5d9d422fc4b741cbd36e700.tar.bz2
linux-stable-38e719ab26735aa2c5d9d422fc4b741cbd36e700.zip
hugetlb: support node specified when using cma for gigantic hugepages
Currently the size of the CMA area for runtime allocation of gigantic hugepages is balanced across all online nodes, but we also want to be able to specify the size of CMA per node, or for only one node in some cases, which is similar to patch [1].

For example, on some multi-node systems, each node's memory size can be different, so allocating the same size of CMA for each node is not suitable for the low-memory nodes. Meanwhile, some workloads like DPDK, mentioned by Zhenguo in patch [1], only need hugepages on one node.

On the other hand, we have some machines with multiple types of memory, like DRAM and PMEM (persistent memory). On such a system, we may want to place all the hugepages only on the DRAM node, or specify the proportion between the DRAM node and the PMEM node, to tune the performance of the workloads.

Thus this patch adds a node format for the 'hugetlb_cma' parameter to support specifying the size of CMA per node. An example is as follows:

hugetlb_cma=0:5G,2:5G

which means allocating a 5G CMA area on node 0 and on node 2 respectively. Users should use the node-specific sysfs file to allocate the gigantic hugepages if the CMA size was specified for that node.

Link: https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@gmail.com [1]
Link: https://lkml.kernel.org/r/bb790775ca60bb8f4b26956bb3f6988f74e075c7.1634261144.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- mm/hugetlb.c | 86
1 files changed, 77 insertions, 9 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8028fb7677eb..b86d27870c54 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -50,6 +50,7 @@ struct hstate hstates[HUGE_MAX_HSTATE];
#ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES];
+static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
static bool hugetlb_cma_page(struct page *page, unsigned int order)
{
return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
@@ -6762,7 +6763,38 @@ static bool cma_reserve_called __initdata;
static int __init cmdline_parse_hugetlb_cma(char *p)
{
- hugetlb_cma_size = memparse(p, &p);
+ int nid, count = 0;
+ unsigned long tmp;
+ char *s = p;
+
+ while (*s) {
+ if (sscanf(s, "%lu%n", &tmp, &count) != 1)
+ break;
+
+ if (s[count] == ':') {
+ nid = tmp;
+ if (nid < 0 || nid >= MAX_NUMNODES)
+ break;
+
+ s += count + 1;
+ tmp = memparse(s, &s);
+ hugetlb_cma_size_in_node[nid] = tmp;
+ hugetlb_cma_size += tmp;
+
+ /*
+ * Skip the separator if have one, otherwise
+ * break the parsing.
+ */
+ if (*s == ',')
+ s++;
+ else
+ break;
+ } else {
+ hugetlb_cma_size = memparse(p, &p);
+ break;
+ }
+ }
+
return 0;
}
@@ -6771,6 +6803,7 @@ early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
void __init hugetlb_cma_reserve(int order)
{
unsigned long size, reserved, per_node;
+ bool node_specific_cma_alloc = false;
int nid;
cma_reserve_called = true;
@@ -6778,6 +6811,31 @@ void __init hugetlb_cma_reserve(int order)
if (!hugetlb_cma_size)
return;
+ for (nid = 0; nid < MAX_NUMNODES; nid++) {
+ if (hugetlb_cma_size_in_node[nid] == 0)
+ continue;
+
+ if (!node_state(nid, N_ONLINE)) {
+ pr_warn("hugetlb_cma: invalid node %d specified\n", nid);
+ hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
+ hugetlb_cma_size_in_node[nid] = 0;
+ continue;
+ }
+
+ if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) {
+ pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n",
+ nid, (PAGE_SIZE << order) / SZ_1M);
+ hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
+ hugetlb_cma_size_in_node[nid] = 0;
+ } else {
+ node_specific_cma_alloc = true;
+ }
+ }
+
+ /* Validate the CMA size again in case some invalid nodes specified. */
+ if (!hugetlb_cma_size)
+ return;
+
if (hugetlb_cma_size < (PAGE_SIZE << order)) {
pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n",
(PAGE_SIZE << order) / SZ_1M);
@@ -6785,20 +6843,30 @@ void __init hugetlb_cma_reserve(int order)
return;
}
- /*
- * If 3 GB area is requested on a machine with 4 numa nodes,
- * let's allocate 1 GB on first three nodes and ignore the last one.
- */
- per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
- pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
- hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+ if (!node_specific_cma_alloc) {
+ /*
+ * If 3 GB area is requested on a machine with 4 numa nodes,
+ * let's allocate 1 GB on first three nodes and ignore the last one.
+ */
+ per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+ pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+ hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+ }
reserved = 0;
for_each_node_state(nid, N_ONLINE) {
int res;
char name[CMA_MAX_NAME];
- size = min(per_node, hugetlb_cma_size - reserved);
+ if (node_specific_cma_alloc) {
+ if (hugetlb_cma_size_in_node[nid] == 0)
+ continue;
+
+ size = hugetlb_cma_size_in_node[nid];
+ } else {
+ size = min(per_node, hugetlb_cma_size - reserved);
+ }
+
size = round_up(size, PAGE_SIZE << order);
snprintf(name, sizeof(name), "hugetlb%d", nid);