path: root/mm/hugetlb_vmemmap.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2022-03-22 16:11:53 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-03-22 16:11:53 -0700
commit     3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (patch)
tree       06114247eb7760edca7b57cc0108a351ffe1971b /mm/hugetlb_vmemmap.c
parent     3fe2f7446f1e029b220f7f650df6d138f91651f2 (diff)
parent     15423a52cc84e23bc11e4a903cd775adc7c6ab00 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs

 - Most the MM patches which precede the patches in Willy's tree: kasan,
   pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap,
   sparsemem, vmalloc, pagealloc, memory-failure, mlock, hugetlb,
   userfaultfd, vmscan, compaction, mempolicy, oom-kill, migration, thp,
   cma, autonuma, psi, ksm, page-poison, madvise, memory-hotplug, rmap,
   zswap, uaccess, ioremap, highmem, cleanups, kfence, hmm, and damon.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits)
  mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
  Docs/ABI/testing: add DAMON sysfs interface ABI document
  Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface
  selftests/damon: add a test for DAMON sysfs interface
  mm/damon/sysfs: support DAMOS stats
  mm/damon/sysfs: support DAMOS watermarks
  mm/damon/sysfs: support schemes prioritization
  mm/damon/sysfs: support DAMOS quotas
  mm/damon/sysfs: support DAMON-based Operation Schemes
  mm/damon/sysfs: support the physical address space monitoring
  mm/damon/sysfs: link DAMON for virtual address spaces monitoring
  mm/damon: implement a minimal stub for sysfs-based DAMON interface
  mm/damon/core: add number of each enum type values
  mm/damon/core: allow non-exclusive DAMON start/stop
  Docs/damon: update outdated term 'regions update interval'
  Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling
  Docs/vm/damon: call low level monitoring primitives the operations
  mm/damon: remove unnecessary CONFIG_DAMON option
  mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}()
  mm/damon/dbgfs-test: fix is_target_id() change
  ...
Diffstat (limited to 'mm/hugetlb_vmemmap.c')
-rw-r--r--  mm/hugetlb_vmemmap.c  68
1 file changed, 37 insertions(+), 31 deletions(-)
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index c540c21e26f5..791626983c2e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -124,9 +124,9 @@
* page of page structs (page 0) associated with the HugeTLB page contains the 4
* page structs necessary to describe the HugeTLB. The only use of the remaining
* pages of page structs (page 1 to page 7) is to point to page->compound_head.
- * Therefore, we can remap pages 2 to 7 to page 1. Only 2 pages of page structs
+ * Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of page structs
* will be used for each HugeTLB page. This will allow us to free the remaining
- * 6 pages to the buddy allocator.
+ * 7 pages to the buddy allocator.
*
* Here is how things look after remapping.
*
@@ -134,30 +134,30 @@
* +-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
* | | | 0 | -------------> | 0 |
* | | +-----------+ +-----------+
- * | | | 1 | -------------> | 1 |
- * | | +-----------+ +-----------+
- * | | | 2 | ----------------^ ^ ^ ^ ^ ^
- * | | +-----------+ | | | | |
- * | | | 3 | ------------------+ | | | |
- * | | +-----------+ | | | |
- * | | | 4 | --------------------+ | | |
- * | PMD | +-----------+ | | |
- * | level | | 5 | ----------------------+ | |
- * | mapping | +-----------+ | |
- * | | | 6 | ------------------------+ |
- * | | +-----------+ |
- * | | | 7 | --------------------------+
+ * | | | 1 | ---------------^ ^ ^ ^ ^ ^ ^
+ * | | +-----------+ | | | | | |
+ * | | | 2 | -----------------+ | | | | |
+ * | | +-----------+ | | | | |
+ * | | | 3 | -------------------+ | | | |
+ * | | +-----------+ | | | |
+ * | | | 4 | ---------------------+ | | |
+ * | PMD | +-----------+ | | |
+ * | level | | 5 | -----------------------+ | |
+ * | mapping | +-----------+ | |
+ * | | | 6 | -------------------------+ |
+ * | | +-----------+ |
+ * | | | 7 | ---------------------------+
* | | +-----------+
* | |
* | |
* | |
* +-----------+
*
- * When a HugeTLB is freed to the buddy system, we should allocate 6 pages for
+ * When a HugeTLB is freed to the buddy system, we should allocate 7 pages for
* vmemmap pages and restore the previous mapping relationship.
*
* For the HugeTLB page of the pud level mapping. It is similar to the former.
- * We also can use this approach to free (PAGE_SIZE - 2) vmemmap pages.
+ * We also can use this approach to free (PAGE_SIZE - 1) vmemmap pages.
*
* Apart from the HugeTLB page of the pmd/pud level mapping, some architectures
* (e.g. aarch64) provides a contiguous bit in the translation table entries
@@ -166,7 +166,13 @@
*
* The contiguous bit is used to increase the mapping size at the pmd and pte
* (last) level. So this type of HugeTLB page can be optimized only when its
- * size of the struct page structs is greater than 2 pages.
+ * size of the struct page structs is greater than 1 page.
+ *
+ * Notice: The head vmemmap page is not freed to the buddy allocator and all
+ * tail vmemmap pages are mapped to the head vmemmap page frame. So we can see
+ * more than one struct page struct with PG_head (e.g. 8 per 2 MB HugeTLB page)
+ * associated with each HugeTLB page. The compound_head() can handle this
+ * correctly (more details refer to the comment above compound_head()).
*/
#define pr_fmt(fmt) "HugeTLB: " fmt
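
To put concrete numbers on the comment above: with 4 KiB base pages and a 64-byte struct page (x86_64 defaults, assumed here rather than stated by the patch), a 2 MiB HugeTLB page is described by 512 struct pages, i.e. 8 vmemmap pages, and after this change only the head vmemmap page keeps its own page frame. A minimal sketch of that arithmetic, under those assumptions:

/*
 * Illustrative arithmetic only; the constants below are assumptions of this
 * sketch (x86_64 defaults), not definitions taken from the patch.
 */
#define EX_PAGE_SIZE		4096UL	/* 4 KiB base page */
#define EX_STRUCT_PAGE_SIZE	64UL	/* sizeof(struct page) */
#define EX_HPAGE_NR_PAGES	512UL	/* 2 MiB / 4 KiB */

/* 512 * 64 bytes = 32 KiB of page structs = 8 vmemmap pages per HugeTLB page. */
#define EX_VMEMMAP_PAGES	(EX_HPAGE_NR_PAGES * EX_STRUCT_PAGE_SIZE / EX_PAGE_SIZE)

/* Only the head vmemmap page keeps a private frame, so 7 of the 8 are freed. */
#define EX_VMEMMAP_FREED	(EX_VMEMMAP_PAGES - 1)

That is roughly 28 KiB returned to the buddy allocator per 2 MiB HugeTLB page (about 1.4% of the pool), compared with 24 KiB before this change.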
@@ -175,19 +181,21 @@
/*
* There are a lot of struct page structures associated with each HugeTLB page.
* For tail pages, the value of compound_head is the same. So we can reuse first
- * page of tail page structures. We map the virtual addresses of the remaining
- * pages of tail page structures to the first tail page struct, and then free
- * these page frames. Therefore, we need to reserve two pages as vmemmap areas.
+ * page of head page structures. We map the virtual addresses of all the pages
+ * of tail page structures to the head page struct, and then free these page
+ * frames. Therefore, we need to reserve one page as vmemmap areas.
*/
-#define RESERVE_VMEMMAP_NR 2U
+#define RESERVE_VMEMMAP_NR 1U
#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
-bool hugetlb_free_vmemmap_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
+ hugetlb_free_vmemmap_enabled_key);
+EXPORT_SYMBOL(hugetlb_free_vmemmap_enabled_key);
static int __init early_hugetlb_free_vmemmap_param(char *buf)
{
/* We cannot optimize if a "struct page" crosses page boundaries. */
- if ((!is_power_of_2(sizeof(struct page)))) {
+ if (!is_power_of_2(sizeof(struct page))) {
pr_warn("cannot free vmemmap pages because \"struct page\" crosses page boundaries\n");
return 0;
}
@@ -196,9 +204,9 @@ static int __init early_hugetlb_free_vmemmap_param(char *buf)
return -EINVAL;
if (!strcmp(buf, "on"))
- hugetlb_free_vmemmap_enabled = true;
+ static_branch_enable(&hugetlb_free_vmemmap_enabled_key);
else if (!strcmp(buf, "off"))
- hugetlb_free_vmemmap_enabled = false;
+ static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
else
return -EINVAL;
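
The DEFINE_STATIC_KEY_MAYBE() above replaces the old hugetlb_free_vmemmap_enabled boolean with a static key whose initial state follows CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON; the early parameter handler then flips it with static_branch_enable()/static_branch_disable(). Callers test it through an inline helper; a sketch of what that helper presumably looks like, using DECLARE_STATIC_KEY_MAYBE() and static_branch_maybe() from <linux/jump_label.h> (the header the kernel actually places this in is not visible in this diff):

#include <linux/jump_label.h>

/* Sketch of the consumer side; the real declaration lives in a header not shown here. */
DECLARE_STATIC_KEY_MAYBE(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
			 hugetlb_free_vmemmap_enabled_key);

static __always_inline bool hugetlb_free_vmemmap_enabled(void)
{
	/* Compiles to a patched jump/nop rather than a load of a global flag. */
	return static_branch_maybe(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON,
				   &hugetlb_free_vmemmap_enabled_key);
}

This is why the check in hugetlb_vmemmap_init() further down becomes hugetlb_free_vmemmap_enabled() with parentheses: it is now a function-style wrapper around the static key instead of a plain bool.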
@@ -236,7 +244,6 @@ int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
*/
ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse,
GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
-
if (!ret)
ClearHPageVmemmapOptimized(head);
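
The hunk above only drops a stray blank line, but the surrounding logic is worth spelling out: alloc_huge_page_vmemmap() re-populates the vmemmap with __GFP_NORETRY, so it can fail under memory pressure, and only on success is HPageVmemmapOptimized cleared. A rough caller sketch under those assumptions; the function name and free path below are illustrative, not the kernel's actual call site:

/*
 * Hypothetical caller, for illustration only: the vmemmap has to be restored
 * before a HugeTLB page can go back to the buddy allocator, and a failed
 * __GFP_NORETRY allocation means the free must be deferred and retried.
 */
static void example_release_hugetlb_page(struct hstate *h, struct page *head)
{
	if (alloc_huge_page_vmemmap(h, head)) {
		/* Could not rebuild the vmemmap; keep the page and retry later. */
		return;
	}
	/* Vmemmap restored (HPageVmemmapOptimized now clear); safe to free. */
	__free_pages(head, huge_page_order(h));
}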
@@ -277,14 +284,13 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
BUILD_BUG_ON(__NR_USED_SUBPAGE >=
RESERVE_VMEMMAP_SIZE / sizeof(struct page));
- if (!hugetlb_free_vmemmap_enabled)
+ if (!hugetlb_free_vmemmap_enabled())
return;
vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
/*
- * The head page and the first tail page are not to be freed to buddy
- * allocator, the other pages will map to the first tail page, so they
- * can be freed.
+ * The head page is not to be freed to buddy allocator, the other tail
+ * pages will map to the head page, so they can be freed.
*
* Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
* on some architectures (e.g. aarch64). See Documentation/arm64/