Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c | 1161
1 file changed, 548 insertions(+), 613 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b586cdd75930..db895230ee7e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE];
#ifdef CONFIG_CMA
static struct cma *hugetlb_cma[MAX_NUMNODES];
static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
{
- return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
+ return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
1 << order);
}
#else
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
{
return false;
}
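
The hunks that follow repeat one conversion pattern: code that previously took a struct page now resolves the containing folio and uses the folio_* accessors (folio_nid(), folio_ref_count(), and so on). A minimal compatibility wrapper, not part of this commit and shown only to illustrate the pattern, would look like:

/*
 * Illustrative only, not in this diff: how a page-based caller would reach
 * the new folio-based helper.  page_folio() resolves the containing folio
 * (the head page), so folio_nid() and cma_pages_valid() then see the whole
 * allocation.
 */
static inline bool hugetlb_cma_page(struct page *page, unsigned int order)
{
	return hugetlb_cma_folio(page_folio(page), order);
}
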
@@ -255,6 +255,152 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
return subpool_inode(file_inode(vma->vm_file));
}
+/*
+ * hugetlb vma_lock helper routines
+ */
+static bool __vma_shareable_lock(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
+ vma->vm_private_data;
+}
+
+void hugetlb_vma_lock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ down_read(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ up_read(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_lock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ down_write(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ up_write(&vma_lock->rw_sema);
+ }
+}
+
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ if (!__vma_shareable_lock(vma))
+ return 1;
+
+ return down_write_trylock(&vma_lock->rw_sema);
+}
+
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ lockdep_assert_held(&vma_lock->rw_sema);
+ }
+}
+
+void hugetlb_vma_lock_release(struct kref *kref)
+{
+ struct hugetlb_vma_lock *vma_lock = container_of(kref,
+ struct hugetlb_vma_lock, refs);
+
+ kfree(vma_lock);
+}
+
+static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
+{
+ struct vm_area_struct *vma = vma_lock->vma;
+
+ /*
+ * vma_lock structure may or may not be released as a result of put,
+ * it certainly will no longer be attached to vma so clear pointer.
+ * Semaphore synchronizes access to vma_lock->vma field.
+ */
+ vma_lock->vma = NULL;
+ vma->vm_private_data = NULL;
+ up_write(&vma_lock->rw_sema);
+ kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+}
+
+static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_lock(vma)) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ __hugetlb_vma_unlock_write_put(vma_lock);
+ }
+}
+
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
+{
+ /*
+ * Only present in sharable vmas.
+ */
+ if (!vma || !__vma_shareable_lock(vma))
+ return;
+
+ if (vma->vm_private_data) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ down_write(&vma_lock->rw_sema);
+ __hugetlb_vma_unlock_write_put(vma_lock);
+ }
+}
+
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+ struct hugetlb_vma_lock *vma_lock;
+
+ /* Only establish in (flags) sharable vmas */
+ if (!vma || !(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ /* Should never get here with non-NULL vm_private_data */
+ if (vma->vm_private_data)
+ return;
+
+ vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
+ if (!vma_lock) {
+ /*
+ * If we can not allocate structure, then vma can not
+ * participate in pmd sharing. This is only a possible
+ * performance enhancement and memory saving issue.
+ * However, the lock is also used to synchronize page
+ * faults with truncation. If the lock is not present,
+ * unlikely races could leave pages in a file past i_size
+ * until the file is removed. Warn in the unlikely case of
+ * allocation failure.
+ */
+ pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
+ return;
+ }
+
+ kref_init(&vma_lock->refs);
+ init_rwsem(&vma_lock->rw_sema);
+ vma_lock->vma = vma;
+ vma->vm_private_data = vma_lock;
+}
+
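
The helpers above all operate on a small per-vma structure hung off vm_private_data. Its definition lives in include/linux/hugetlb.h and is not part of this hunk; a sketch consistent with the fields the helpers use (refs, rw_sema, vma) is:

/* Sketch only; the authoritative definition is in include/linux/hugetlb.h. */
struct hugetlb_vma_lock {
	struct kref refs;		/* freed via hugetlb_vma_lock_release() */
	struct rw_semaphore rw_sema;	/* readers: faults; writers: pmd unshare, truncation */
	struct vm_area_struct *vma;	/* back-pointer, cleared by __hugetlb_vma_unlock_write_put() */
};
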
/* Helper that removes a struct file_region from the resv_map cache and returns
* it for use.
*/
@@ -1014,15 +1160,23 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma)
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
/*
* Clear vm_private_data
+ * - For shared mappings this is a per-vma semaphore that may be
+ * allocated in a subsequent call to hugetlb_vm_op_open.
+ * Before clearing, make sure pointer is not associated with vma
+ * as this will leak the structure. This is the case when called
+ * via clear_vma_resv_huge_pages() and hugetlb_vm_op_open has already
+ * been called to allocate a new structure.
* - For MAP_PRIVATE mappings, this is the reserve map which does
* not apply to children. Faults generated by the children are
* not guaranteed to succeed, even if read-only.
- * - For shared mappings this is a per-vma semaphore that may be
- * allocated in a subsequent call to hugetlb_vm_op_open.
*/
- vma->vm_private_data = (void *)0;
- if (!(vma->vm_flags & VM_MAYSHARE))
- return;
+ if (vma->vm_flags & VM_MAYSHARE) {
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ if (vma_lock && vma_lock->vma != vma)
+ vma->vm_private_data = NULL;
+ } else
+ vma->vm_private_data = NULL;
}
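
The VM_MAYSHARE branch above only clears vm_private_data when the pointer was copied from another vma. A hypothetical helper, not part of the commit, restating that check:

/* Illustrative restatement of the VM_MAYSHARE case above, not in this diff. */
static void hugetlb_clear_copied_vma_lock(struct vm_area_struct *vma)
{
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * A lock whose back-pointer already references this vma was allocated
	 * for it (e.g. by hugetlb_vm_op_open); clearing the pointer here would
	 * leak it.  A lock pointing elsewhere was merely copied by
	 * vm_area_dup() and must be dropped from this vma.
	 */
	if (vma_lock && vma_lock->vma != vma)
		vma->vm_private_data = NULL;
}
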
/*
@@ -1119,17 +1273,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
return false;
}
-static void enqueue_huge_page(struct hstate *h, struct page *page)
+static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
{
- int nid = page_to_nid(page);
+ int nid = folio_nid(folio);
lockdep_assert_held(&hugetlb_lock);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
- list_move(&page->lru, &h->hugepage_freelists[nid]);
+ list_move(&folio->lru, &h->hugepage_freelists[nid]);
h->free_huge_pages++;
h->free_huge_pages_node[nid]++;
- SetHPageFreed(page);
+ folio_set_hugetlb_freed(folio);
}
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1317,76 +1471,76 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
nr_nodes--)
/* used to demote non-gigantic_huge pages as well */
-static void __destroy_compound_gigantic_page(struct page *page,
+static void __destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order, bool demote)
{
int i;
int nr_pages = 1 << order;
struct page *p;
- atomic_set(compound_mapcount_ptr(page), 0);
- atomic_set(compound_pincount_ptr(page), 0);
+ atomic_set(folio_mapcount_ptr(folio), 0);
+ atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+ atomic_set(folio_pincount_ptr(folio), 0);
for (i = 1; i < nr_pages; i++) {
- p = nth_page(page, i);
+ p = folio_page(folio, i);
p->mapping = NULL;
clear_compound_head(p);
if (!demote)
set_page_refcounted(p);
}
- set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
- page[1].compound_nr = 0;
-#endif
- __ClearPageHead(page);
+ folio_set_compound_order(folio, 0);
+ __folio_clear_head(folio);
}
-static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
unsigned int order)
{
- __destroy_compound_gigantic_page(page, order, true);
+ __destroy_compound_gigantic_folio(folio, order, true);
}
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_page(struct page *page,
+static void destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order)
{
- __destroy_compound_gigantic_page(page, order, false);
+ __destroy_compound_gigantic_folio(folio, order, false);
}
-static void free_gigantic_page(struct page *page, unsigned int order)
+static void free_gigantic_folio(struct folio *folio, unsigned int order)
{
/*
* If the page isn't allocated using the cma allocator,
* cma_release() returns false.
*/
#ifdef CONFIG_CMA
- if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+ int nid = folio_nid(folio);
+
+ if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
return;
#endif
- free_contig_range(page_to_pfn(page), 1 << order);
+ free_contig_range(folio_pfn(folio), 1 << order);
}
#ifdef CONFIG_CONTIG_ALLOC
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nodemask)
{
+ struct page *page;
unsigned long nr_pages = pages_per_huge_page(h);
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
#ifdef CONFIG_CMA
{
- struct page *page;
int node;
if (hugetlb_cma[nid]) {
page = cma_alloc(hugetlb_cma[nid], nr_pages,
huge_page_order(h), true);
if (page)
- return page;
+ return page_folio(page);
}
if (!(gfp_mask & __GFP_THISNODE)) {
@@ -1397,17 +1551,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
page = cma_alloc(hugetlb_cma[node], nr_pages,
huge_page_order(h), true);
if (page)
- return page;
+ return page_folio(page);
}
}
}
#endif
- return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+ page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+ return page ? page_folio(page) : NULL;
}
#else /* !CONFIG_CONTIG_ALLOC */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nodemask)
{
return NULL;
@@ -1415,40 +1570,41 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
#endif /* CONFIG_CONTIG_ALLOC */
#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nodemask)
{
return NULL;
}
-static inline void free_gigantic_page(struct page *page, unsigned int order) { }
-static inline void destroy_compound_gigantic_page(struct page *page,
+static inline void free_gigantic_folio(struct folio *folio,
+ unsigned int order) { }
+static inline void destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order) { }
#endif
/*
- * Remove hugetlb page from lists, and update dtor so that page appears
+ * Remove hugetlb folio from lists, and update dtor so that the folio appears
* as just a compound page.
*
- * A reference is held on the page, except in the case of demote.
+ * A reference is held on the folio, except in the case of demote.
*
* Must be called with hugetlb lock held.
*/
-static void __remove_hugetlb_page(struct hstate *h, struct page *page,
+static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus,
bool demote)
{
- int nid = page_to_nid(page);
+ int nid = folio_nid(folio);
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+ VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
+ VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
lockdep_assert_held(&hugetlb_lock);
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
- list_del(&page->lru);
+ list_del(&folio->lru);
- if (HPageFreed(page)) {
+ if (folio_test_hugetlb_freed(folio)) {
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
}
@@ -1467,50 +1623,50 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
*
* For gigantic pages set the destructor to the null dtor. This
* destructor will never be called. Before freeing the gigantic
- * page destroy_compound_gigantic_page will turn the compound page
- * into a simple group of pages. After this the destructor does not
+ * page destroy_compound_gigantic_folio will turn the folio into a
+ * simple group of pages. After this the destructor does not
* apply.
*
* This handles the case where more than one ref is held when and
- * after update_and_free_page is called.
+ * after update_and_free_hugetlb_folio is called.
*
* In the case of demote we do not ref count the page as it will soon
* be turned into a page of smaller size.
*/
if (!demote)
- set_page_refcounted(page);
+ folio_ref_unfreeze(folio, 1);
if (hstate_is_gigantic(h))
- set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+ folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
else
- set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+ folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
}
-static void remove_hugetlb_page(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
- __remove_hugetlb_page(h, page, adjust_surplus, false);
+ __remove_hugetlb_folio(h, folio, adjust_surplus, false);
}
-static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
- __remove_hugetlb_page(h, page, adjust_surplus, true);
+ __remove_hugetlb_folio(h, folio, adjust_surplus, true);
}
-static void add_hugetlb_page(struct hstate *h, struct page *page,
+static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
bool adjust_surplus)
{
int zeroed;
- int nid = page_to_nid(page);
+ int nid = folio_nid(folio);
- VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
lockdep_assert_held(&hugetlb_lock);
- INIT_LIST_HEAD(&page->lru);
+ INIT_LIST_HEAD(&folio->lru);
h->nr_huge_pages++;
h->nr_huge_pages_node[nid]++;
@@ -1519,21 +1675,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
h->surplus_huge_pages_node[nid]++;
}
- set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
- set_page_private(page, 0);
+ folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+ folio_change_private(folio, NULL);
/*
- * We have to set HPageVmemmapOptimized again as above
- * set_page_private(page, 0) cleared it.
+ * We have to set hugetlb_vmemmap_optimized again as above
+ * folio_change_private(folio, NULL) cleared it.
*/
- SetHPageVmemmapOptimized(page);
+ folio_set_hugetlb_vmemmap_optimized(folio);
/*
- * This page is about to be managed by the hugetlb allocator and
+ * This folio is about to be managed by the hugetlb allocator and
* should have no users. Drop our reference, and check for others
* just in case.
*/
- zeroed = put_page_testzero(page);
- if (!zeroed)
+ zeroed = folio_put_testzero(folio);
+ if (unlikely(!zeroed))
/*
 * It is VERY unlikely someone else has taken a ref on
* the page. In this case, we simply return as the
@@ -1542,13 +1698,14 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
*/
return;
- arch_clear_hugepage_flags(page);
- enqueue_huge_page(h, page);
+ arch_clear_hugepage_flags(&folio->page);
+ enqueue_hugetlb_folio(h, folio);
}
static void __update_and_free_page(struct hstate *h, struct page *page)
{
int i;
+ struct folio *folio = page_folio(page);
struct page *subpage;
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
@@ -1558,7 +1715,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
* If we don't know which subpages are hwpoisoned, we can't free
* the hugepage, so it's leaked intentionally.
*/
- if (HPageRawHwpUnreliable(page))
+ if (folio_test_hugetlb_raw_hwp_unreliable(folio))
return;
if (hugetlb_vmemmap_restore(h, page)) {
@@ -1568,7 +1725,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
* page and put the page back on the hugetlb free list and treat
* as a surplus page.
*/
- add_hugetlb_page(h, page, true);
+ add_hugetlb_folio(h, folio, true);
spin_unlock_irq(&hugetlb_lock);
return;
}
@@ -1577,11 +1734,11 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
* Move PageHWPoison flag from head page to the raw error pages,
* which makes any healthy subpages reusable.
*/
- if (unlikely(PageHWPoison(page)))
- hugetlb_clear_page_hwpoison(page);
+ if (unlikely(folio_test_hwpoison(folio)))
+ hugetlb_clear_page_hwpoison(&folio->page);
for (i = 0; i < pages_per_huge_page(h); i++) {
- subpage = nth_page(page, i);
+ subpage = folio_page(folio, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
@@ -1590,19 +1747,19 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
/*
* Non-gigantic pages demoted from CMA allocated gigantic pages
- * need to be given back to CMA in free_gigantic_page.
+ * need to be given back to CMA in free_gigantic_folio.
*/
if (hstate_is_gigantic(h) ||
- hugetlb_cma_page(page, huge_page_order(h))) {
- destroy_compound_gigantic_page(page, huge_page_order(h));
- free_gigantic_page(page, huge_page_order(h));
+ hugetlb_cma_folio(folio, huge_page_order(h))) {
+ destroy_compound_gigantic_folio(folio, huge_page_order(h));
+ free_gigantic_folio(folio, huge_page_order(h));
} else {
__free_pages(page, huge_page_order(h));
}
}
/*
- * As update_and_free_page() can be called under any context, so we cannot
+ * As update_and_free_hugetlb_folio() can be called under any context, so we cannot
* use GFP_KERNEL to allocate vmemmap pages. However, we can defer the
* actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate
* the vmemmap pages.
@@ -1631,8 +1788,9 @@ static void free_hpage_workfn(struct work_struct *work)
/*
* The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate()
* is going to trigger because a previous call to
- * remove_hugetlb_page() will set_compound_page_dtor(page,
- * NULL_COMPOUND_DTOR), so do not use page_hstate() directly.
+ * remove_hugetlb_folio() will call folio_set_compound_dtor
+ * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate()
+ * directly.
*/
h = size_to_hstate(page_size(page));
@@ -1649,11 +1807,11 @@ static inline void flush_free_hpage_work(struct hstate *h)
flush_work(&free_hpage_work);
}
-static void update_and_free_page(struct hstate *h, struct page *page,
+static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio,
bool atomic)
{
- if (!HPageVmemmapOptimized(page) || !atomic) {
- __update_and_free_page(h, page);
+ if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) {
+ __update_and_free_page(h, &folio->page);
return;
}
@@ -1664,16 +1822,18 @@ static void update_and_free_page(struct hstate *h, struct page *page,
* empty. Otherwise, schedule_work() had been called but the workfn
* hasn't retrieved the list yet.
*/
- if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
+ if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
schedule_work(&free_hpage_work);
}
static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
{
struct page *page, *t_page;
+ struct folio *folio;
list_for_each_entry_safe(page, t_page, list, lru) {
- update_and_free_page(h, page, false);
+ folio = page_folio(page);
+ update_and_free_hugetlb_folio(h, folio, false);
cond_resched();
}
}
@@ -1695,21 +1855,22 @@ void free_huge_page(struct page *page)
* Can't pass hstate in here because it is called from the
* compound page destructor.
*/
- struct hstate *h = page_hstate(page);
- int nid = page_to_nid(page);
- struct hugepage_subpool *spool = hugetlb_page_subpool(page);
+ struct folio *folio = page_folio(page);
+ struct hstate *h = folio_hstate(folio);
+ int nid = folio_nid(folio);
+ struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);
bool restore_reserve;
unsigned long flags;
- VM_BUG_ON_PAGE(page_count(page), page);
- VM_BUG_ON_PAGE(page_mapcount(page), page);
+ VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+ VM_BUG_ON_FOLIO(folio_mapcount(folio), folio);
- hugetlb_set_page_subpool(page, NULL);
- if (PageAnon(page))
- __ClearPageAnonExclusive(page);
- page->mapping = NULL;
- restore_reserve = HPageRestoreReserve(page);
- ClearHPageRestoreReserve(page);
+ hugetlb_set_folio_subpool(folio, NULL);
+ if (folio_test_anon(folio))
+ __ClearPageAnonExclusive(&folio->page);
+ folio->mapping = NULL;
+ restore_reserve = folio_test_hugetlb_restore_reserve(folio);
+ folio_clear_hugetlb_restore_reserve(folio);
/*
* If HPageRestoreReserve was set on page, page allocation consumed a
@@ -1731,26 +1892,26 @@ void free_huge_page(struct page *page)
}
spin_lock_irqsave(&hugetlb_lock, flags);
- ClearHPageMigratable(page);
- hugetlb_cgroup_uncharge_page(hstate_index(h),
- pages_per_huge_page(h), page);
- hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
- pages_per_huge_page(h), page);
+ folio_clear_hugetlb_migratable(folio);
+ hugetlb_cgroup_uncharge_folio(hstate_index(h),
+ pages_per_huge_page(h), folio);
+ hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+ pages_per_huge_page(h), folio);
if (restore_reserve)
h->resv_huge_pages++;
- if (HPageTemporary(page)) {
- remove_hugetlb_page(h, page, false);
+ if (folio_test_hugetlb_temporary(folio)) {
+ remove_hugetlb_folio(h, folio, false);
spin_unlock_irqrestore(&hugetlb_lock, flags);
- update_and_free_page(h, page, true);
+ update_and_free_hugetlb_folio(h, folio, true);
} else if (h->surplus_huge_pages_node[nid]) {
/* remove the page from active list */
- remove_hugetlb_page(h, page, true);
+ remove_hugetlb_folio(h, folio, true);
spin_unlock_irqrestore(&hugetlb_lock, flags);
- update_and_free_page(h, page, true);
+ update_and_free_hugetlb_folio(h, folio, true);
} else {
arch_clear_hugepage_flags(page);
- enqueue_huge_page(h, page);
+ enqueue_hugetlb_folio(h, folio);
spin_unlock_irqrestore(&hugetlb_lock, flags);
}
}
@@ -1765,36 +1926,37 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
h->nr_huge_pages_node[nid]++;
}
-static void __prep_new_huge_page(struct hstate *h, struct page *page)
+static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
- hugetlb_vmemmap_optimize(h, page);
- INIT_LIST_HEAD(&page->lru);
- set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
- hugetlb_set_page_subpool(page, NULL);
- set_hugetlb_cgroup(page, NULL);
- set_hugetlb_cgroup_rsvd(page, NULL);
+ hugetlb_vmemmap_optimize(h, &folio->page);
+ INIT_LIST_HEAD(&folio->lru);
+ folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+ hugetlb_set_folio_subpool(folio, NULL);
+ set_hugetlb_cgroup(folio, NULL);
+ set_hugetlb_cgroup_rsvd(folio, NULL);
}
-static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
{
- __prep_new_huge_page(h, page);
+ __prep_new_hugetlb_folio(h, folio);
spin_lock_irq(&hugetlb_lock);
__prep_account_new_huge_page(h, nid);
spin_unlock_irq(&hugetlb_lock);
}
-static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
- bool demote)
+static bool __prep_compound_gigantic_folio(struct folio *folio,
+ unsigned int order, bool demote)
{
int i, j;
int nr_pages = 1 << order;
struct page *p;
- /* we rely on prep_new_huge_page to set the destructor */
- set_compound_order(page, order);
- __SetPageHead(page);
+ __folio_clear_reserved(folio);
+ __folio_set_head(folio);
+ /* we rely on prep_new_hugetlb_folio to set the destructor */
+ folio_set_compound_order(folio, order);
for (i = 0; i < nr_pages; i++) {
- p = nth_page(page, i);
+ p = folio_page(folio, i);
/*
* For gigantic hugepages allocated through bootmem at
@@ -1808,7 +1970,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
* on the head page when they need know if put_page() is needed
* after get_user_pages().
*/
- __ClearPageReserved(p);
+ if (i != 0) /* head page cleared above */
+ __ClearPageReserved(p);
/*
* Subtle and very unlikely
*
@@ -1835,42 +1998,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
VM_BUG_ON_PAGE(page_count(p), p);
}
if (i != 0)
- set_compound_head(p, page);
+ set_compound_head(p, &folio->page);
}
- atomic_set(compound_mapcount_ptr(page), -1);
- atomic_set(compound_pincount_ptr(page), 0);
+ atomic_set(folio_mapcount_ptr(folio), -1);
+ atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+ atomic_set(folio_pincount_ptr(folio), 0);
return true;
out_error:
/* undo page modifications made above */
for (j = 0; j < i; j++) {
- p = nth_page(page, j);
+ p = folio_page(folio, j);
if (j != 0)
clear_compound_head(p);
set_page_refcounted(p);
}
/* need to clear PG_reserved on remaining tail pages */
for (; j < nr_pages; j++) {
- p = nth_page(page, j);
+ p = folio_page(folio, j);
__ClearPageReserved(p);
}
- set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
- page[1].compound_nr = 0;
-#endif
- __ClearPageHead(page);
+ folio_set_compound_order(folio, 0);
+ __folio_clear_head(folio);
return false;
}
-static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
+static bool prep_compound_gigantic_folio(struct folio *folio,
+ unsigned int order)
{
- return __prep_compound_gigantic_page(page, order, false);
+ return __prep_compound_gigantic_folio(folio, order, false);
}
-static bool prep_compound_gigantic_page_for_demote(struct page *page,
+static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
unsigned int order)
{
- return __prep_compound_gigantic_page(page, order, true);
+ return __prep_compound_gigantic_folio(folio, order, true);
}
/*
@@ -1935,7 +2097,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
return (index << compound_order(page_head)) + compound_idx;
}
-static struct page *alloc_buddy_huge_page(struct hstate *h,
+static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
gfp_t gfp_mask, int nid, nodemask_t *nmask,
nodemask_t *node_alloc_noretry)
{
@@ -1973,11 +2135,6 @@ retry:
page = NULL;
}
- if (page)
- __count_vm_event(HTLB_BUDDY_PGALLOC);
- else
- __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
/*
* If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
* indicates an overall state change. Clear bit so that we resume
@@ -1994,7 +2151,13 @@ retry:
if (node_alloc_noretry && !page && alloc_try_hard)
node_set(nid, *node_alloc_noretry);
- return page;
+ if (!page) {
+ __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+ return NULL;
+ }
+
+ __count_vm_event(HTLB_BUDDY_PGALLOC);
+ return page_folio(page);
}
/*
@@ -2004,29 +2167,28 @@ retry:
* Note that returned page is 'frozen': ref count of head page and all tail
* pages is zero.
*/
-static struct page *alloc_fresh_huge_page(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
gfp_t gfp_mask, int nid, nodemask_t *nmask,
nodemask_t *node_alloc_noretry)
{
- struct page *page;
+ struct folio *folio;
bool retry = false;
retry:
if (hstate_is_gigantic(h))
- page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+ folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
else
- page = alloc_buddy_huge_page(h, gfp_mask,
+ folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
nid, nmask, node_alloc_noretry);
- if (!page)
+ if (!folio)
return NULL;
-
if (hstate_is_gigantic(h)) {
- if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
+ if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
/*
* Rare failure to convert pages to compound page.
* Free pages and try again - ONCE!
*/
- free_gigantic_page(page, huge_page_order(h));
+ free_gigantic_folio(folio, huge_page_order(h));
if (!retry) {
retry = true;
goto retry;
@@ -2034,9 +2196,9 @@ retry:
return NULL;
}
}
- prep_new_huge_page(h, page, page_to_nid(page));
+ prep_new_hugetlb_folio(h, folio, folio_nid(folio));
- return page;
+ return folio;
}
/*
@@ -2046,23 +2208,20 @@ retry:
static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
nodemask_t *node_alloc_noretry)
{
- struct page *page;
+ struct folio *folio;
int nr_nodes, node;
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
- page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
- node_alloc_noretry);
- if (page)
- break;
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+ nodes_allowed, node_alloc_noretry);
+ if (folio) {
+ free_huge_page(&folio->page); /* free it into the hugepage allocator */
+ return 1;
+ }
}
- if (!page)
- return 0;
-
- free_huge_page(page); /* free it into the hugepage allocator */
-
- return 1;
+ return 0;
}
/*
@@ -2078,6 +2237,7 @@ static struct page *remove_pool_huge_page(struct hstate *h,
{
int nr_nodes, node;
struct page *page = NULL;
+ struct folio *folio;
lockdep_assert_held(&hugetlb_lock);
for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
@@ -2089,7 +2249,8 @@ static struct page *remove_pool_huge_page(struct hstate *h,
!list_empty(&h->hugepage_freelists[node])) {
page = list_entry(h->hugepage_freelists[node].next,
struct page, lru);
- remove_hugetlb_page(h, page, acct_surplus);
+ folio = page_folio(page);
+ remove_hugetlb_folio(h, folio, acct_surplus);
break;
}
}
@@ -2114,21 +2275,21 @@ static struct page *remove_pool_huge_page(struct hstate *h,
int dissolve_free_huge_page(struct page *page)
{
int rc = -EBUSY;
+ struct folio *folio = page_folio(page);
retry:
/* Not to disrupt normal path by vainly holding hugetlb_lock */
- if (!PageHuge(page))
+ if (!folio_test_hugetlb(folio))
return 0;
spin_lock_irq(&hugetlb_lock);
- if (!PageHuge(page)) {
+ if (!folio_test_hugetlb(folio)) {
rc = 0;
goto out;
}
- if (!page_count(page)) {
- struct page *head = compound_head(page);
- struct hstate *h = page_hstate(head);
+ if (!folio_ref_count(folio)) {
+ struct hstate *h = folio_hstate(folio);
if (!available_huge_pages(h))
goto out;
@@ -2136,7 +2297,7 @@ retry:
* We should make sure that the page is already on the free list
* when it is dissolved.
*/
- if (unlikely(!HPageFreed(head))) {
+ if (unlikely(!folio_test_hugetlb_freed(folio))) {
spin_unlock_irq(&hugetlb_lock);
cond_resched();
@@ -2151,24 +2312,24 @@ retry:
goto retry;
}
- remove_hugetlb_page(h, head, false);
+ remove_hugetlb_folio(h, folio, false);
h->max_huge_pages--;
spin_unlock_irq(&hugetlb_lock);
/*
- * Normally update_and_free_page will allocate required vmemmmap
- * before freeing the page. update_and_free_page will fail to
+ * Normally update_and_free_hugetlb_folio will allocate required vmemmap
+ * before freeing the page. update_and_free_hugetlb_folio will fail to
* free the page if it can not allocate required vmemmap. We
* need to adjust max_huge_pages if the page is not freed.
* Attempt to allocate vmemmmap here so that we can take
* appropriate action on failure.
*/
- rc = hugetlb_vmemmap_restore(h, head);
+ rc = hugetlb_vmemmap_restore(h, &folio->page);
if (!rc) {
- update_and_free_page(h, head, false);
+ update_and_free_hugetlb_folio(h, folio, false);
} else {
spin_lock_irq(&hugetlb_lock);
- add_hugetlb_page(h, head, false);
+ add_hugetlb_folio(h, folio, false);
h->max_huge_pages++;
spin_unlock_irq(&hugetlb_lock);
}
@@ -2219,7 +2380,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
if (hstate_is_gigantic(h))
return NULL;
@@ -2229,8 +2390,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
goto out_unlock;
spin_unlock_irq(&hugetlb_lock);
- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
- if (!page)
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ if (!folio)
return NULL;
spin_lock_irq(&hugetlb_lock);
@@ -2242,43 +2403,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
* codeflow
*/
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
- SetHPageTemporary(page);
+ folio_set_hugetlb_temporary(folio);
spin_unlock_irq(&hugetlb_lock);
- free_huge_page(page);
+ free_huge_page(&folio->page);
return NULL;
}
h->surplus_huge_pages++;
- h->surplus_huge_pages_node[page_to_nid(page)]++;
+ h->surplus_huge_pages_node[folio_nid(folio)]++;
out_unlock:
spin_unlock_irq(&hugetlb_lock);
- return page;
+ return &folio->page;
}
static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
- struct page *page;
+ struct folio *folio;
if (hstate_is_gigantic(h))
return NULL;
- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
- if (!page)
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+ if (!folio)
return NULL;
/* fresh huge pages are frozen */
- set_page_refcounted(page);
-
+ folio_ref_unfreeze(folio, 1);
/*
* We do not account these pages as surplus because they are only
* temporary and will be released properly on the last reference
*/
- SetHPageTemporary(page);
+ folio_set_hugetlb_temporary(folio);
- return page;
+ return &folio->page;
}
/*
@@ -2420,7 +2580,7 @@ retry:
if ((--needed) < 0)
break;
/* Add the page to the hugetlb allocator */
- enqueue_huge_page(h, page);
+ enqueue_hugetlb_folio(h, page_folio(page));
}
free:
spin_unlock_irq(&hugetlb_lock);
@@ -2727,51 +2887,52 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
}
/*
- * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+ * the old one
* @h: struct hstate old page belongs to
- * @old_page: Old page to dissolve
+ * @old_folio: Old folio to dissolve
* @list: List to isolate the page in case we need to
* Returns 0 on success, otherwise negated error.
*/
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
- struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+ struct folio *old_folio, struct list_head *list)
{
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
- int nid = page_to_nid(old_page);
- struct page *new_page;
+ int nid = folio_nid(old_folio);
+ struct folio *new_folio;
int ret = 0;
/*
- * Before dissolving the page, we need to allocate a new one for the
- * pool to remain stable. Here, we allocate the page and 'prep' it
+ * Before dissolving the folio, we need to allocate a new one for the
+ * pool to remain stable. Here, we allocate the folio and 'prep' it
* by doing everything but actually updating counters and adding to
* the pool. This simplifies and let us do most of the processing
* under the lock.
*/
- new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
- if (!new_page)
+ new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+ if (!new_folio)
return -ENOMEM;
- __prep_new_huge_page(h, new_page);
+ __prep_new_hugetlb_folio(h, new_folio);
retry:
spin_lock_irq(&hugetlb_lock);
- if (!PageHuge(old_page)) {
+ if (!folio_test_hugetlb(old_folio)) {
/*
- * Freed from under us. Drop new_page too.
+ * Freed from under us. Drop new_folio too.
*/
goto free_new;
- } else if (page_count(old_page)) {
+ } else if (folio_ref_count(old_folio)) {
/*
- * Someone has grabbed the page, try to isolate it here.
+ * Someone has grabbed the folio, try to isolate it here.
* Fail with -EBUSY if not possible.
*/
spin_unlock_irq(&hugetlb_lock);
- ret = isolate_hugetlb(old_page, list);
+ ret = isolate_hugetlb(&old_folio->page, list);
spin_lock_irq(&hugetlb_lock);
goto free_new;
- } else if (!HPageFreed(old_page)) {
+ } else if (!folio_test_hugetlb_freed(old_folio)) {
/*
- * Page's refcount is 0 but it has not been enqueued in the
+ * Folio's refcount is 0 but it has not been enqueued in the
* freelist yet. Race window is small, so we can succeed here if
* we retry.
*/
@@ -2780,35 +2941,35 @@ retry:
goto retry;
} else {
/*
- * Ok, old_page is still a genuine free hugepage. Remove it from
+ * Ok, old_folio is still a genuine free hugepage. Remove it from
* the freelist and decrease the counters. These will be
* incremented again when calling __prep_account_new_huge_page()
- * and enqueue_huge_page() for new_page. The counters will remain
- * stable since this happens under the lock.
+ * and enqueue_hugetlb_folio() for new_folio. The counters will
+ * remain stable since this happens under the lock.
*/
- remove_hugetlb_page(h, old_page, false);
+ remove_hugetlb_folio(h, old_folio, false);
/*
- * Ref count on new page is already zero as it was dropped
+ * Ref count on new_folio is already zero as it was dropped
* earlier. It can be directly added to the pool free list.
*/
__prep_account_new_huge_page(h, nid);
- enqueue_huge_page(h, new_page);
+ enqueue_hugetlb_folio(h, new_folio);
/*
- * Pages have been replaced, we can safely free the old one.
+ * Folio has been replaced, we can safely free the old one.
*/
spin_unlock_irq(&hugetlb_lock);
- update_and_free_page(h, old_page, false);
+ update_and_free_hugetlb_folio(h, old_folio, false);
}
return ret;
free_new:
spin_unlock_irq(&hugetlb_lock);
- /* Page has a zero ref count, but needs a ref to be freed */
- set_page_refcounted(new_page);
- update_and_free_page(h, new_page, false);
+ /* Folio has a zero ref count, but needs a ref to be freed */
+ folio_ref_unfreeze(new_folio, 1);
+ update_and_free_hugetlb_folio(h, new_folio, false);
return ret;
}
@@ -2816,7 +2977,7 @@ free_new:
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
{
struct hstate *h;
- struct page *head;
+ struct folio *folio = page_folio(page);
int ret = -EBUSY;
/*
@@ -2825,9 +2986,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
* Return success when racing as if we dissolved the page ourselves.
*/
spin_lock_irq(&hugetlb_lock);
- if (PageHuge(page)) {
- head = compound_head(page);
- h = page_hstate(head);
+ if (folio_test_hugetlb(folio)) {
+ h = folio_hstate(folio);
} else {
spin_unlock_irq(&hugetlb_lock);
return 0;
@@ -2842,10 +3002,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
if (hstate_is_gigantic(h))
return -ENOMEM;
- if (page_count(head) && !isolate_hugetlb(head, list))
+ if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
ret = 0;
- else if (!page_count(head))
- ret = alloc_and_dissolve_huge_page(h, head, list);
+ else if (!folio_ref_count(folio))
+ ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
return ret;
}
@@ -2856,6 +3016,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct page *page;
+ struct folio *folio;
long map_chg, map_commit;
long gbl_chg;
int ret, idx;
@@ -2924,15 +3085,16 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
+ spin_lock_irq(&hugetlb_lock);
if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
SetHPageRestoreReserve(page);
h->resv_huge_pages--;
}
- spin_lock_irq(&hugetlb_lock);
list_add(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page);
/* Fall through */
}
+ folio = page_folio(page);
hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
/* If allocation is not consuming a reservation, also store the
* hugetlb_cgroup pointer on the page.
@@ -2962,8 +3124,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
rsv_adjust = hugepage_subpool_put_pages(spool, 1);
hugetlb_acct_memory(h, -rsv_adjust);
if (deferred_reserve)
- hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
- pages_per_huge_page(h), page);
+ hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+ pages_per_huge_page(h), folio);
}
return page;
@@ -3028,17 +3190,18 @@ static void __init gather_bootmem_prealloc(void)
list_for_each_entry(m, &huge_boot_pages, list) {
struct page *page = virt_to_page(m);
+ struct folio *folio = page_folio(page);
struct hstate *h = m->hstate;
VM_BUG_ON(!hstate_is_gigantic(h));
- WARN_ON(page_count(page) != 1);
- if (prep_compound_gigantic_page(page, huge_page_order(h))) {
- WARN_ON(PageReserved(page));
- prep_new_huge_page(h, page, page_to_nid(page));
+ WARN_ON(folio_ref_count(folio) != 1);
+ if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
+ WARN_ON(folio_test_reserved(folio));
+ prep_new_hugetlb_folio(h, folio, folio_nid(folio));
free_huge_page(page); /* add to the hugepage allocator */
} else {
/* VERY unlikely inflated ref count on a tail page */
- free_gigantic_page(page, huge_page_order(h));
+ free_gigantic_folio(folio, huge_page_order(h));
}
/*
@@ -3060,14 +3223,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
if (!alloc_bootmem_huge_page(h, nid))
break;
} else {
- struct page *page;
+ struct folio *folio;
gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
- page = alloc_fresh_huge_page(h, gfp_mask, nid,
+ folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
&node_states[N_MEMORY], NULL);
- if (!page)
+ if (!folio)
break;
- free_huge_page(page); /* free it into the hugepage allocator */
+ free_huge_page(&folio->page); /* free it into the hugepage allocator */
}
cond_resched();
}
@@ -3212,7 +3375,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
goto out;
if (PageHighMem(page))
continue;
- remove_hugetlb_page(h, page, false);
+ remove_hugetlb_folio(h, page_folio(page), false);
list_add(&page->lru, &page_list);
}
}
@@ -3417,12 +3580,13 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
{
int i, nid = page_to_nid(page);
struct hstate *target_hstate;
+ struct folio *folio = page_folio(page);
struct page *subpage;
int rc = 0;
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
- remove_hugetlb_page_for_demote(h, page, false);
+ remove_hugetlb_folio_for_demote(h, folio, false);
spin_unlock_irq(&hugetlb_lock);
rc = hugetlb_vmemmap_restore(h, page);
@@ -3430,15 +3594,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
/* Allocation of vmemmmap failed, we can not demote page */
spin_lock_irq(&hugetlb_lock);
set_page_refcounted(page);
- add_hugetlb_page(h, page, false);
+ add_hugetlb_folio(h, page_folio(page), false);
return rc;
}
/*
- * Use destroy_compound_hugetlb_page_for_demote for all huge page
+ * Use destroy_compound_hugetlb_folio_for_demote for all huge page
* sizes as it will not ref count pages.
*/
- destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+ destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
/*
* Taking target hstate mutex synchronizes with set_max_huge_pages.
@@ -3452,13 +3616,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
for (i = 0; i < pages_per_huge_page(h);
i += pages_per_huge_page(target_hstate)) {
subpage = nth_page(page, i);
+ folio = page_folio(subpage);
if (hstate_is_gigantic(target_hstate))
- prep_compound_gigantic_page_for_demote(subpage,
+ prep_compound_gigantic_folio_for_demote(folio,
target_hstate->order);
else
prep_compound_page(subpage, target_hstate->order);
set_page_private(subpage, 0);
- prep_new_huge_page(target_hstate, subpage, nid);
+ prep_new_hugetlb_folio(target_hstate, folio, nid);
free_huge_page(subpage);
}
mutex_unlock(&target_hstate->resize_lock);
@@ -4601,6 +4766,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
struct resv_map *resv = vma_resv_map(vma);
/*
+ * HPAGE_RESV_OWNER indicates a private mapping.
* This new VMA should share its siblings reservation map if present.
* The VMA will only ever have a valid reservation map pointer where
* it is being copied for another still existing VMA. As that VMA
@@ -4615,11 +4781,21 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
/*
* vma_lock structure for sharable mappings is vma specific.
- * Clear old pointer (if copied via vm_area_dup) and create new.
+ * Clear old pointer (if copied via vm_area_dup) and allocate
+ * new structure. Before clearing, make sure vma_lock is not
+ * for this vma.
*/
if (vma->vm_flags & VM_MAYSHARE) {
- vma->vm_private_data = NULL;
- hugetlb_vma_lock_alloc(vma);
+ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+ if (vma_lock) {
+ if (vma_lock->vma != vma) {
+ vma->vm_private_data = NULL;
+ hugetlb_vma_lock_alloc(vma);
+ } else
+ pr_warn("HugeTLB: vma_lock already exists in %s.\n", __func__);
+ } else
+ hugetlb_vma_lock_alloc(vma);
}
}
@@ -4756,7 +4932,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
hugepage_add_new_anon_rmap(new_page, vma, addr);
set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
- ClearHPageRestoreReserve(new_page);
SetHPageMigratable(new_page);
}
@@ -5045,7 +5220,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
struct page *page;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
- struct mmu_notifier_range range;
unsigned long last_addr_mask;
bool force_flush = false;
@@ -5060,13 +5234,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
- /*
- * If sharing possible, alert mmu notifiers of worst case.
- */
- mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
- end);
- adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
- mmu_notifier_invalidate_range_start(&range);
last_addr_mask = hugetlb_mask_last_page(h);
address = start;
for (; address < end; address += sz) {
@@ -5096,7 +5263,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
* unmapped and its refcount is dropped, so just clear pte here.
*/
if (unlikely(!pte_present(pte))) {
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
/*
* If the pte was wr-protected by uffd-wp in any of the
* swap forms, meanwhile the caller does not want to
@@ -5108,7 +5274,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
else
-#endif
huge_pte_clear(mm, address, ptep, sz);
spin_unlock(ptl);
continue;
@@ -5137,13 +5302,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
if (huge_pte_dirty(pte))
set_page_dirty(page);
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
/* Leave a uffd-wp pte marker if needed */
if (huge_pte_uffd_wp(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
set_huge_pte_at(mm, address, ptep,
make_pte_marker(PTE_MARKER_UFFD_WP));
-#endif
hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page, vma, true);
@@ -5155,7 +5318,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
if (ref_page)
break;
}
- mmu_notifier_invalidate_range_end(&range);
tlb_end_vma(tlb, vma);
/*
@@ -5183,29 +5345,43 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
hugetlb_vma_lock_write(vma);
i_mmap_lock_write(vma->vm_file->f_mapping);
+ /* mmu notification performed in caller */
__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
- /*
- * Unlock and free the vma lock before releasing i_mmap_rwsem. When
- * the vma_lock is freed, this makes the vma ineligible for pmd
- * sharing. And, i_mmap_rwsem is required to set up pmd sharing.
- * This is important as page tables for this unmapped range will
- * be asynchrously deleted. If the page tables are shared, there
- * will be issues when accessed by someone else.
- */
- __hugetlb_vma_unlock_write_free(vma);
-
- i_mmap_unlock_write(vma->vm_file->f_mapping);
+ if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
+ /*
+ * Unlock and free the vma lock before releasing i_mmap_rwsem.
+ * When the vma_lock is freed, this makes the vma ineligible
+ * for pmd sharing. And, i_mmap_rwsem is required to set up
+ * pmd sharing. This is important as page tables for this
+ * unmapped range will be asynchronously deleted. If the page
+ * tables are shared, there will be issues when accessed by
+ * someone else.
+ */
+ __hugetlb_vma_unlock_write_free(vma);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ } else {
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ hugetlb_vma_unlock_write(vma);
+ }
}
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct page *ref_page,
zap_flags_t zap_flags)
{
+ struct mmu_notifier_range range;
struct mmu_gather tlb;
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+ start, end);
+ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+ mmu_notifier_invalidate_range_start(&range);
tlb_gather_mmu(&tlb, vma->vm_mm);
+
__unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+ mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
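
With the notifier calls removed from __unmap_hugepage_range(), unmap_hugepage_range() above now opens its own MMU_NOTIFY_CLEAR window, and __unmap_hugepage_range_final() relies on its caller to do the same. A sketch of that caller-side setup, modelled on the block removed above (tlb, start, end and zap_flags come from the surrounding code):

/* Sketch of the notification a caller of __unmap_hugepage_range_final() is
 * now expected to provide; modelled on the removed block, not taken
 * verbatim from this commit. */
struct mmu_notifier_range range;

mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
			start, end);
/* Widen to the worst case in case the range is pmd-shared. */
adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
mmu_notifier_invalidate_range_start(&range);

__unmap_hugepage_range_final(tlb, vma, start, end, NULL, zap_flags);

mmu_notifier_invalidate_range_end(&range);
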
@@ -5284,9 +5460,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long haddr = address & huge_page_mask(h);
struct mmu_notifier_range range;
- VM_BUG_ON(unshare && (flags & FOLL_WRITE));
- VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
-
/*
* hugetlb does not support FOLL_FORCE-style write faults that keep the
* PTE mapped R/O such as maybe_mkwrite() would do.
@@ -5296,8 +5469,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
/* Let's take out MAP_SHARED mappings first. */
if (vma->vm_flags & VM_MAYSHARE) {
- if (unlikely(unshare))
- return 0;
set_huge_ptep_writable(vma, haddr, ptep);
return 0;
}
@@ -5419,8 +5590,6 @@ retry_avoidcopy:
spin_lock(ptl);
ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
- ClearHPageRestoreReserve(new_page);
-
/* Break COW or unshare */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
@@ -5715,10 +5884,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
if (!pte_same(huge_ptep_get(ptep), old_pte))
goto backout;
- if (anon_rmap) {
- ClearHPageRestoreReserve(page);
+ if (anon_rmap)
hugepage_add_new_anon_rmap(page, vma, haddr);
- } else
+ else
page_dup_file_rmap(page, true);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
@@ -6092,6 +6260,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);
+ ret = -EIO;
+ if (PageHWPoison(page))
+ goto out_release_unlock;
+
/*
* We allow to overwrite a pte marker: consider when both MISSING|WP
* registered, we firstly wr-protect a none pte which has no page cache
@@ -6101,12 +6273,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
goto out_release_unlock;
- if (page_in_pagecache) {
+ if (page_in_pagecache)
page_dup_file_rmap(page, true);
- } else {
- ClearHPageRestoreReserve(page);
+ else
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
- }
/*
* For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@ -6171,7 +6341,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
}
}
-static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
+ unsigned int flags, pte_t *pte,
bool *unshare)
{
pte_t pteval = huge_ptep_get(pte);
@@ -6183,13 +6354,69 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
return false;
if (flags & FOLL_WRITE)
return true;
- if (gup_must_unshare(flags, pte_page(pteval))) {
+ if (gup_must_unshare(vma, flags, pte_page(pteval))) {
*unshare = true;
return true;
}
return false;
}
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags)
+{
+ struct hstate *h = hstate_vma(vma);
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long haddr = address & huge_page_mask(h);
+ struct page *page = NULL;
+ spinlock_t *ptl;
+ pte_t *pte, entry;
+
+ /*
+ * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+ * follow_hugetlb_page().
+ */
+ if (WARN_ON_ONCE(flags & FOLL_PIN))
+ return NULL;
+
+retry:
+ pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+ if (!pte)
+ return NULL;
+
+ ptl = huge_pte_lock(h, mm, pte);
+ entry = huge_ptep_get(pte);
+ if (pte_present(entry)) {
+ page = pte_page(entry) +
+ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+ /*
+ * Note that page may be a sub-page, and with vmemmap
+ * optimizations the page struct may be read only.
+ * try_grab_page() will increase the ref count on the
+ * head page, so this will be OK.
+ *
+ * try_grab_page() should always be able to get the page here,
+ * because we hold the ptl lock and have verified pte_present().
+ */
+ if (try_grab_page(page, flags)) {
+ page = NULL;
+ goto out;
+ }
+ } else {
+ if (is_hugetlb_entry_migration(entry)) {
+ spin_unlock(ptl);
+ __migration_entry_wait_huge(pte, ptl);
+ goto retry;
+ }
+ /*
+ * hwpoisoned entry is treated as no_page_table in
+ * follow_page_mask().
+ */
+ }
+out:
+ spin_unlock(ptl);
+ return page;
+}
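
hugetlb_follow_page_mask() above is a new follow_page()-style entry point; a hedged sketch of how a generic caller might dispatch to it (the actual dispatch site in mm/gup.c is not part of this hunk):

/* Hypothetical dispatch helper, not from this commit.  A non-NULL return
 * already carries the reference taken by try_grab_page() inside
 * hugetlb_follow_page_mask(); NULL covers missing, swap/hwpoison and
 * FOLL_PIN-rejected entries alike. */
static struct page *follow_page_huge(struct vm_area_struct *vma,
				     unsigned long address, unsigned int flags)
{
	if (!is_vm_hugetlb_page(vma))
		return NULL;
	return hugetlb_follow_page_mask(vma, address, flags);
}
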
+
long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, unsigned long *nr_pages,
@@ -6256,7 +6483,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* directly from any kind of swap entries.
*/
if (absent ||
- __follow_hugetlb_must_fault(flags, pte, &unshare)) {
+ __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
vm_fault_t ret;
unsigned int fault_flags = 0;
@@ -6266,9 +6493,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_WRITE;
else if (unshare)
fault_flags |= FAULT_FLAG_UNSHARE;
- if (locked)
+ if (locked) {
fault_flags |= FAULT_FLAG_ALLOW_RETRY |
FAULT_FLAG_KILLABLE;
+ if (flags & FOLL_INTERRUPTIBLE)
+ fault_flags |= FAULT_FLAG_INTERRUPTIBLE;
+ }
if (flags & FOLL_NOWAIT)
fault_flags |= FAULT_FLAG_ALLOW_RETRY |
FAULT_FLAG_RETRY_NOWAIT;
@@ -6342,8 +6572,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* tables. If the huge page is present, then the tail
* pages must also be present. The ptl prevents the
* head page and tail pages from being rearranged in
- * any way. So this page must be available at this
- * point, unless the page refcount overflowed:
+ * any way. As this is hugetlb, the pages will never
+ * be p2pdma or not longterm pinnable. So this page
+ * must be available at this point, unless the page
+ * refcount overflowed:
*/
if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs,
flags))) {
@@ -6527,7 +6759,8 @@ bool hugetlb_reserve_pages(struct inode *inode,
}
/*
- * vma specific semaphore used for pmd sharing synchronization
+ * vma specific semaphore used for pmd sharing and fault/truncation
+ * synchronization
*/
hugetlb_vma_lock_alloc(vma);
@@ -6783,149 +7016,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
*end = ALIGN(*end, PUD_SIZE);
}
-static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma)
-{
- return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
- vma->vm_private_data;
-}
-
-void hugetlb_vma_lock_read(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- down_read(&vma_lock->rw_sema);
- }
-}
-
-void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- up_read(&vma_lock->rw_sema);
- }
-}
-
-void hugetlb_vma_lock_write(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- down_write(&vma_lock->rw_sema);
- }
-}
-
-void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- up_write(&vma_lock->rw_sema);
- }
-}
-
-int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
-{
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- if (!__vma_shareable_flags_pmd(vma))
- return 1;
-
- return down_write_trylock(&vma_lock->rw_sema);
-}
-
-void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- lockdep_assert_held(&vma_lock->rw_sema);
- }
-}
-
-void hugetlb_vma_lock_release(struct kref *kref)
-{
- struct hugetlb_vma_lock *vma_lock = container_of(kref,
- struct hugetlb_vma_lock, refs);
-
- kfree(vma_lock);
-}
-
-static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
-{
- struct vm_area_struct *vma = vma_lock->vma;
-
- /*
- * vma_lock structure may or not be released as a result of put,
- * it certainly will no longer be attached to vma so clear pointer.
- * Semaphore synchronizes access to vma_lock->vma field.
- */
- vma_lock->vma = NULL;
- vma->vm_private_data = NULL;
- up_write(&vma_lock->rw_sema);
- kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
-}
-
-static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
-{
- if (__vma_shareable_flags_pmd(vma)) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- __hugetlb_vma_unlock_write_put(vma_lock);
- }
-}
-
-static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
-{
- /*
- * Only present in sharable vmas.
- */
- if (!vma || !__vma_shareable_flags_pmd(vma))
- return;
-
- if (vma->vm_private_data) {
- struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
-
- down_write(&vma_lock->rw_sema);
- __hugetlb_vma_unlock_write_put(vma_lock);
- }
-}
-
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
-{
- struct hugetlb_vma_lock *vma_lock;
-
- /* Only establish in (flags) sharable vmas */
- if (!vma || !(vma->vm_flags & VM_MAYSHARE))
- return;
-
- /* Should never get here with non-NULL vm_private_data */
- if (vma->vm_private_data)
- return;
-
- vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL);
- if (!vma_lock) {
- /*
- * If we can not allocate structure, then vma can not
- * participate in pmd sharing. This is only a possible
- * performance enhancement and memory saving issue.
- * However, the lock is also used to synchronize page
- * faults with truncation. If the lock is not present,
- * unlikely races could leave pages in a file past i_size
- * until the file is removed. Warn in the unlikely case of
- * allocation failure.
- */
- pr_warn_once("HugeTLB: unable to allocate vma specific lock\n");
- return;
- }
-
- kref_init(&vma_lock->refs);
- init_rwsem(&vma_lock->rw_sema);
- vma_lock->vma = vma;
- vma->vm_private_data = vma_lock;
-}
-
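
Editor's aside: the vma_lock helpers removed above manage the structure's lifetime with a kref. A generic, hedged sketch of that allocate/release pattern, with invented names (illustration only):

struct example_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
};

static void example_lock_release(struct kref *kref)
{
	/* last reference dropped: recover the enclosing object and free it */
	kfree(container_of(kref, struct example_lock, refs));
}

static struct example_lock *example_lock_alloc(void)
{
	struct example_lock *lock = kmalloc(sizeof(*lock), GFP_KERNEL);

	if (lock) {
		kref_init(&lock->refs);		/* refcount starts at 1 */
		init_rwsem(&lock->rw_sema);
	}
	return lock;
}

static void example_lock_put(struct example_lock *lock)
{
	/* invokes example_lock_release() only when the count reaches zero */
	kref_put(&lock->refs, example_lock_release);
}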
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
@@ -7014,47 +7104,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
-void hugetlb_vma_lock_read(struct vm_area_struct *vma)
-{
-}
-
-void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
-{
-}
-
-void hugetlb_vma_lock_write(struct vm_area_struct *vma)
-{
-}
-
-void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
-{
-}
-
-int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
-{
- return 1;
-}
-
-void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
-{
-}
-
-void hugetlb_vma_lock_release(struct kref *kref)
-{
-}
-
-static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
-{
-}
-
-static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
-{
-}
-
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
-{
-}
-
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
@@ -7182,122 +7231,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
* These functions are overwritable if your architecture needs its own
* behavior.
*/
-struct page * __weak
-follow_huge_addr(struct mm_struct *mm, unsigned long address,
- int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
-struct page * __weak
-follow_huge_pd(struct vm_area_struct *vma,
- unsigned long address, hugepd_t hpd, int flags, int pdshift)
-{
- WARN(1, "hugepd follow called with no support for hugepage directory format\n");
- return NULL;
-}
-
-struct page * __weak
-follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
-{
- struct hstate *h = hstate_vma(vma);
- struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
- spinlock_t *ptl;
- pte_t *ptep, pte;
-
- /*
- * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
- * follow_hugetlb_page().
- */
- if (WARN_ON_ONCE(flags & FOLL_PIN))
- return NULL;
-
-retry:
- ptep = huge_pte_offset(mm, address, huge_page_size(h));
- if (!ptep)
- return NULL;
-
- ptl = huge_pte_lock(h, mm, ptep);
- pte = huge_ptep_get(ptep);
- if (pte_present(pte)) {
- page = pte_page(pte) +
- ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
- /*
- * try_grab_page() should always succeed here, because: a) we
- * hold the pmd (ptl) lock, and b) we've just checked that the
- * huge pmd (head) page is present in the page tables. The ptl
- * prevents the head page and tail pages from being rearranged
- * in any way. So this page must be available at this point,
- * unless the page refcount overflowed:
- */
- if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
- page = NULL;
- goto out;
- }
- } else {
- if (is_hugetlb_entry_migration(pte)) {
- spin_unlock(ptl);
- __migration_entry_wait_huge(ptep, ptl);
- goto retry;
- }
- /*
- * hwpoisoned entry is treated as no_page_table in
- * follow_page_mask().
- */
- }
-out:
- spin_unlock(ptl);
- return page;
-}
-
-struct page * __weak
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags)
-{
- struct page *page = NULL;
- spinlock_t *ptl;
- pte_t pte;
-
- if (WARN_ON_ONCE(flags & FOLL_PIN))
- return NULL;
-
-retry:
- ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
- if (!pud_huge(*pud))
- goto out;
- pte = huge_ptep_get((pte_t *)pud);
- if (pte_present(pte)) {
- page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
- if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
- page = NULL;
- goto out;
- }
- } else {
- if (is_hugetlb_entry_migration(pte)) {
- spin_unlock(ptl);
- __migration_entry_wait(mm, (pte_t *)pud, ptl);
- goto retry;
- }
- /*
- * hwpoisoned entry is treated as no_page_table in
- * follow_page_mask().
- */
- }
-out:
- spin_unlock(ptl);
- return page;
-}
-
-struct page * __weak
-follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
-{
- if (flags & (FOLL_GET | FOLL_PIN))
- return NULL;
-
- return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
-}
-
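
Editor's illustration of the __weak override mechanism the comment above refers to: an architecture supplies a strong definition of the same symbol and the linker picks it over the generic one. Hypothetical example with an invented policy, shown only to demonstrate the mechanism:

/* arch/foo/mm/hugetlbpage.c (hypothetical) */
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	/* the strong definition replaces the generic __weak one at link time */
	if (huge_page_size(h) == PMD_SIZE)
		return PUD_SIZE - PMD_SIZE;
	return 0UL;
}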
int isolate_hugetlb(struct page *page, struct list_head *list)
{
int ret = 0;
@@ -7316,7 +7249,7 @@ unlock:
return ret;
}
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
{
int ret = 0;
@@ -7326,7 +7259,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
*hugetlb = true;
if (HPageFreed(page))
ret = 0;
- else if (HPageMigratable(page))
+ else if (HPageMigratable(page) || unpoison)
ret = get_page_unless_zero(page);
else
ret = -EBUSY;
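
Editor's sketch of how the new `unpoison` argument is meant to be used (hypothetical caller; the real callers live in mm/memory-failure.c): an unpoisoning path may take a reference even when the page is not currently migratable.

static int example_take_ref_for_unpoison(struct page *page)
{
	bool hugetlb = false;
	int ret;

	/* unpoison == true: ignore the HPageMigratable() requirement */
	ret = get_hwpoison_huge_page(page, &hugetlb, true);
	if (!hugetlb)
		return 0;	/* not a hugetlb page, handled elsewhere */
	return ret;		/* 1: got a ref, 0: page was free, -EBUSY: busy */
}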
@@ -7335,12 +7268,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
return ret;
}
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+ bool *migratable_cleared)
{
int ret;
spin_lock_irq(&hugetlb_lock);
- ret = __get_huge_page_for_hwpoison(pfn, flags);
+ ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
spin_unlock_irq(&hugetlb_lock);
return ret;
}
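
A similar hedged sketch for the new `migratable_cleared` out-parameter (hypothetical caller, invented error handling): it reports whether this call cleared the page's migratable state, so a path that bails out can restore it.

static int example_probe_hwpoison_hugetlb(unsigned long pfn)
{
	bool migratable_cleared = false;
	int res;

	res = get_huge_page_for_hwpoison(pfn, 0, &migratable_cleared);
	if (res < 0 && migratable_cleared) {
		/* roll back: the probe cleared "migratable", restore it */
		SetHPageMigratable(compound_head(pfn_to_page(pfn)));
	}
	return res;
}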
@@ -7354,15 +7288,15 @@ void putback_active_hugepage(struct page *page)
put_page(page);
}
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
{
- struct hstate *h = page_hstate(oldpage);
+ struct hstate *h = folio_hstate(old_folio);
- hugetlb_cgroup_migrate(oldpage, newpage);
- set_page_owner_migrate_reason(newpage, reason);
+ hugetlb_cgroup_migrate(old_folio, new_folio);
+ set_page_owner_migrate_reason(&new_folio->page, reason);
/*
- * transfer temporary state of the new huge page. This is
+ * transfer temporary state of the new hugetlb folio. This is
* reverse to other transitions because the newpage is going to
* be final while the old one will be freed so it takes over
* the temporary status.
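
Editor's note on the folio conversion: existing page-based callers can wrap their pointers with page_folio() when calling the new interface. A minimal hedged sketch (hypothetical wrapper; the actual call sites are in the migration code):

static void example_finish_hugetlb_migration(struct page *oldpage,
					     struct page *newpage, int reason)
{
	move_hugetlb_state(page_folio(oldpage), page_folio(newpage), reason);
}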
@@ -7371,12 +7305,13 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
* here as well otherwise the global surplus count will not match
* the per-node's.
*/
- if (HPageTemporary(newpage)) {
- int old_nid = page_to_nid(oldpage);
- int new_nid = page_to_nid(newpage);
+ if (folio_test_hugetlb_temporary(new_folio)) {
+ int old_nid = folio_nid(old_folio);
+ int new_nid = folio_nid(new_folio);
+
+ folio_set_hugetlb_temporary(old_folio);
+ folio_clear_hugetlb_temporary(new_folio);
- SetHPageTemporary(oldpage);
- ClearHPageTemporary(newpage);
/*
* There is no need to transfer the per-node surplus state