diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 1161 |
1 files changed, 548 insertions, 613 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b586cdd75930..db895230ee7e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE]; #ifdef CONFIG_CMA static struct cma *hugetlb_cma[MAX_NUMNODES]; static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata; -static bool hugetlb_cma_page(struct page *page, unsigned int order) +static bool hugetlb_cma_folio(struct folio *folio, unsigned int order) { - return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page, + return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page, 1 << order); } #else -static bool hugetlb_cma_page(struct page *page, unsigned int order) +static bool hugetlb_cma_folio(struct folio *folio, unsigned int order) { return false; } @@ -255,6 +255,152 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma) return subpool_inode(file_inode(vma->vm_file)); } +/* + * hugetlb vma_lock helper routines + */ +static bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && + vma->vm_private_data; +} + +void hugetlb_vma_lock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_read(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_read(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_write(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_unlock_write(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + up_write(&vma_lock->rw_sema); + } +} + +int hugetlb_vma_trylock_write(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + if (!__vma_shareable_lock(vma)) + return 1; + + return down_write_trylock(&vma_lock->rw_sema); +} + +void hugetlb_vma_assert_locked(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + lockdep_assert_held(&vma_lock->rw_sema); + } +} + +void hugetlb_vma_lock_release(struct kref *kref) +{ + struct hugetlb_vma_lock *vma_lock = container_of(kref, + struct hugetlb_vma_lock, refs); + + kfree(vma_lock); +} + +static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) +{ + struct vm_area_struct *vma = vma_lock->vma; + + /* + * vma_lock structure may or not be released as a result of put, + * it certainly will no longer be attached to vma so clear pointer. + * Semaphore synchronizes access to vma_lock->vma field. + */ + vma_lock->vma = NULL; + vma->vm_private_data = NULL; + up_write(&vma_lock->rw_sema); + kref_put(&vma_lock->refs, hugetlb_vma_lock_release); +} + +static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) +{ + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_free(struct vm_area_struct *vma) +{ + /* + * Only present in sharable vmas. + */ + if (!vma || !__vma_shareable_lock(vma)) + return; + + if (vma->vm_private_data) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + down_write(&vma_lock->rw_sema); + __hugetlb_vma_unlock_write_put(vma_lock); + } +} + +static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) +{ + struct hugetlb_vma_lock *vma_lock; + + /* Only establish in (flags) sharable vmas */ + if (!vma || !(vma->vm_flags & VM_MAYSHARE)) + return; + + /* Should never get here with non-NULL vm_private_data */ + if (vma->vm_private_data) + return; + + vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); + if (!vma_lock) { + /* + * If we can not allocate structure, then vma can not + * participate in pmd sharing. This is only a possible + * performance enhancement and memory saving issue. + * However, the lock is also used to synchronize page + * faults with truncation. If the lock is not present, + * unlikely races could leave pages in a file past i_size + * until the file is removed. Warn in the unlikely case of + * allocation failure. + */ + pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); + return; + } + + kref_init(&vma_lock->refs); + init_rwsem(&vma_lock->rw_sema); + vma_lock->vma = vma; + vma->vm_private_data = vma_lock; +} + /* Helper that removes a struct file_region from the resv_map cache and returns * it for use. */ @@ -1014,15 +1160,23 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma) VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma); /* * Clear vm_private_data + * - For shared mappings this is a per-vma semaphore that may be + * allocated in a subsequent call to hugetlb_vm_op_open. + * Before clearing, make sure pointer is not associated with vma + * as this will leak the structure. This is the case when called + * via clear_vma_resv_huge_pages() and hugetlb_vm_op_open has already + * been called to allocate a new structure. * - For MAP_PRIVATE mappings, this is the reserve map which does * not apply to children. Faults generated by the children are * not guaranteed to succeed, even if read-only. - * - For shared mappings this is a per-vma semaphore that may be - * allocated in a subsequent call to hugetlb_vm_op_open. */ - vma->vm_private_data = (void *)0; - if (!(vma->vm_flags & VM_MAYSHARE)) - return; + if (vma->vm_flags & VM_MAYSHARE) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + if (vma_lock && vma_lock->vma != vma) + vma->vm_private_data = NULL; + } else + vma->vm_private_data = NULL; } /* @@ -1119,17 +1273,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) return false; } -static void enqueue_huge_page(struct hstate *h, struct page *page) +static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio) { - int nid = page_to_nid(page); + int nid = folio_nid(folio); lockdep_assert_held(&hugetlb_lock); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); - list_move(&page->lru, &h->hugepage_freelists[nid]); + list_move(&folio->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; - SetHPageFreed(page); + folio_set_hugetlb_freed(folio); } static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) @@ -1317,76 +1471,76 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) nr_nodes--) /* used to demote non-gigantic_huge pages as well */ -static void __destroy_compound_gigantic_page(struct page *page, +static void __destroy_compound_gigantic_folio(struct folio *folio, unsigned int order, bool demote) { int i; int nr_pages = 1 << order; struct page *p; - atomic_set(compound_mapcount_ptr(page), 0); - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(folio_mapcount_ptr(folio), 0); + atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(folio_pincount_ptr(folio), 0); for (i = 1; i < nr_pages; i++) { - p = nth_page(page, i); + p = folio_page(folio, i); p->mapping = NULL; clear_compound_head(p); if (!demote) set_page_refcounted(p); } - set_compound_order(page, 0); -#ifdef CONFIG_64BIT - page[1].compound_nr = 0; -#endif - __ClearPageHead(page); + folio_set_compound_order(folio, 0); + __folio_clear_head(folio); } -static void destroy_compound_hugetlb_page_for_demote(struct page *page, +static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio, unsigned int order) { - __destroy_compound_gigantic_page(page, order, true); + __destroy_compound_gigantic_folio(folio, order, true); } #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static void destroy_compound_gigantic_page(struct page *page, +static void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { - __destroy_compound_gigantic_page(page, order, false); + __destroy_compound_gigantic_folio(folio, order, false); } -static void free_gigantic_page(struct page *page, unsigned int order) +static void free_gigantic_folio(struct folio *folio, unsigned int order) { /* * If the page isn't allocated using the cma allocator, * cma_release() returns false. */ #ifdef CONFIG_CMA - if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order)) + int nid = folio_nid(folio); + + if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order)) return; #endif - free_contig_range(page_to_pfn(page), 1 << order); + free_contig_range(folio_pfn(folio), 1 << order); } #ifdef CONFIG_CONTIG_ALLOC -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { + struct page *page; unsigned long nr_pages = pages_per_huge_page(h); if (nid == NUMA_NO_NODE) nid = numa_mem_id(); #ifdef CONFIG_CMA { - struct page *page; int node; if (hugetlb_cma[nid]) { page = cma_alloc(hugetlb_cma[nid], nr_pages, huge_page_order(h), true); if (page) - return page; + return page_folio(page); } if (!(gfp_mask & __GFP_THISNODE)) { @@ -1397,17 +1551,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, page = cma_alloc(hugetlb_cma[node], nr_pages, huge_page_order(h), true); if (page) - return page; + return page_folio(page); } } } #endif - return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); + page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); + return page ? page_folio(page) : NULL; } #else /* !CONFIG_CONTIG_ALLOC */ -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { return NULL; @@ -1415,40 +1570,41 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, #endif /* CONFIG_CONTIG_ALLOC */ #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */ -static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, +static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nodemask) { return NULL; } -static inline void free_gigantic_page(struct page *page, unsigned int order) { } -static inline void destroy_compound_gigantic_page(struct page *page, +static inline void free_gigantic_folio(struct folio *folio, + unsigned int order) { } +static inline void destroy_compound_gigantic_folio(struct folio *folio, unsigned int order) { } #endif /* - * Remove hugetlb page from lists, and update dtor so that page appears + * Remove hugetlb folio from lists, and update dtor so that the folio appears * as just a compound page. * - * A reference is held on the page, except in the case of demote. + * A reference is held on the folio, except in the case of demote. * * Must be called with hugetlb lock held. */ -static void __remove_hugetlb_page(struct hstate *h, struct page *page, +static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus, bool demote) { - int nid = page_to_nid(page); + int nid = folio_nid(folio); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); - VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio); + VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio); lockdep_assert_held(&hugetlb_lock); if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; - list_del(&page->lru); + list_del(&folio->lru); - if (HPageFreed(page)) { + if (folio_test_hugetlb_freed(folio)) { h->free_huge_pages--; h->free_huge_pages_node[nid]--; } @@ -1467,50 +1623,50 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page, * * For gigantic pages set the destructor to the null dtor. This * destructor will never be called. Before freeing the gigantic - * page destroy_compound_gigantic_page will turn the compound page - * into a simple group of pages. After this the destructor does not + * page destroy_compound_gigantic_folio will turn the folio into a + * simple group of pages. After this the destructor does not * apply. * * This handles the case where more than one ref is held when and - * after update_and_free_page is called. + * after update_and_free_hugetlb_folio is called. * * In the case of demote we do not ref count the page as it will soon * be turned into a page of smaller size. */ if (!demote) - set_page_refcounted(page); + folio_ref_unfreeze(folio, 1); if (hstate_is_gigantic(h)) - set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); else - set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); + folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); h->nr_huge_pages--; h->nr_huge_pages_node[nid]--; } -static void remove_hugetlb_page(struct hstate *h, struct page *page, +static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { - __remove_hugetlb_page(h, page, adjust_surplus, false); + __remove_hugetlb_folio(h, folio, adjust_surplus, false); } -static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, +static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio, bool adjust_surplus) { - __remove_hugetlb_page(h, page, adjust_surplus, true); + __remove_hugetlb_folio(h, folio, adjust_surplus, true); } -static void add_hugetlb_page(struct hstate *h, struct page *page, +static void add_hugetlb_folio(struct hstate *h, struct folio *folio, bool adjust_surplus) { int zeroed; - int nid = page_to_nid(page); + int nid = folio_nid(folio); - VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page); + VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio); lockdep_assert_held(&hugetlb_lock); - INIT_LIST_HEAD(&page->lru); + INIT_LIST_HEAD(&folio->lru); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; @@ -1519,21 +1675,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, h->surplus_huge_pages_node[nid]++; } - set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); - set_page_private(page, 0); + folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR); + folio_change_private(folio, NULL); /* - * We have to set HPageVmemmapOptimized again as above - * set_page_private(page, 0) cleared it. + * We have to set hugetlb_vmemmap_optimized again as above + * folio_change_private(folio, NULL) cleared it. */ - SetHPageVmemmapOptimized(page); + folio_set_hugetlb_vmemmap_optimized(folio); /* - * This page is about to be managed by the hugetlb allocator and + * This folio is about to be managed by the hugetlb allocator and * should have no users. Drop our reference, and check for others * just in case. */ - zeroed = put_page_testzero(page); - if (!zeroed) + zeroed = folio_put_testzero(folio); + if (unlikely(!zeroed)) /* * It is VERY unlikely soneone else has taken a ref on * the page. In this case, we simply return as the @@ -1542,13 +1698,14 @@ static void add_hugetlb_page(struct hstate *h, struct page *page, */ return; - arch_clear_hugepage_flags(page); - enqueue_huge_page(h, page); + arch_clear_hugepage_flags(&folio->page); + enqueue_hugetlb_folio(h, folio); } static void __update_and_free_page(struct hstate *h, struct page *page) { int i; + struct folio *folio = page_folio(page); struct page *subpage; if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) @@ -1558,7 +1715,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * If we don't know which subpages are hwpoisoned, we can't free * the hugepage, so it's leaked intentionally. */ - if (HPageRawHwpUnreliable(page)) + if (folio_test_hugetlb_raw_hwp_unreliable(folio)) return; if (hugetlb_vmemmap_restore(h, page)) { @@ -1568,7 +1725,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * page and put the page back on the hugetlb free list and treat * as a surplus page. */ - add_hugetlb_page(h, page, true); + add_hugetlb_folio(h, folio, true); spin_unlock_irq(&hugetlb_lock); return; } @@ -1577,11 +1734,11 @@ static void __update_and_free_page(struct hstate *h, struct page *page) * Move PageHWPoison flag from head page to the raw error pages, * which makes any healthy subpages reusable. */ - if (unlikely(PageHWPoison(page))) - hugetlb_clear_page_hwpoison(page); + if (unlikely(folio_test_hwpoison(folio))) + hugetlb_clear_page_hwpoison(&folio->page); for (i = 0; i < pages_per_huge_page(h); i++) { - subpage = nth_page(page, i); + subpage = folio_page(folio, i); subpage->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_private | @@ -1590,19 +1747,19 @@ static void __update_and_free_page(struct hstate *h, struct page *page) /* * Non-gigantic pages demoted from CMA allocated gigantic pages - * need to be given back to CMA in free_gigantic_page. + * need to be given back to CMA in free_gigantic_folio. */ if (hstate_is_gigantic(h) || - hugetlb_cma_page(page, huge_page_order(h))) { - destroy_compound_gigantic_page(page, huge_page_order(h)); - free_gigantic_page(page, huge_page_order(h)); + hugetlb_cma_folio(folio, huge_page_order(h))) { + destroy_compound_gigantic_folio(folio, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); } else { __free_pages(page, huge_page_order(h)); } } /* - * As update_and_free_page() can be called under any context, so we cannot + * As update_and_free_hugetlb_folio() can be called under any context, so we cannot * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate * the vmemmap pages. @@ -1631,8 +1788,9 @@ static void free_hpage_workfn(struct work_struct *work) /* * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate() * is going to trigger because a previous call to - * remove_hugetlb_page() will set_compound_page_dtor(page, - * NULL_COMPOUND_DTOR), so do not use page_hstate() directly. + * remove_hugetlb_folio() will call folio_set_compound_dtor + * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate() + * directly. */ h = size_to_hstate(page_size(page)); @@ -1649,11 +1807,11 @@ static inline void flush_free_hpage_work(struct hstate *h) flush_work(&free_hpage_work); } -static void update_and_free_page(struct hstate *h, struct page *page, +static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio, bool atomic) { - if (!HPageVmemmapOptimized(page) || !atomic) { - __update_and_free_page(h, page); + if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) { + __update_and_free_page(h, &folio->page); return; } @@ -1664,16 +1822,18 @@ static void update_and_free_page(struct hstate *h, struct page *page, * empty. Otherwise, schedule_work() had been called but the workfn * hasn't retrieved the list yet. */ - if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist)) + if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist)) schedule_work(&free_hpage_work); } static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list) { struct page *page, *t_page; + struct folio *folio; list_for_each_entry_safe(page, t_page, list, lru) { - update_and_free_page(h, page, false); + folio = page_folio(page); + update_and_free_hugetlb_folio(h, folio, false); cond_resched(); } } @@ -1695,21 +1855,22 @@ void free_huge_page(struct page *page) * Can't pass hstate in here because it is called from the * compound page destructor. */ - struct hstate *h = page_hstate(page); - int nid = page_to_nid(page); - struct hugepage_subpool *spool = hugetlb_page_subpool(page); + struct folio *folio = page_folio(page); + struct hstate *h = folio_hstate(folio); + int nid = folio_nid(folio); + struct hugepage_subpool *spool = hugetlb_folio_subpool(folio); bool restore_reserve; unsigned long flags; - VM_BUG_ON_PAGE(page_count(page), page); - VM_BUG_ON_PAGE(page_mapcount(page), page); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); + VM_BUG_ON_FOLIO(folio_mapcount(folio), folio); - hugetlb_set_page_subpool(page, NULL); - if (PageAnon(page)) - __ClearPageAnonExclusive(page); - page->mapping = NULL; - restore_reserve = HPageRestoreReserve(page); - ClearHPageRestoreReserve(page); + hugetlb_set_folio_subpool(folio, NULL); + if (folio_test_anon(folio)) + __ClearPageAnonExclusive(&folio->page); + folio->mapping = NULL; + restore_reserve = folio_test_hugetlb_restore_reserve(folio); + folio_clear_hugetlb_restore_reserve(folio); /* * If HPageRestoreReserve was set on page, page allocation consumed a @@ -1731,26 +1892,26 @@ void free_huge_page(struct page *page) } spin_lock_irqsave(&hugetlb_lock, flags); - ClearHPageMigratable(page); - hugetlb_cgroup_uncharge_page(hstate_index(h), - pages_per_huge_page(h), page); - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + folio_clear_hugetlb_migratable(folio); + hugetlb_cgroup_uncharge_folio(hstate_index(h), + pages_per_huge_page(h), folio); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); if (restore_reserve) h->resv_huge_pages++; - if (HPageTemporary(page)) { - remove_hugetlb_page(h, page, false); + if (folio_test_hugetlb_temporary(folio)) { + remove_hugetlb_folio(h, folio, false); spin_unlock_irqrestore(&hugetlb_lock, flags); - update_and_free_page(h, page, true); + update_and_free_hugetlb_folio(h, folio, true); } else if (h->surplus_huge_pages_node[nid]) { /* remove the page from active list */ - remove_hugetlb_page(h, page, true); + remove_hugetlb_folio(h, folio, true); spin_unlock_irqrestore(&hugetlb_lock, flags); - update_and_free_page(h, page, true); + update_and_free_hugetlb_folio(h, folio, true); } else { arch_clear_hugepage_flags(page); - enqueue_huge_page(h, page); + enqueue_hugetlb_folio(h, folio); spin_unlock_irqrestore(&hugetlb_lock, flags); } } @@ -1765,36 +1926,37 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid) h->nr_huge_pages_node[nid]++; } -static void __prep_new_huge_page(struct hstate *h, struct page *page) +static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio) { - hugetlb_vmemmap_optimize(h, page); - INIT_LIST_HEAD(&page->lru); - set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); - hugetlb_set_page_subpool(page, NULL); - set_hugetlb_cgroup(page, NULL); - set_hugetlb_cgroup_rsvd(page, NULL); + hugetlb_vmemmap_optimize(h, &folio->page); + INIT_LIST_HEAD(&folio->lru); + folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR); + hugetlb_set_folio_subpool(folio, NULL); + set_hugetlb_cgroup(folio, NULL); + set_hugetlb_cgroup_rsvd(folio, NULL); } -static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) +static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid) { - __prep_new_huge_page(h, page); + __prep_new_hugetlb_folio(h, folio); spin_lock_irq(&hugetlb_lock); __prep_account_new_huge_page(h, nid); spin_unlock_irq(&hugetlb_lock); } -static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, - bool demote) +static bool __prep_compound_gigantic_folio(struct folio *folio, + unsigned int order, bool demote) { int i, j; int nr_pages = 1 << order; struct page *p; - /* we rely on prep_new_huge_page to set the destructor */ - set_compound_order(page, order); - __SetPageHead(page); + __folio_clear_reserved(folio); + __folio_set_head(folio); + /* we rely on prep_new_hugetlb_folio to set the destructor */ + folio_set_compound_order(folio, order); for (i = 0; i < nr_pages; i++) { - p = nth_page(page, i); + p = folio_page(folio, i); /* * For gigantic hugepages allocated through bootmem at @@ -1808,7 +1970,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * on the head page when they need know if put_page() is needed * after get_user_pages(). */ - __ClearPageReserved(p); + if (i != 0) /* head page cleared above */ + __ClearPageReserved(p); /* * Subtle and very unlikely * @@ -1835,42 +1998,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, VM_BUG_ON_PAGE(page_count(p), p); } if (i != 0) - set_compound_head(p, page); + set_compound_head(p, &folio->page); } - atomic_set(compound_mapcount_ptr(page), -1); - atomic_set(compound_pincount_ptr(page), 0); + atomic_set(folio_mapcount_ptr(folio), -1); + atomic_set(folio_subpages_mapcount_ptr(folio), 0); + atomic_set(folio_pincount_ptr(folio), 0); return true; out_error: /* undo page modifications made above */ for (j = 0; j < i; j++) { - p = nth_page(page, j); + p = folio_page(folio, j); if (j != 0) clear_compound_head(p); set_page_refcounted(p); } /* need to clear PG_reserved on remaining tail pages */ for (; j < nr_pages; j++) { - p = nth_page(page, j); + p = folio_page(folio, j); __ClearPageReserved(p); } - set_compound_order(page, 0); -#ifdef CONFIG_64BIT - page[1].compound_nr = 0; -#endif - __ClearPageHead(page); + folio_set_compound_order(folio, 0); + __folio_clear_head(folio); return false; } -static bool prep_compound_gigantic_page(struct page *page, unsigned int order) +static bool prep_compound_gigantic_folio(struct folio *folio, + unsigned int order) { - return __prep_compound_gigantic_page(page, order, false); + return __prep_compound_gigantic_folio(folio, order, false); } -static bool prep_compound_gigantic_page_for_demote(struct page *page, +static bool prep_compound_gigantic_folio_for_demote(struct folio *folio, unsigned int order) { - return __prep_compound_gigantic_page(page, order, true); + return __prep_compound_gigantic_folio(folio, order, true); } /* @@ -1935,7 +2097,7 @@ pgoff_t hugetlb_basepage_index(struct page *page) return (index << compound_order(page_head)) + compound_idx; } -static struct page *alloc_buddy_huge_page(struct hstate *h, +static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) { @@ -1973,11 +2135,6 @@ retry: page = NULL; } - if (page) - __count_vm_event(HTLB_BUDDY_PGALLOC); - else - __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); - /* * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this * indicates an overall state change. Clear bit so that we resume @@ -1994,7 +2151,13 @@ retry: if (node_alloc_noretry && !page && alloc_try_hard) node_set(nid, *node_alloc_noretry); - return page; + if (!page) { + __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL); + return NULL; + } + + __count_vm_event(HTLB_BUDDY_PGALLOC); + return page_folio(page); } /* @@ -2004,29 +2167,28 @@ retry: * Note that returned page is 'frozen': ref count of head page and all tail * pages is zero. */ -static struct page *alloc_fresh_huge_page(struct hstate *h, +static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) { - struct page *page; + struct folio *folio; bool retry = false; retry: if (hstate_is_gigantic(h)) - page = alloc_gigantic_page(h, gfp_mask, nid, nmask); + folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask); else - page = alloc_buddy_huge_page(h, gfp_mask, + folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, nmask, node_alloc_noretry); - if (!page) + if (!folio) return NULL; - if (hstate_is_gigantic(h)) { - if (!prep_compound_gigantic_page(page, huge_page_order(h))) { + if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) { /* * Rare failure to convert pages to compound page. * Free pages and try again - ONCE! */ - free_gigantic_page(page, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); if (!retry) { retry = true; goto retry; @@ -2034,9 +2196,9 @@ retry: return NULL; } } - prep_new_huge_page(h, page, page_to_nid(page)); + prep_new_hugetlb_folio(h, folio, folio_nid(folio)); - return page; + return folio; } /* @@ -2046,23 +2208,20 @@ retry: static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, nodemask_t *node_alloc_noretry) { - struct page *page; + struct folio *folio; int nr_nodes, node; gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { - page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed, - node_alloc_noretry); - if (page) - break; + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node, + nodes_allowed, node_alloc_noretry); + if (folio) { + free_huge_page(&folio->page); /* free it into the hugepage allocator */ + return 1; + } } - if (!page) - return 0; - - free_huge_page(page); /* free it into the hugepage allocator */ - - return 1; + return 0; } /* @@ -2078,6 +2237,7 @@ static struct page *remove_pool_huge_page(struct hstate *h, { int nr_nodes, node; struct page *page = NULL; + struct folio *folio; lockdep_assert_held(&hugetlb_lock); for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { @@ -2089,7 +2249,8 @@ static struct page *remove_pool_huge_page(struct hstate *h, !list_empty(&h->hugepage_freelists[node])) { page = list_entry(h->hugepage_freelists[node].next, struct page, lru); - remove_hugetlb_page(h, page, acct_surplus); + folio = page_folio(page); + remove_hugetlb_folio(h, folio, acct_surplus); break; } } @@ -2114,21 +2275,21 @@ static struct page *remove_pool_huge_page(struct hstate *h, int dissolve_free_huge_page(struct page *page) { int rc = -EBUSY; + struct folio *folio = page_folio(page); retry: /* Not to disrupt normal path by vainly holding hugetlb_lock */ - if (!PageHuge(page)) + if (!folio_test_hugetlb(folio)) return 0; spin_lock_irq(&hugetlb_lock); - if (!PageHuge(page)) { + if (!folio_test_hugetlb(folio)) { rc = 0; goto out; } - if (!page_count(page)) { - struct page *head = compound_head(page); - struct hstate *h = page_hstate(head); + if (!folio_ref_count(folio)) { + struct hstate *h = folio_hstate(folio); if (!available_huge_pages(h)) goto out; @@ -2136,7 +2297,7 @@ retry: * We should make sure that the page is already on the free list * when it is dissolved. */ - if (unlikely(!HPageFreed(head))) { + if (unlikely(!folio_test_hugetlb_freed(folio))) { spin_unlock_irq(&hugetlb_lock); cond_resched(); @@ -2151,24 +2312,24 @@ retry: goto retry; } - remove_hugetlb_page(h, head, false); + remove_hugetlb_folio(h, folio, false); h->max_huge_pages--; spin_unlock_irq(&hugetlb_lock); /* - * Normally update_and_free_page will allocate required vmemmmap - * before freeing the page. update_and_free_page will fail to + * Normally update_and_free_hugtlb_folio will allocate required vmemmmap + * before freeing the page. update_and_free_hugtlb_folio will fail to * free the page if it can not allocate required vmemmap. We * need to adjust max_huge_pages if the page is not freed. * Attempt to allocate vmemmmap here so that we can take * appropriate action on failure. */ - rc = hugetlb_vmemmap_restore(h, head); + rc = hugetlb_vmemmap_restore(h, &folio->page); if (!rc) { - update_and_free_page(h, head, false); + update_and_free_hugetlb_folio(h, folio, false); } else { spin_lock_irq(&hugetlb_lock); - add_hugetlb_page(h, head, false); + add_hugetlb_folio(h, folio, false); h->max_huge_pages++; spin_unlock_irq(&hugetlb_lock); } @@ -2219,7 +2380,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn) static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { - struct page *page = NULL; + struct folio *folio = NULL; if (hstate_is_gigantic(h)) return NULL; @@ -2229,8 +2390,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, goto out_unlock; spin_unlock_irq(&hugetlb_lock); - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL); - if (!page) + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); + if (!folio) return NULL; spin_lock_irq(&hugetlb_lock); @@ -2242,43 +2403,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, * codeflow */ if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) { - SetHPageTemporary(page); + folio_set_hugetlb_temporary(folio); spin_unlock_irq(&hugetlb_lock); - free_huge_page(page); + free_huge_page(&folio->page); return NULL; } h->surplus_huge_pages++; - h->surplus_huge_pages_node[page_to_nid(page)]++; + h->surplus_huge_pages_node[folio_nid(folio)]++; out_unlock: spin_unlock_irq(&hugetlb_lock); - return page; + return &folio->page; } static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask) { - struct page *page; + struct folio *folio; if (hstate_is_gigantic(h)) return NULL; - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL); - if (!page) + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL); + if (!folio) return NULL; /* fresh huge pages are frozen */ - set_page_refcounted(page); - + folio_ref_unfreeze(folio, 1); /* * We do not account these pages as surplus because they are only * temporary and will be released properly on the last reference */ - SetHPageTemporary(page); + folio_set_hugetlb_temporary(folio); - return page; + return &folio->page; } /* @@ -2420,7 +2580,7 @@ retry: if ((--needed) < 0) break; /* Add the page to the hugetlb allocator */ - enqueue_huge_page(h, page); + enqueue_hugetlb_folio(h, page_folio(page)); } free: spin_unlock_irq(&hugetlb_lock); @@ -2727,51 +2887,52 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, } /* - * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one + * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve + * the old one * @h: struct hstate old page belongs to - * @old_page: Old page to dissolve + * @old_folio: Old folio to dissolve * @list: List to isolate the page in case we need to * Returns 0 on success, otherwise negated error. */ -static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, - struct list_head *list) +static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, + struct folio *old_folio, struct list_head *list) { gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - int nid = page_to_nid(old_page); - struct page *new_page; + int nid = folio_nid(old_folio); + struct folio *new_folio; int ret = 0; /* - * Before dissolving the page, we need to allocate a new one for the - * pool to remain stable. Here, we allocate the page and 'prep' it + * Before dissolving the folio, we need to allocate a new one for the + * pool to remain stable. Here, we allocate the folio and 'prep' it * by doing everything but actually updating counters and adding to * the pool. This simplifies and let us do most of the processing * under the lock. */ - new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL); - if (!new_page) + new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL); + if (!new_folio) return -ENOMEM; - __prep_new_huge_page(h, new_page); + __prep_new_hugetlb_folio(h, new_folio); retry: spin_lock_irq(&hugetlb_lock); - if (!PageHuge(old_page)) { + if (!folio_test_hugetlb(old_folio)) { /* - * Freed from under us. Drop new_page too. + * Freed from under us. Drop new_folio too. */ goto free_new; - } else if (page_count(old_page)) { + } else if (folio_ref_count(old_folio)) { /* - * Someone has grabbed the page, try to isolate it here. + * Someone has grabbed the folio, try to isolate it here. * Fail with -EBUSY if not possible. */ spin_unlock_irq(&hugetlb_lock); - ret = isolate_hugetlb(old_page, list); + ret = isolate_hugetlb(&old_folio->page, list); spin_lock_irq(&hugetlb_lock); goto free_new; - } else if (!HPageFreed(old_page)) { + } else if (!folio_test_hugetlb_freed(old_folio)) { /* - * Page's refcount is 0 but it has not been enqueued in the + * Folio's refcount is 0 but it has not been enqueued in the * freelist yet. Race window is small, so we can succeed here if * we retry. */ @@ -2780,35 +2941,35 @@ retry: goto retry; } else { /* - * Ok, old_page is still a genuine free hugepage. Remove it from + * Ok, old_folio is still a genuine free hugepage. Remove it from * the freelist and decrease the counters. These will be * incremented again when calling __prep_account_new_huge_page() - * and enqueue_huge_page() for new_page. The counters will remain - * stable since this happens under the lock. + * and enqueue_hugetlb_folio() for new_folio. The counters will + * remain stable since this happens under the lock. */ - remove_hugetlb_page(h, old_page, false); + remove_hugetlb_folio(h, old_folio, false); /* - * Ref count on new page is already zero as it was dropped + * Ref count on new_folio is already zero as it was dropped * earlier. It can be directly added to the pool free list. */ __prep_account_new_huge_page(h, nid); - enqueue_huge_page(h, new_page); + enqueue_hugetlb_folio(h, new_folio); /* - * Pages have been replaced, we can safely free the old one. + * Folio has been replaced, we can safely free the old one. */ spin_unlock_irq(&hugetlb_lock); - update_and_free_page(h, old_page, false); + update_and_free_hugetlb_folio(h, old_folio, false); } return ret; free_new: spin_unlock_irq(&hugetlb_lock); - /* Page has a zero ref count, but needs a ref to be freed */ - set_page_refcounted(new_page); - update_and_free_page(h, new_page, false); + /* Folio has a zero ref count, but needs a ref to be freed */ + folio_ref_unfreeze(new_folio, 1); + update_and_free_hugetlb_folio(h, new_folio, false); return ret; } @@ -2816,7 +2977,7 @@ free_new: int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) { struct hstate *h; - struct page *head; + struct folio *folio = page_folio(page); int ret = -EBUSY; /* @@ -2825,9 +2986,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) * Return success when racing as if we dissolved the page ourselves. */ spin_lock_irq(&hugetlb_lock); - if (PageHuge(page)) { - head = compound_head(page); - h = page_hstate(head); + if (folio_test_hugetlb(folio)) { + h = folio_hstate(folio); } else { spin_unlock_irq(&hugetlb_lock); return 0; @@ -2842,10 +3002,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (hstate_is_gigantic(h)) return -ENOMEM; - if (page_count(head) && !isolate_hugetlb(head, list)) + if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list)) ret = 0; - else if (!page_count(head)) - ret = alloc_and_dissolve_huge_page(h, head, list); + else if (!folio_ref_count(folio)) + ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); return ret; } @@ -2856,6 +3016,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct page *page; + struct folio *folio; long map_chg, map_commit; long gbl_chg; int ret, idx; @@ -2924,15 +3085,16 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, page = alloc_buddy_huge_page_with_mpol(h, vma, addr); if (!page) goto out_uncharge_cgroup; + spin_lock_irq(&hugetlb_lock); if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) { SetHPageRestoreReserve(page); h->resv_huge_pages--; } - spin_lock_irq(&hugetlb_lock); list_add(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); /* Fall through */ } + folio = page_folio(page); hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); /* If allocation is not consuming a reservation, also store the * hugetlb_cgroup pointer on the page. @@ -2962,8 +3124,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, rsv_adjust = hugepage_subpool_put_pages(spool, 1); hugetlb_acct_memory(h, -rsv_adjust); if (deferred_reserve) - hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h), - pages_per_huge_page(h), page); + hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), + pages_per_huge_page(h), folio); } return page; @@ -3028,17 +3190,18 @@ static void __init gather_bootmem_prealloc(void) list_for_each_entry(m, &huge_boot_pages, list) { struct page *page = virt_to_page(m); + struct folio *folio = page_folio(page); struct hstate *h = m->hstate; VM_BUG_ON(!hstate_is_gigantic(h)); - WARN_ON(page_count(page) != 1); - if (prep_compound_gigantic_page(page, huge_page_order(h))) { - WARN_ON(PageReserved(page)); - prep_new_huge_page(h, page, page_to_nid(page)); + WARN_ON(folio_ref_count(folio) != 1); + if (prep_compound_gigantic_folio(folio, huge_page_order(h))) { + WARN_ON(folio_test_reserved(folio)); + prep_new_hugetlb_folio(h, folio, folio_nid(folio)); free_huge_page(page); /* add to the hugepage allocator */ } else { /* VERY unlikely inflated ref count on a tail page */ - free_gigantic_page(page, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); } /* @@ -3060,14 +3223,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) if (!alloc_bootmem_huge_page(h, nid)) break; } else { - struct page *page; + struct folio *folio; gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; - page = alloc_fresh_huge_page(h, gfp_mask, nid, + folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, &node_states[N_MEMORY], NULL); - if (!page) + if (!folio) break; - free_huge_page(page); /* free it into the hugepage allocator */ + free_huge_page(&folio->page); /* free it into the hugepage allocator */ } cond_resched(); } @@ -3212,7 +3375,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count, goto out; if (PageHighMem(page)) continue; - remove_hugetlb_page(h, page, false); + remove_hugetlb_folio(h, page_folio(page), false); list_add(&page->lru, &page_list); } } @@ -3417,12 +3580,13 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) { int i, nid = page_to_nid(page); struct hstate *target_hstate; + struct folio *folio = page_folio(page); struct page *subpage; int rc = 0; target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); - remove_hugetlb_page_for_demote(h, page, false); + remove_hugetlb_folio_for_demote(h, folio, false); spin_unlock_irq(&hugetlb_lock); rc = hugetlb_vmemmap_restore(h, page); @@ -3430,15 +3594,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) /* Allocation of vmemmmap failed, we can not demote page */ spin_lock_irq(&hugetlb_lock); set_page_refcounted(page); - add_hugetlb_page(h, page, false); + add_hugetlb_folio(h, page_folio(page), false); return rc; } /* - * Use destroy_compound_hugetlb_page_for_demote for all huge page + * Use destroy_compound_hugetlb_folio_for_demote for all huge page * sizes as it will not ref count pages. */ - destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); + destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h)); /* * Taking target hstate mutex synchronizes with set_max_huge_pages. @@ -3452,13 +3616,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) for (i = 0; i < pages_per_huge_page(h); i += pages_per_huge_page(target_hstate)) { subpage = nth_page(page, i); + folio = page_folio(subpage); if (hstate_is_gigantic(target_hstate)) - prep_compound_gigantic_page_for_demote(subpage, + prep_compound_gigantic_folio_for_demote(folio, target_hstate->order); else prep_compound_page(subpage, target_hstate->order); set_page_private(subpage, 0); - prep_new_huge_page(target_hstate, subpage, nid); + prep_new_hugetlb_folio(target_hstate, folio, nid); free_huge_page(subpage); } mutex_unlock(&target_hstate->resize_lock); @@ -4601,6 +4766,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) struct resv_map *resv = vma_resv_map(vma); /* + * HPAGE_RESV_OWNER indicates a private mapping. * This new VMA should share its siblings reservation map if present. * The VMA will only ever have a valid reservation map pointer where * it is being copied for another still existing VMA. As that VMA @@ -4615,11 +4781,21 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) /* * vma_lock structure for sharable mappings is vma specific. - * Clear old pointer (if copied via vm_area_dup) and create new. + * Clear old pointer (if copied via vm_area_dup) and allocate + * new structure. Before clearing, make sure vma_lock is not + * for this vma. */ if (vma->vm_flags & VM_MAYSHARE) { - vma->vm_private_data = NULL; - hugetlb_vma_lock_alloc(vma); + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + if (vma_lock) { + if (vma_lock->vma != vma) { + vma->vm_private_data = NULL; + hugetlb_vma_lock_alloc(vma); + } else + pr_warn("HugeTLB: vma_lock already exists in %s.\n", __func__); + } else + hugetlb_vma_lock_alloc(vma); } } @@ -4756,7 +4932,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr hugepage_add_new_anon_rmap(new_page, vma, addr); set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1)); hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm); - ClearHPageRestoreReserve(new_page); SetHPageMigratable(new_page); } @@ -5045,7 +5220,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - struct mmu_notifier_range range; unsigned long last_addr_mask; bool force_flush = false; @@ -5060,13 +5234,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_change_page_size(tlb, sz); tlb_start_vma(tlb, vma); - /* - * If sharing possible, alert mmu notifiers of worst case. - */ - mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start, - end); - adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); - mmu_notifier_invalidate_range_start(&range); last_addr_mask = hugetlb_mask_last_page(h); address = start; for (; address < end; address += sz) { @@ -5096,7 +5263,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct * unmapped and its refcount is dropped, so just clear pte here. */ if (unlikely(!pte_present(pte))) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP /* * If the pte was wr-protected by uffd-wp in any of the * swap forms, meanwhile the caller does not want to @@ -5108,7 +5274,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); else -#endif huge_pte_clear(mm, address, ptep, sz); spin_unlock(ptl); continue; @@ -5137,13 +5302,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); -#ifdef CONFIG_PTE_MARKER_UFFD_WP /* Leave a uffd-wp pte marker if needed */ if (huge_pte_uffd_wp(pte) && !(zap_flags & ZAP_FLAG_DROP_MARKER)) set_huge_pte_at(mm, address, ptep, make_pte_marker(PTE_MARKER_UFFD_WP)); -#endif hugetlb_count_sub(pages_per_huge_page(h), mm); page_remove_rmap(page, vma, true); @@ -5155,7 +5318,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct if (ref_page) break; } - mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); /* @@ -5183,29 +5345,43 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, hugetlb_vma_lock_write(vma); i_mmap_lock_write(vma->vm_file->f_mapping); + /* mmu notification performed in caller */ __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchrously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) { + struct mmu_notifier_range range; struct mmu_gather tlb; + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, + start, end); + adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + mmu_notifier_invalidate_range_start(&range); tlb_gather_mmu(&tlb, vma->vm_mm); + __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags); + + mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } @@ -5284,9 +5460,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr = address & huge_page_mask(h); struct mmu_notifier_range range; - VM_BUG_ON(unshare && (flags & FOLL_WRITE)); - VM_BUG_ON(!unshare && !(flags & FOLL_WRITE)); - /* * hugetlb does not support FOLL_FORCE-style write faults that keep the * PTE mapped R/O such as maybe_mkwrite() would do. @@ -5296,8 +5469,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, /* Let's take out MAP_SHARED mappings first. */ if (vma->vm_flags & VM_MAYSHARE) { - if (unlikely(unshare)) - return 0; set_huge_ptep_writable(vma, haddr, ptep); return 0; } @@ -5419,8 +5590,6 @@ retry_avoidcopy: spin_lock(ptl); ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) { - ClearHPageRestoreReserve(new_page); - /* Break COW or unshare */ huge_ptep_clear_flush(vma, haddr, ptep); mmu_notifier_invalidate_range(mm, range.start, range.end); @@ -5715,10 +5884,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, if (!pte_same(huge_ptep_get(ptep), old_pte)) goto backout; - if (anon_rmap) { - ClearHPageRestoreReserve(page); + if (anon_rmap) hugepage_add_new_anon_rmap(page, vma, haddr); - } else + else page_dup_file_rmap(page, true); new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); @@ -6092,6 +6260,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ptl = huge_pte_lock(h, dst_mm, dst_pte); + ret = -EIO; + if (PageHWPoison(page)) + goto out_release_unlock; + /* * We allow to overwrite a pte marker: consider when both MISSING|WP * registered, we firstly wr-protect a none pte which has no page cache @@ -6101,12 +6273,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, if (!huge_pte_none_mostly(huge_ptep_get(dst_pte))) goto out_release_unlock; - if (page_in_pagecache) { + if (page_in_pagecache) page_dup_file_rmap(page, true); - } else { - ClearHPageRestoreReserve(page); + else hugepage_add_new_anon_rmap(page, dst_vma, dst_addr); - } /* * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY @@ -6171,7 +6341,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma, } } -static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte, +static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma, + unsigned int flags, pte_t *pte, bool *unshare) { pte_t pteval = huge_ptep_get(pte); @@ -6183,13 +6354,69 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte, return false; if (flags & FOLL_WRITE) return true; - if (gup_must_unshare(flags, pte_page(pteval))) { + if (gup_must_unshare(vma, flags, pte_page(pteval))) { *unshare = true; return true; } return false; } +struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; + unsigned long haddr = address & huge_page_mask(h); + struct page *page = NULL; + spinlock_t *ptl; + pte_t *pte, entry; + + /* + * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via + * follow_hugetlb_page(). + */ + if (WARN_ON_ONCE(flags & FOLL_PIN)) + return NULL; + +retry: + pte = huge_pte_offset(mm, haddr, huge_page_size(h)); + if (!pte) + return NULL; + + ptl = huge_pte_lock(h, mm, pte); + entry = huge_ptep_get(pte); + if (pte_present(entry)) { + page = pte_page(entry) + + ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); + /* + * Note that page may be a sub-page, and with vmemmap + * optimizations the page struct may be read only. + * try_grab_page() will increase the ref count on the + * head page, so this will be OK. + * + * try_grab_page() should always be able to get the page here, + * because we hold the ptl lock and have verified pte_present(). + */ + if (try_grab_page(page, flags)) { + page = NULL; + goto out; + } + } else { + if (is_hugetlb_entry_migration(entry)) { + spin_unlock(ptl); + __migration_entry_wait_huge(pte, ptl); + goto retry; + } + /* + * hwpoisoned entry is treated as no_page_table in + * follow_page_mask(). + */ + } +out: + spin_unlock(ptl); + return page; +} + long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -6256,7 +6483,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * directly from any kind of swap entries. */ if (absent || - __follow_hugetlb_must_fault(flags, pte, &unshare)) { + __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) { vm_fault_t ret; unsigned int fault_flags = 0; @@ -6266,9 +6493,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_WRITE; else if (unshare) fault_flags |= FAULT_FLAG_UNSHARE; - if (locked) + if (locked) { fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (flags & FOLL_INTERRUPTIBLE) + fault_flags |= FAULT_FLAG_INTERRUPTIBLE; + } if (flags & FOLL_NOWAIT) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; @@ -6342,8 +6572,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * tables. If the huge page is present, then the tail * pages must also be present. The ptl prevents the * head page and tail pages from being rearranged in - * any way. So this page must be available at this - * point, unless the page refcount overflowed: + * any way. As this is hugetlb, the pages will never + * be p2pdma or not longterm pinable. So this page + * must be available at this point, unless the page + * refcount overflowed: */ if (WARN_ON_ONCE(!try_grab_folio(pages[i], refs, flags))) { @@ -6527,7 +6759,8 @@ bool hugetlb_reserve_pages(struct inode *inode, } /* - * vma specific semaphore used for pmd sharing synchronization + * vma specific semaphore used for pmd sharing and fault/truncation + * synchronization */ hugetlb_vma_lock_alloc(vma); @@ -6783,149 +7016,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, *end = ALIGN(*end, PUD_SIZE); } -static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma) -{ - return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) && - vma->vm_private_data; -} - -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_read(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - up_write(&vma_lock->rw_sema); - } -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - if (!__vma_shareable_flags_pmd(vma)) - return 1; - - return down_write_trylock(&vma_lock->rw_sema); -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - lockdep_assert_held(&vma_lock->rw_sema); - } -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ - struct hugetlb_vma_lock *vma_lock = container_of(kref, - struct hugetlb_vma_lock, refs); - - kfree(vma_lock); -} - -static void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock) -{ - struct vm_area_struct *vma = vma_lock->vma; - - /* - * vma_lock structure may or not be released as a result of put, - * it certainly will no longer be attached to vma so clear pointer. - * Semaphore synchronizes access to vma_lock->vma field. - */ - vma_lock->vma = NULL; - vma->vm_private_data = NULL; - up_write(&vma_lock->rw_sema); - kref_put(&vma_lock->refs, hugetlb_vma_lock_release); -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ - if (__vma_shareable_flags_pmd(vma)) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ - /* - * Only present in sharable vmas. - */ - if (!vma || !__vma_shareable_flags_pmd(vma)) - return; - - if (vma->vm_private_data) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - - down_write(&vma_lock->rw_sema); - __hugetlb_vma_unlock_write_put(vma_lock); - } -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ - struct hugetlb_vma_lock *vma_lock; - - /* Only establish in (flags) sharable vmas */ - if (!vma || !(vma->vm_flags & VM_MAYSHARE)) - return; - - /* Should never get here with non-NULL vm_private_data */ - if (vma->vm_private_data) - return; - - vma_lock = kmalloc(sizeof(*vma_lock), GFP_KERNEL); - if (!vma_lock) { - /* - * If we can not allocate structure, then vma can not - * participate in pmd sharing. This is only a possible - * performance enhancement and memory saving issue. - * However, the lock is also used to synchronize page - * faults with truncation. If the lock is not present, - * unlikely races could leave pages in a file past i_size - * until the file is removed. Warn in the unlikely case of - * allocation failure. - */ - pr_warn_once("HugeTLB: unable to allocate vma specific lock\n"); - return; - } - - kref_init(&vma_lock->refs); - init_rwsem(&vma_lock->rw_sema); - vma_lock->vma = vma; - vma->vm_private_data = vma_lock; -} - /* * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * and returns the corresponding pte. While this is not necessary for the @@ -7014,47 +7104,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ -void hugetlb_vma_lock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_read(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_write(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_unlock_write(struct vm_area_struct *vma) -{ -} - -int hugetlb_vma_trylock_write(struct vm_area_struct *vma) -{ - return 1; -} - -void hugetlb_vma_assert_locked(struct vm_area_struct *vma) -{ -} - -void hugetlb_vma_lock_release(struct kref *kref) -{ -} - -static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_free(struct vm_area_struct *vma) -{ -} - -static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma) -{ -} - pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { @@ -7182,122 +7231,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h) * These functions are overwritable if your architecture needs its own * behavior. */ -struct page * __weak -follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - -struct page * __weak -follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, int flags, int pdshift) -{ - WARN(1, "hugepd follow called with no support for hugepage directory format\n"); - return NULL; -} - -struct page * __weak -follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) -{ - struct hstate *h = hstate_vma(vma); - struct mm_struct *mm = vma->vm_mm; - struct page *page = NULL; - spinlock_t *ptl; - pte_t *ptep, pte; - - /* - * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via - * follow_hugetlb_page(). - */ - if (WARN_ON_ONCE(flags & FOLL_PIN)) - return NULL; - -retry: - ptep = huge_pte_offset(mm, address, huge_page_size(h)); - if (!ptep) - return NULL; - - ptl = huge_pte_lock(h, mm, ptep); - pte = huge_ptep_get(ptep); - if (pte_present(pte)) { - page = pte_page(pte) + - ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); - /* - * try_grab_page() should always succeed here, because: a) we - * hold the pmd (ptl) lock, and b) we've just checked that the - * huge pmd (head) page is present in the page tables. The ptl - * prevents the head page and tail pages from being rearranged - * in any way. So this page must be available at this point, - * unless the page refcount overflowed: - */ - if (WARN_ON_ONCE(!try_grab_page(page, flags))) { - page = NULL; - goto out; - } - } else { - if (is_hugetlb_entry_migration(pte)) { - spin_unlock(ptl); - __migration_entry_wait_huge(ptep, ptl); - goto retry; - } - /* - * hwpoisoned entry is treated as no_page_table in - * follow_page_mask(). - */ - } -out: - spin_unlock(ptl); - return page; -} - -struct page * __weak -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int flags) -{ - struct page *page = NULL; - spinlock_t *ptl; - pte_t pte; - - if (WARN_ON_ONCE(flags & FOLL_PIN)) - return NULL; - -retry: - ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud); - if (!pud_huge(*pud)) - goto out; - pte = huge_ptep_get((pte_t *)pud); - if (pte_present(pte)) { - page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); - if (WARN_ON_ONCE(!try_grab_page(page, flags))) { - page = NULL; - goto out; - } - } else { - if (is_hugetlb_entry_migration(pte)) { - spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pud, ptl); - goto retry; - } - /* - * hwpoisoned entry is treated as no_page_table in - * follow_page_mask(). - */ - } -out: - spin_unlock(ptl); - return page; -} - -struct page * __weak -follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags) -{ - if (flags & (FOLL_GET | FOLL_PIN)) - return NULL; - - return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT); -} - int isolate_hugetlb(struct page *page, struct list_head *list) { int ret = 0; @@ -7316,7 +7249,7 @@ unlock: return ret; } -int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) { int ret = 0; @@ -7326,7 +7259,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) *hugetlb = true; if (HPageFreed(page)) ret = 0; - else if (HPageMigratable(page)) + else if (HPageMigratable(page) || unpoison) ret = get_page_unless_zero(page); else ret = -EBUSY; @@ -7335,12 +7268,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) return ret; } -int get_huge_page_for_hwpoison(unsigned long pfn, int flags) +int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { int ret; spin_lock_irq(&hugetlb_lock); - ret = __get_huge_page_for_hwpoison(pfn, flags); + ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared); spin_unlock_irq(&hugetlb_lock); return ret; } @@ -7354,15 +7288,15 @@ void putback_active_hugepage(struct page *page) put_page(page); } -void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) +void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason) { - struct hstate *h = page_hstate(oldpage); + struct hstate *h = folio_hstate(old_folio); - hugetlb_cgroup_migrate(oldpage, newpage); - set_page_owner_migrate_reason(newpage, reason); + hugetlb_cgroup_migrate(old_folio, new_folio); + set_page_owner_migrate_reason(&new_folio->page, reason); /* - * transfer temporary state of the new huge page. This is + * transfer temporary state of the new hugetlb folio. This is * reverse to other transitions because the newpage is going to * be final while the old one will be freed so it takes over * the temporary status. @@ -7371,12 +7305,13 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) * here as well otherwise the global surplus count will not match * the per-node's. */ - if (HPageTemporary(newpage)) { - int old_nid = page_to_nid(oldpage); - int new_nid = page_to_nid(newpage); + if (folio_test_hugetlb_temporary(new_folio)) { + int old_nid = folio_nid(old_folio); + int new_nid = folio_nid(new_folio); + + folio_set_hugetlb_temporary(old_folio); + folio_clear_hugetlb_temporary(new_folio); - SetHPageTemporary(oldpage); - ClearHPageTemporary(newpage); /* * There is no need to transfer the per-node surplus state |