From be8a80b3e20e3ecb24c2a5f22cd1c9b120341f3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: remove a pointless CONFIG_ZONE_DEVICE check in memremap_pages Patch series "start sorting out the ZONE_DEVICE refcount mess", v2. This series removes the offset by one refcount for ZONE_DEVICE pages that are freed back to the driver owning them, which is just device private ones for now, but also the planned device coherent pages and the ehanced p2p ones pending. It does not address the fsdax pages yet, which will be attacked in a follow on series. This patch (of 27): memremap.c is only built when CONFIG_ZONE_DEVICE is set, so remove the superflous extra check. Link: https://lkml.kernel.org/r/20220210072828.2930359-1-hch@lst.de Link: https://lkml.kernel.org/r/20220210072828.2930359-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Muchun Song Reviewed-by: Dan Williams Reviewed-by: Miaohe Lin Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Felix Kuehling Cc: Alex Deucher Cc: Christian Knig Cc: "Pan, Xinhui" Cc: Ben Skeggs Cc: Karol Herbst Cc: Lyude Paul Cc: Alistair Popple Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index 6aa5f0c2d11f..5f04a0709e43 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -328,8 +328,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_FS_DAX: - if (!IS_ENABLED(CONFIG_ZONE_DEVICE) || - IS_ENABLED(CONFIG_FS_DAX_LIMITED)) { + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) { WARN(1, "File system DAX not supported\n"); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 75e55d8a107edb2fd6e02b1fa8a81531209cda04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: move free_devmap_managed_page to memremap.c free_devmap_managed_page has nothing to do with the code in swap.c, move it to live with the rest of the code for devmap handling. Link: https://lkml.kernel.org/r/20220210072828.2930359-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Muchun Song Reviewed-by: Dan Williams Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index 5f04a0709e43..55d23e9f5c04 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -501,4 +501,25 @@ void free_devmap_managed_page(struct page *page) page->mapping = NULL; page->pgmap->ops->page_free(page); } + +void put_devmap_managed_page(struct page *page) +{ + int count; + + if (WARN_ON_ONCE(!page_is_devmap_managed(page))) + return; + + count = page_ref_dec_return(page); + + /* + * devmap page refcounts are 1-based, rather than 0-based: if + * refcount is 1, then the page is free and the refcount is + * stable because nobody holds a reference on the page. + */ + if (count == 1) + free_devmap_managed_page(page); + else if (!count) + __put_page(page); +} +EXPORT_SYMBOL(put_devmap_managed_page); #endif /* CONFIG_DEV_PAGEMAP_OPS */ -- cgit v1.2.3 From 895749455f6054e0c7b40a6ec449a3ab6db51bdd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:35 +1100 Subject: mm: simplify freeing of devmap managed pages Make put_devmap_managed_page return if it took charge of the page or not and remove the separate page_is_devmap_managed helper. Link: https://lkml.kernel.org/r/20220210072828.2930359-6-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Dan Williams Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Christian Knig Cc: Felix Kuehling Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index 55d23e9f5c04..f41233a67edb 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -502,24 +502,22 @@ void free_devmap_managed_page(struct page *page) page->pgmap->ops->page_free(page); } -void put_devmap_managed_page(struct page *page) +bool __put_devmap_managed_page(struct page *page) { - int count; - - if (WARN_ON_ONCE(!page_is_devmap_managed(page))) - return; - - count = page_ref_dec_return(page); - /* * devmap page refcounts are 1-based, rather than 0-based: if * refcount is 1, then the page is free and the refcount is * stable because nobody holds a reference on the page. */ - if (count == 1) + switch (page_ref_dec_return(page)) { + case 1: free_devmap_managed_page(page); - else if (!count) + break; + case 0: __put_page(page); + break; + } + return true; } -EXPORT_SYMBOL(put_devmap_managed_page); +EXPORT_SYMBOL(__put_devmap_managed_page); #endif /* CONFIG_DEV_PAGEMAP_OPS */ -- cgit v1.2.3 From dc90f0846df4870b6cc8528c31e5c60f18fb68be Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:36 +1100 Subject: mm: don't include in Move the check for the actual pgmap types that need the free at refcount one behavior into the out of line helper, and thus avoid the need to pull memremap.h into mm.h. Link: https://lkml.kernel.org/r/20220210072828.2930359-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Acked-by: Felix Kuehling Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index f41233a67edb..a0ece2344c2c 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -504,6 +504,10 @@ void free_devmap_managed_page(struct page *page) bool __put_devmap_managed_page(struct page *page) { + if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && + page->pgmap->type != MEMORY_DEVICE_FS_DAX) + return false; + /* * devmap page refcounts are 1-based, rather than 0-based: if * refcount is 1, then the page is free and the refcount is -- cgit v1.2.3 From 27674ef6c73f0c9096a9827dc5d6ba9fc7808422 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:36 +1100 Subject: mm: remove the extra ZONE_DEVICE struct page refcount ZONE_DEVICE struct pages have an extra reference count that complicates the code for put_page() and several places in the kernel that need to check the reference count to see that a page is not being used (gup, compaction, migration, etc.). Clean up the code so the reference count doesn't need to be treated specially for ZONE_DEVICE pages. Note that this excludes the special idle page wakeup for fsdax pages, which still happens at refcount 1. This is a separate issue and will be sorted out later. Given that only fsdax pages require the notifiacation when the refcount hits 1 now, the PAGEMAP_OPS Kconfig symbol can go away and be replaced with a FS_DAX check for this hook in the put_page fastpath. Based on an earlier patch from Ralph Campbell . Link: https://lkml.kernel.org/r/20220210072828.2930359-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Reviewed-by: Ralph Campbell Reviewed-by: Jason Gunthorpe Reviewed-by: Dan Williams Acked-by: Felix Kuehling Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Christian Knig Cc: Karol Herbst Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 57 +++++++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index a0ece2344c2c..fef5734d5e49 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -12,6 +12,7 @@ #include #include #include +#include "internal.h" static DEFINE_XARRAY(pgmap_array); @@ -37,21 +38,19 @@ unsigned long memremap_compat_align(void) EXPORT_SYMBOL_GPL(memremap_compat_align); #endif -#ifdef CONFIG_DEV_PAGEMAP_OPS +#ifdef CONFIG_FS_DAX DEFINE_STATIC_KEY_FALSE(devmap_managed_key); EXPORT_SYMBOL(devmap_managed_key); static void devmap_managed_enable_put(struct dev_pagemap *pgmap) { - if (pgmap->type == MEMORY_DEVICE_PRIVATE || - pgmap->type == MEMORY_DEVICE_FS_DAX) + if (pgmap->type == MEMORY_DEVICE_FS_DAX) static_branch_dec(&devmap_managed_key); } static void devmap_managed_enable_get(struct dev_pagemap *pgmap) { - if (pgmap->type == MEMORY_DEVICE_PRIVATE || - pgmap->type == MEMORY_DEVICE_FS_DAX) + if (pgmap->type == MEMORY_DEVICE_FS_DAX) static_branch_inc(&devmap_managed_key); } #else @@ -61,7 +60,7 @@ static void devmap_managed_enable_get(struct dev_pagemap *pgmap) static void devmap_managed_enable_put(struct dev_pagemap *pgmap) { } -#endif /* CONFIG_DEV_PAGEMAP_OPS */ +#endif /* CONFIG_FS_DAX */ static void pgmap_array_delete(struct range *range) { @@ -102,23 +101,12 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id) return (range->start + range_len(range)) >> PAGE_SHIFT; } -static unsigned long pfn_next(struct dev_pagemap *pgmap, unsigned long pfn) -{ - if (pfn % (1024 << pgmap->vmemmap_shift)) - cond_resched(); - return pfn + pgmap_vmemmap_nr(pgmap); -} - static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id) { return (pfn_end(pgmap, range_id) - pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift; } -#define for_each_device_pfn(pfn, map, i) \ - for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); \ - pfn = pfn_next(map, pfn)) - static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) { struct range *range = &pgmap->ranges[range_id]; @@ -147,13 +135,11 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id) void memunmap_pages(struct dev_pagemap *pgmap) { - unsigned long pfn; int i; percpu_ref_kill(&pgmap->ref); for (i = 0; i < pgmap->nr_range; i++) - for_each_device_pfn(pfn, pgmap, i) - put_page(pfn_to_page(pfn)); + percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i)); wait_for_completion(&pgmap->done); percpu_ref_exit(&pgmap->ref); @@ -464,14 +450,10 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, } EXPORT_SYMBOL_GPL(get_dev_pagemap); -#ifdef CONFIG_DEV_PAGEMAP_OPS -void free_devmap_managed_page(struct page *page) +void free_zone_device_page(struct page *page) { - /* notify page idle for dax */ - if (!is_device_private_page(page)) { - wake_up_var(&page->_refcount); + if (WARN_ON_ONCE(!is_device_private_page(page))) return; - } __ClearPageWaiters(page); @@ -500,28 +482,27 @@ void free_devmap_managed_page(struct page *page) */ page->mapping = NULL; page->pgmap->ops->page_free(page); + + /* + * Reset the page count to 1 to prepare for handing out the page again. + */ + set_page_count(page, 1); } +#ifdef CONFIG_FS_DAX bool __put_devmap_managed_page(struct page *page) { - if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && - page->pgmap->type != MEMORY_DEVICE_FS_DAX) + if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) return false; /* - * devmap page refcounts are 1-based, rather than 0-based: if + * fsdax page refcounts are 1-based, rather than 0-based: if * refcount is 1, then the page is free and the refcount is * stable because nobody holds a reference on the page. */ - switch (page_ref_dec_return(page)) { - case 1: - free_devmap_managed_page(page); - break; - case 0: - __put_page(page); - break; - } + if (page_ref_dec_return(page) == 1) + wake_up_var(&page->_refcount); return true; } EXPORT_SYMBOL(__put_devmap_managed_page); -#endif /* CONFIG_DEV_PAGEMAP_OPS */ +#endif /* CONFIG_FS_DAX */ -- cgit v1.2.3 From 5cbf9942c9630f82bd71dfac141e6c1b511a9460 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Feb 2022 15:31:37 +1100 Subject: mm: generalize the pgmap based page_free infrastructure Key off on the existence of ->page_free to prepare for adding support for more pgmap types that are device managed and thus need the free callback. Link: https://lkml.kernel.org/r/20220210072828.2930359-10-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: "Sierra Guiza, Alejandro (Alex)" Cc: Alex Deucher Cc: Alistair Popple Cc: Ben Skeggs Cc: Chaitanya Kulkarni Cc: Christian Knig Cc: Dan Williams Cc: Felix Kuehling Cc: Jason Gunthorpe Cc: Karol Herbst Cc: Logan Gunthorpe Cc: Lyude Paul Cc: Miaohe Lin Cc: Muchun Song Cc: "Pan, Xinhui" Cc: Ralph Campbell Signed-off-by: Andrew Morton Signed-off-by: Matthew Wilcox (Oracle) --- mm/memremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memremap.c') diff --git a/mm/memremap.c b/mm/memremap.c index fef5734d5e49..e00ffcdba7b6 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -452,7 +452,7 @@ EXPORT_SYMBOL_GPL(get_dev_pagemap); void free_zone_device_page(struct page *page) { - if (WARN_ON_ONCE(!is_device_private_page(page))) + if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free)) return; __ClearPageWaiters(page); @@ -460,7 +460,7 @@ void free_zone_device_page(struct page *page) mem_cgroup_uncharge(page_folio(page)); /* - * When a device_private page is freed, the page->mapping field + * When a device managed page is freed, the page->mapping field * may still contain a (stale) mapping value. For example, the * lower bits of page->mapping may still identify the page as an * anonymous page. Ultimately, this entire field is just stale -- cgit v1.2.3