diff options
52 files changed, 1120 insertions, 525 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b44992ec9643..4e369dfb83b1 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -685,12 +685,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; @@ -725,7 +727,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index ac46f0d60b66..7d9bd20319ff 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7af4e05bb61e..18278b448530 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY); + MEMMAP_EARLY, NULL); return 0; } @@ -509,9 +509,10 @@ void __meminit memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - if (!vmem_map) - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); - else { + if (!vmem_map) { + memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, + NULL); + } else { struct page *start; struct memmap_init_callback_data args; @@ -647,13 +648,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); @@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (ret) pr_warn("%s: Problem encountered in __remove_pages() as" " ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index f6eb7e8f4c93..fdb424a29f03 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; @@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - /* altmap lookups only work at section boundaries */ - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); - - p = __vmemmap_alloc_block_buf(page_size, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(page_size, altmap); + else + p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; @@ -256,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start) return vmem_back->phys; } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); @@ -267,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) for (; start < end; start += page_size) { unsigned long nr_pages, addr; - struct vmem_altmap *altmap; struct page *section_base; struct page *page; @@ -287,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; - altmap = to_vmem_altmap((unsigned long) section_base); if (altmap) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1281c6eb3a85..fe8c61149fb8 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -143,15 +144,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct vmem_altmap *altmap; struct page *page; int ret; @@ -160,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size) * when querying the zone. */ page = pfn_to_page(start_pfn); - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); if (ret) return ret; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 671535e64aba..3fa3e5323612 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); if (rc) vmem_remove_mapping(start, size); return rc; } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3316d463fc29..db55561c5981 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long pgt_prot, sgt_prot; unsigned long address = start; @@ -296,7 +297,8 @@ out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index bf726af5f1a5..ce0bbaa7e404 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); return ret; } -EXPORT_SYMBOL_GPL(arch_add_memory); #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 addr) @@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; @@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (unlikely(ret)) pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, ret); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 55ba62957e64..995f9490334d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, - int node) + int node, struct vmem_altmap *altmap) { unsigned long pte_base; @@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,23 +829,24 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4a837289f2ad..1ab42c852069 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } -EXPORT_SYMBOL_GPL(arch_add_memory); #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); @@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; @@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index abeb4df4f22e..bbe48ad20886 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -838,6 +838,18 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_capabilities *pcap) +{ + struct device *dev = acpi_desc->dev; + u32 mask; + + mask = (1 << (pcap->highest_capability + 1)) - 1; + acpi_desc->platform_cap = pcap->capabilities & mask; + dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, void *table, const void *end) { @@ -883,6 +895,10 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); break; + case ACPI_NFIT_TYPE_CAPABILITIES: + if (!add_platform_cap(acpi_desc, table)) + return err; + break; default: dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); break; @@ -1867,6 +1883,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct kernfs_node *nfit_kernfs; nvdimm = nfit_mem->nvdimm; + if (!nvdimm) + continue; + nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, @@ -2656,6 +2675,12 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, else ndr_desc->numa_node = NUMA_NO_NODE; + if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) + set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); + + if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags); + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct nd_mapping_desc *mapping; @@ -3464,6 +3489,7 @@ static __init int nfit_init(void) BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); + BUILD_BUG_ON(sizeof(struct acpi_nfit_capabilities) != 16); guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]); guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index f0cf18b2da8b..50d36e166d70 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -202,6 +202,7 @@ struct acpi_nfit_desc { unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_nfit_cmd_force_en; + unsigned int platform_cap; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 7b0bf825c4e7..2137dbc29877 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -133,7 +133,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, dax_region->base = addr; if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { kfree(dax_region); - return NULL;; + return NULL; } kref_get(&dax_region->kref); diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 8d8c852ba8f2..31b6ecce4c64 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -21,6 +21,7 @@ struct dax_pmem { struct device *dev; struct percpu_ref ref; + struct dev_pagemap pgmap; struct completion cmp; }; @@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev) struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; - struct vmem_altmap __altmap, *altmap = NULL; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); nsio = to_nd_namespace_io(&ndns->dev); + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + /* parse the 'pfn' info block via ->rw_bytes */ rc = devm_nsio_enable(dev, nsio); if (rc) return rc; - altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); - if (IS_ERR(altmap)) - return PTR_ERR(altmap); + rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap); + if (rc) + return rc; devm_nsio_disable(dev, nsio); pfn_sb = nd_pfn->pfn_sb; @@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev) return -EBUSY; } - dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); - if (!dax_pmem) - return -ENOMEM; - dax_pmem->dev = dev; init_completion(&dax_pmem->cmp); rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, @@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev) if (rc) return rc; - addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + dax_pmem->pgmap.ref = &dax_pmem->ref; + addr = devm_memremap_pages(dev, &dax_pmem->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); @@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev) return rc; /* adjust the dax_region resource to the start of data */ + memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); res.start += le64_to_cpu(pfn_sb->dataoff); rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", ®ion_id, &id); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 3ec804672601..473af694ad1c 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -15,6 +15,7 @@ #include <linux/mount.h> #include <linux/magic.h> #include <linux/genhd.h> +#include <linux/pfn_t.h> #include <linux/cdev.h> #include <linux/hash.h> #include <linux/slab.h> @@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) return len < 0 ? len : -EIO; } + if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) + || pfn_t_devmap(pfn)) + /* pass */; + else { + pr_debug("VFS (%s): error: dax support not enabled\n", + sb->s_id); + return -EOPNOTSUPP; + } + return 0; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index c586bcdb5190..2ef544f10ec8 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -753,6 +753,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, return NULL; arena->nd_btt = btt->nd_btt; arena->sector_size = btt->sector_size; + mutex_init(&arena->err_lock); if (!size) return arena; @@ -891,7 +892,6 @@ static int discover_arenas(struct btt *btt) goto out; } - mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) goto out; diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 0a5e6cd758fe..78eabc3a1ab1 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -1142,9 +1142,6 @@ int __init nvdimm_bus_init(void) { int rc; - BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128); - BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8); - rc = bus_register(&nvdimm_bus_type); if (rc) return rc; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index bb3ba8cf24d4..658ada497be0 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2408,7 +2408,7 @@ static struct device **scan_labels(struct nd_region *nd_region) static struct device **create_namespaces(struct nd_region *nd_region) { - struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nd_mapping *nd_mapping; struct device **devs; int i; diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e958f3724c41..8d6375ee0fda 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); #if IS_ENABLED(CONFIG_ND_CLAIM) -struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap); +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); #else -static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct dev_pagemap *pgmap) { - return ERR_PTR(-ENXIO); + return -ENXIO; } static inline int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 2adada1a5855..f5c4e8c6e29d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { + struct resource *res = &pgmap->res; + struct vmem_altmap *altmap = &pgmap->altmap; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); @@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, res->start += start_pad; res->end -= end_trunc; + pgmap->type = MEMORY_DEVICE_HOST; + if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < SZ_8K) - return ERR_PTR(-EINVAL); + return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); - altmap = NULL; + pgmap->altmap_valid = false; } else if (nd_pfn->mode == PFN_MODE_PMEM) { nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) - offset) / PAGE_SIZE); @@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, memcpy(altmap, &__altmap, sizeof(*altmap)); altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; + pgmap->altmap_valid = true; } else - return ERR_PTR(-ENXIO); + return -ENXIO; - return altmap; + return 0; } static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) @@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * Determine the effective resource range and vmem_altmap from an nd_pfn * instance. */ -struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return ERR_PTR(-ENODEV); + return -ENODEV; rc = nd_pfn_init(nd_pfn); if (rc) - return ERR_PTR(rc); + return rc; - /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_setup_pfn(nd_pfn, res, altmap); + /* we need a valid pfn_sb before we can init a dev_pagemap */ + return __nvdimm_setup_pfn(nd_pfn, pgmap); } EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7fbc5c5dc8e1..10041ac4032c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -35,6 +35,7 @@ #include "pmem.h" #include "pfn.h" #include "nd.h" +#include "nd-core.h" static struct device *to_dev(struct pmem_device *pmem) { @@ -298,34 +299,34 @@ static int pmem_attach_disk(struct device *dev, { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); - struct vmem_altmap __altmap, *altmap = NULL; int nid = dev_to_node(dev), fua, wbc; struct resource *res = &nsio->res; + struct resource bb_res; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; - struct resource pfn_res; struct request_queue *q; struct device *gendev; struct gendisk *disk; void *addr; + int rc; + + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); + if (!pmem) + return -ENOMEM; /* while nsio_rw_bytes is active, parse a pfn info block if present */ if (is_nd_pfn(dev)) { nd_pfn = to_nd_pfn(dev); - altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); - if (IS_ERR(altmap)) - return PTR_ERR(altmap); + rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap); + if (rc) + return rc; } /* we're attaching a block device, disable raw namespace access */ devm_nsio_disable(dev, nsio); - pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); - if (!pmem) - return -ENOMEM; - dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); @@ -334,7 +335,8 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - wbc = nvdimm_has_cache(nd_region); + wbc = nvdimm_has_cache(nd_region) && + !test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -350,19 +352,22 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; pmem->pfn_flags = PFN_DEV; + pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { - addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, - altmap); + addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_pad = resource_size(res) - + resource_size(&pmem->pgmap.res); pmem->pfn_flags |= PFN_MAP; - res = &pfn_res; /* for badblocks populate */ - res->start += pmem->data_offset; + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); + bb_res.start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { - addr = devm_memremap_pages(dev, &nsio->res, - &q->q_usage_counter, NULL); + memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); + pmem->pgmap.altmap_valid = false; + addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); } else addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); @@ -401,7 +406,7 @@ static int pmem_attach_disk(struct device *dev, / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, res); + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); disk->bb = &pmem->bb; dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops); diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 6a3cd2a10db6..a64ebc78b5df 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -22,6 +22,7 @@ struct pmem_device { struct badblocks bb; struct dax_device *dax_dev; struct gendisk *disk; + struct dev_pagemap pgmap; }; long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index abaf38c61220..e6d01911e092 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -528,6 +528,18 @@ static ssize_t resource_show(struct device *dev, } static DEVICE_ATTR_RO(resource); +static ssize_t persistence_domain_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long flags = nd_region->flags; + + return sprintf(buf, "%s%s\n", + flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "", + flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : ""); +} +static DEVICE_ATTR_RO(persistence_domain); + static struct attribute *nd_region_attributes[] = { &dev_attr_size.attr, &dev_attr_nstype.attr, @@ -543,6 +555,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_init_namespaces.attr, &dev_attr_badblocks.attr, &dev_attr_resource.attr, + &dev_attr_persistence_domain.attr, NULL, }; diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index bc27d716aa6b..1444333210c7 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -16,6 +16,7 @@ config BLK_DEV_XPRAM config DCSSBLK def_tristate m select DAX + select FS_DAX_LIMITED prompt "DCSSBLK support" depends on S390 && BLOCK help diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6aaefb780436..9cae08b36b80 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, dev_sz = dev_info->end - dev_info->start + 1; *kaddr = (void *) dev_info->start + offset; - *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), + PFN_DEV|PFN_SPECIAL); return (dev_sz - offset) / PAGE_SIZE; } diff --git a/fs/Kconfig b/fs/Kconfig index 9774588da60e..bc821a86d965 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -58,6 +58,13 @@ config FS_DAX_PMD depends on ZONE_DEVICE depends on TRANSPARENT_HUGEPAGE +# Selected by DAX drivers that do not expect filesystem DAX to support +# get_user_pages() of DAX mappings. I.e. "limited" indicates no support +# for fork() of processes with MAP_SHARED mappings or support for +# direct-I/O to a DAX mapping. +config FS_DAX_LIMITED + bool + endif # BLOCK # Posix ACL utility routines diff --git a/fs/ext2/super.c b/fs/ext2/super.c index db5f9daa7780..0a638e79bf7c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -962,8 +962,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { err = bdev_dax_supported(sb, blocksize); - if (err) - goto failed_mount; + if (err) { + ext2_msg(sb, KERN_ERR, + "DAX unsupported by block device. Turning off DAX."); + sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; + } } /* If the blocksize doesn't match, re-read the thing.. */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 421222ec3509..39bf464c35f1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3712,11 +3712,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_has_feature_inline_data(sb)) { ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" " that may contain inline data"); - goto failed_mount; + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; } err = bdev_dax_supported(sb, blocksize); - if (err) - goto failed_mount; + if (err) { + ext4_msg(sb, KERN_ERR, + "DAX unsupported by block device. Turning off DAX."); + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX; + } } if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f8109ddb5ef1..ff855ed965fb 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -47,6 +47,17 @@ enum { /* region flag indicating to direct-map persistent memory by default */ ND_REGION_PAGEMAP = 0, + /* + * Platform ensures entire CPU store data path is flushed to pmem on + * system power loss. + */ + ND_REGION_PERSIST_CACHE = 1, + /* + * Platform provides mechanisms to automatically flush outstanding + * write data from memory controler to pmem on system power loss. + * (ADR) + */ + ND_REGION_PERSIST_MEMCTRL = 2, /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..aba5f86eb038 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,6 +13,7 @@ struct pglist_data; struct mem_section; struct memory_block; struct resource; +struct vmem_altmap; #ifdef CONFIG_MEMORY_HOTPLUG /* @@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void) #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); -extern int arch_remove_memory(u64 start, u64 size); +extern int arch_remove_memory(u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ -extern int __add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #else /* ARCH_HAS_ADD_PAGES */ -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); -extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); +extern int arch_add_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); +extern int sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset); + unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 10d23c367048..7b4899c06f49 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -26,18 +26,6 @@ struct vmem_altmap { unsigned long alloc; }; -unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); -void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); - -#ifdef CONFIG_ZONE_DEVICE -struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); -#else -static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) -{ - return NULL; -} -#endif - /* * Specialize ZONE_DEVICE memory into multiple types each having differents * usage. @@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data); struct dev_pagemap { dev_page_fault_t page_fault; dev_page_free_t page_free; - struct vmem_altmap *altmap; - const struct resource *res; + struct vmem_altmap altmap; + bool altmap_valid; + struct resource res; struct percpu_ref *ref; struct device *dev; void *data; @@ -134,15 +123,17 @@ struct dev_pagemap { }; #ifdef CONFIG_ZONE_DEVICE -void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap); -struct dev_pagemap *find_dev_pagemap(resource_size_t phys); +void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); +struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap); + +unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); +void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); static inline bool is_zone_device_page(const struct page *page); #else static inline void *devm_memremap_pages(struct device *dev, - struct resource *res, struct percpu_ref *ref, - struct vmem_altmap *altmap) + struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without @@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev, return ERR_PTR(-ENXIO); } -static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap) { return NULL; } -#endif + +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +{ + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ +} +#endif /* CONFIG_ZONE_DEVICE */ #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) static inline bool is_device_private_page(const struct page *page) @@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page) } #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ -/** - * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn - * @pfn: page frame number to lookup page_map - * @pgmap: optional known pgmap that already has a reference - * - * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the - * same mapping. - */ -static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap) -{ - const struct resource *res = pgmap ? pgmap->res : NULL; - resource_size_t phys = PFN_PHYS(pfn); - - /* - * In the cached case we're already holding a live reference so - * we can simply do a blind increment - */ - if (res && phys >= res->start && phys <= res->end) { - percpu_ref_get(pgmap->ref); - return pgmap; - } - - /* fall back to slow path lookup */ - rcu_read_lock(); - pgmap = find_dev_pagemap(phys); - if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) - pgmap = NULL; - rcu_read_unlock(); - - return pgmap; -} - static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) diff --git a/include/linux/mm.h b/include/linux/mm.h index 173d2484f6e3..ad06d42adb1a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2075,8 +2075,8 @@ static inline void zero_resv_unavail(void) {} #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, - unsigned long, enum memmap_context); +extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, + enum memmap_context, struct vmem_altmap *); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); @@ -2544,7 +2544,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, unsigned long map_count, int nodeid); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); +struct page *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -2552,20 +2553,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; -void *__vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap); -static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) -{ - return __vmemmap_alloc_block_buf(size, node, NULL); -} - +void *vmemmap_alloc_block_buf(unsigned long size, int node); +void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); -int vmemmap_populate(unsigned long start, unsigned long end, int node); +int vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_free(unsigned long start, unsigned long end); +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e82..a03c2642a87c 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,8 +15,10 @@ #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) #define PFN_FLAGS_TRACE \ + { PFN_SPECIAL, "SPECIAL" }, \ { PFN_SG_CHAIN, "SG_CHAIN" }, \ { PFN_SG_LAST, "SG_LAST" }, \ { PFN_DEV, "DEV" }, \ @@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud); #endif #endif /* __HAVE_ARCH_PTE_DEVMAP */ +#ifdef __HAVE_ARCH_PTE_SPECIAL +static inline bool pfn_t_special(pfn_t pfn) +{ + return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; +} +#else +static inline bool pfn_t_special(pfn_t pfn) +{ + return false; +} +#endif /* __HAVE_ARCH_PTE_SPECIAL */ #endif /* _LINUX_PFN_T_H_ */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 3f03567631cb..7e27070b9440 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -15,54 +15,6 @@ #include <linux/types.h> -struct nd_cmd_smart { - __u32 status; - __u8 data[128]; -} __packed; - -#define ND_SMART_HEALTH_VALID (1 << 0) -#define ND_SMART_SPARES_VALID (1 << 1) -#define ND_SMART_USED_VALID (1 << 2) -#define ND_SMART_TEMP_VALID (1 << 3) -#define ND_SMART_CTEMP_VALID (1 << 4) -#define ND_SMART_ALARM_VALID (1 << 9) -#define ND_SMART_SHUTDOWN_VALID (1 << 10) -#define ND_SMART_VENDOR_VALID (1 << 11) -#define ND_SMART_SPARE_TRIP (1 << 0) -#define ND_SMART_TEMP_TRIP (1 << 1) -#define ND_SMART_CTEMP_TRIP (1 << 2) -#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0) -#define ND_SMART_CRITICAL_HEALTH (1 << 1) -#define ND_SMART_FATAL_HEALTH (1 << 2) - -struct nd_smart_payload { - __u32 flags; - __u8 reserved0[4]; - __u8 health; - __u8 spares; - __u8 life_used; - __u8 alarm_flags; - __u16 temperature; - __u16 ctrl_temperature; - __u8 reserved1[15]; - __u8 shutdown_state; - __u32 vendor_size; - __u8 vendor_data[92]; -} __packed; - -struct nd_cmd_smart_threshold { - __u32 status; - __u8 data[8]; -} __packed; - -struct nd_smart_threshold_payload { - __u8 alarm_control; - __u8 reserved0; - __u16 temperature; - __u8 spares; - __u8 reserved[3]; -} __packed; - struct nd_cmd_dimm_flags { __u32 status; __u32 flags; @@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL 'N' -#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ - struct nd_cmd_smart) - -#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ - struct nd_cmd_smart_threshold) - #define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ struct nd_cmd_dimm_flags) @@ -263,7 +209,7 @@ enum nd_driver_flags { }; enum { - ND_MIN_NAMESPACE_SIZE = 0x00400000, + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, }; enum ars_masks { diff --git a/kernel/memremap.c b/kernel/memremap.c index 403ab9cdb949..4849be5f9b3c 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL); #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) -struct page_map { - struct resource res; - struct percpu_ref *ref; - struct dev_pagemap pgmap; - struct vmem_altmap altmap; -}; - static unsigned long order_at(struct resource *res, unsigned long pgoff) { unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; @@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma, EXPORT_SYMBOL(device_private_entry_fault); #endif /* CONFIG_DEVICE_PRIVATE */ -static void pgmap_radix_release(struct resource *res) +static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff) { unsigned long pgoff, order; mutex_lock(&pgmap_lock); - foreach_order_pgoff(res, order, pgoff) + foreach_order_pgoff(res, order, pgoff) { + if (pgoff >= end_pgoff) + break; radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff); + } mutex_unlock(&pgmap_lock); synchronize_rcu(); } -static unsigned long pfn_first(struct page_map *page_map) +static unsigned long pfn_first(struct dev_pagemap *pgmap) { - struct dev_pagemap *pgmap = &page_map->pgmap; - const struct resource *res = &page_map->res; - struct vmem_altmap *altmap = pgmap->altmap; + const struct resource *res = &pgmap->res; + struct vmem_altmap *altmap = &pgmap->altmap; unsigned long pfn; pfn = res->start >> PAGE_SHIFT; - if (altmap) + if (pgmap->altmap_valid) pfn += vmem_altmap_offset(altmap); return pfn; } -static unsigned long pfn_end(struct page_map *page_map) +static unsigned long pfn_end(struct dev_pagemap *pgmap) { - const struct resource *res = &page_map->res; + const struct resource *res = &pgmap->res; return (res->start + resource_size(res)) >> PAGE_SHIFT; } @@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map) #define for_each_device_pfn(pfn, map) \ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) -static void devm_memremap_pages_release(struct device *dev, void *data) +static void devm_memremap_pages_release(void *data) { - struct page_map *page_map = data; - struct resource *res = &page_map->res; + struct dev_pagemap *pgmap = data; + struct device *dev = pgmap->dev; + struct resource *res = &pgmap->res; resource_size_t align_start, align_size; - struct dev_pagemap *pgmap = &page_map->pgmap; unsigned long pfn; - for_each_device_pfn(pfn, page_map) + for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); if (percpu_ref_tryget_live(pgmap->ref)) { @@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data) /* pages are dead and unused, undo the arch mapping */ align_start = res->start & ~(SECTION_SIZE - 1); - align_size = ALIGN(resource_size(res), SECTION_SIZE); + align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) + - align_start; mem_hotplug_begin(); - arch_remove_memory(align_start, align_size); + arch_remove_memory(align_start, align_size, pgmap->altmap_valid ? + &pgmap->altmap : NULL); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); - pgmap_radix_release(res); - dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, - "%s: failed to free all reserved pages\n", __func__); -} - -/* assumes rcu_read_lock() held at entry */ -struct dev_pagemap *find_dev_pagemap(resource_size_t phys) -{ - struct page_map *page_map; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); - return page_map ? &page_map->pgmap : NULL; + pgmap_radix_release(res, -1); + dev_WARN_ONCE(dev, pgmap->altmap.alloc, + "%s: failed to free all reserved pages\n", __func__); } /** * devm_memremap_pages - remap and provide memmap backing for the given resource * @dev: hosting device for @res - * @res: "host memory" address range - * @ref: a live per-cpu reference count - * @altmap: optional descriptor for allocating the memmap from @res + * @pgmap: pointer to a struct dev_pgmap * * Notes: - * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time - * (or devm release event). The expected order of events is that @ref has + * 1/ At a minimum the res, ref and type members of @pgmap must be initialized + * by the caller before passing it to this function + * + * 2/ The altmap field may optionally be initialized, in which case altmap_valid + * must be set to true + * + * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages() + * time (or devm release event). The expected order of events is that ref has * been through percpu_ref_kill() before devm_memremap_pages_release(). The * wait for the completion of all references being dropped and * percpu_ref_exit() must occur after devm_memremap_pages_release(). * - * 2/ @res is expected to be a host memory range that could feasibly be + * 4/ res is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but * this is not enforced. */ -void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { resource_size_t align_start, align_size, align_end; + struct vmem_altmap *altmap = pgmap->altmap_valid ? + &pgmap->altmap : NULL; unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; - struct dev_pagemap *pgmap; - struct page_map *page_map; int error, nid, is_ram, i = 0; + struct resource *res = &pgmap->res; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (is_ram == REGION_INTERSECTS) return __va(res->start); - if (!ref) + if (!pgmap->ref) return ERR_PTR(-EINVAL); - page_map = devres_alloc_node(devm_memremap_pages_release, - sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); - if (!page_map) - return ERR_PTR(-ENOMEM); - pgmap = &page_map->pgmap; - - memcpy(&page_map->res, res, sizeof(*res)); - pgmap->dev = dev; - if (altmap) { - memcpy(&page_map->altmap, altmap, sizeof(*altmap)); - pgmap->altmap = &page_map->altmap; - } - pgmap->ref = ref; - pgmap->res = &page_map->res; - pgmap->type = MEMORY_DEVICE_HOST; - pgmap->page_fault = NULL; - pgmap->page_free = NULL; - pgmap->data = NULL; mutex_lock(&pgmap_lock); error = 0; align_end = align_start + align_size - 1; foreach_order_pgoff(res, order, pgoff) { - struct dev_pagemap *dup; - - rcu_read_lock(); - dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff)); - rcu_read_unlock(); - if (dup) { - dev_err(dev, "%s: %pr collides with mapping for %s\n", - __func__, res, dev_name(dup->dev)); - error = -EBUSY; - break; - } error = __radix_tree_insert(&pgmap_radix, - PHYS_PFN(res->start) + pgoff, order, page_map); + PHYS_PFN(res->start) + pgoff, order, pgmap); if (error) { dev_err(dev, "%s: failed: %d\n", __func__, error); break; @@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, goto err_pfn_remap; mem_hotplug_begin(); - error = arch_add_memory(nid, align_start, align_size, false); + error = arch_add_memory(nid, align_start, align_size, altmap, false); if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, altmap); mem_hotplug_done(); if (error) goto err_add_memory; - for_each_device_pfn(pfn, page_map) { + for_each_device_pfn(pfn, pgmap) { struct page *page = pfn_to_page(pfn); /* @@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, */ list_del(&page->lru); page->pgmap = pgmap; - percpu_ref_get(ref); + percpu_ref_get(pgmap->ref); if (!(++i % 1024)) cond_resched(); } - devres_add(dev, page_map); + + devm_add_action(dev, devm_memremap_pages_release, pgmap); + return __va(res->start); err_add_memory: untrack_pfn(NULL, PHYS_PFN(align_start), align_size); err_pfn_remap: err_radix: - pgmap_radix_release(res); - devres_free(page_map); + pgmap_radix_release(res, pgoff); + devres_free(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); @@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) altmap->alloc -= nr_pfns; } -struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) +/** + * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn + * @pfn: page frame number to lookup page_map + * @pgmap: optional known pgmap that already has a reference + * + * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap + * is non-NULL but does not cover @pfn the reference to it will be released. + */ +struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap) { - /* - * 'memmap_start' is the virtual address for the first "struct - * page" in this range of the vmemmap array. In the case of - * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple - * pointer arithmetic, so we can perform this to_vmem_altmap() - * conversion without concern for the initialization state of - * the struct page fields. - */ - struct page *page = (struct page *) memmap_start; - struct dev_pagemap *pgmap; + resource_size_t phys = PFN_PHYS(pfn); /* - * Unconditionally retrieve a dev_pagemap associated with the - * given physical address, this is only for use in the - * arch_{add|remove}_memory() for setting up and tearing down - * the memmap. + * In the cached case we're already holding a live reference. */ + if (pgmap) { + if (phys >= pgmap->res.start && phys <= pgmap->res.end) + return pgmap; + put_dev_pagemap(pgmap); + } + + /* fall back to slow path lookup */ rcu_read_lock(); - pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page))); + pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); + if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) + pgmap = NULL; rcu_read_unlock(); - return pgmap ? pgmap->altmap : NULL; + return pgmap; } #endif /* CONFIG_ZONE_DEVICE */ - #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) void put_zone_device_private_or_public_page(struct page *page) { @@ -1394,7 +1394,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON_PAGE(compound_head(page) != head, page); - put_dev_pagemap(pgmap); SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -1404,6 +1403,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, ret = 1; pte_unmap: + if (pgmap) + put_dev_pagemap(pgmap); pte_unmap(ptem); return ret; } @@ -1443,10 +1444,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, SetPageReferenced(page); pages[*nr] = page; get_page(page); - put_dev_pagemap(pgmap); (*nr)++; pfn++; } while (addr += PAGE_SIZE, addr != end); + + if (pgmap) + put_dev_pagemap(pgmap); return 1; } @@ -836,10 +836,10 @@ static void hmm_devmem_release(struct device *dev, void *data) mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages); + __remove_pages(zone, start_pfn, npages, NULL); else arch_remove_memory(start_pfn << PAGE_SHIFT, - npages << PAGE_SHIFT); + npages << PAGE_SHIFT, NULL); mem_hotplug_done(); hmm_devmem_radix_release(resource); @@ -880,7 +880,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) else devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.res = devmem->resource; + devmem->pagemap.res = *devmem->resource; devmem->pagemap.page_fault = hmm_devmem_fault; devmem->pagemap.page_free = hmm_devmem_free; devmem->pagemap.dev = devmem->device; @@ -929,17 +929,18 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) * want the linear mapping and thus use arch_add_memory(). */ if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) - ret = arch_add_memory(nid, align_start, align_size, false); + ret = arch_add_memory(nid, align_start, align_size, NULL, + false); else ret = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, false); + align_size >> PAGE_SHIFT, NULL, false); if (ret) { mem_hotplug_done(); goto error_add_memory; } move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, NULL); mem_hotplug_done(); for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) { diff --git a/mm/memory.c b/mm/memory.c index 53373b7a1512..2248529e71c1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1904,12 +1904,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_pfn_prot); +static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) +{ + /* these checks mirror the abort conditions in vm_normal_page */ + if (vma->vm_flags & VM_MIXEDMAP) + return true; + if (pfn_t_devmap(pfn)) + return true; + if (pfn_t_special(pfn)) + return true; + if (is_zero_pfn(pfn_t_to_pfn(pfn))) + return true; + return false; +} + static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; - BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); + BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9bbd6982d4e4..b2bd52ff7605 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -247,7 +247,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - bool want_memblock) + struct vmem_altmap *altmap, bool want_memblock) { int ret; int i; @@ -255,7 +255,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (pfn_valid(phys_start_pfn)) return -EEXIST; - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn); + ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); if (ret < 0) return ret; @@ -289,18 +289,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long i; int err = 0; int start_sec, end_sec; - struct vmem_altmap *altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); if (altmap) { /* * Validate altmap is within bounds of the total request @@ -315,7 +314,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, } for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), want_memblock); + err = __add_section(nid, section_nr_to_pfn(i), altmap, + want_memblock); /* * EEXIST is finally dealt with by ioresource collision @@ -331,7 +331,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, out: return err; } -EXPORT_SYMBOL_GPL(__add_pages); #ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ @@ -534,7 +533,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) } static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { unsigned long start_pfn; int scn_nr; @@ -551,7 +550,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); - sparse_remove_one_section(zone, ms, map_offset); + sparse_remove_one_section(zone, ms, map_offset, altmap); return 0; } @@ -567,7 +566,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, * calling offline_pages(). */ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -575,10 +574,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, /* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { - struct page *page = pfn_to_page(phys_start_pfn); - struct vmem_altmap *altmap; - - altmap = to_vmem_altmap((unsigned long) page); if (altmap) map_offset = vmem_altmap_offset(altmap); } else { @@ -609,7 +604,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); map_offset = 0; if (ret) break; @@ -799,8 +795,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn; } -void __ref move_pfn_range_to_zone(struct zone *zone, - unsigned long start_pfn, unsigned long nr_pages) +void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { struct pglist_data *pgdat = zone->zone_pgdat; int nid = pgdat->node_id; @@ -825,7 +821,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone, * expects the zone spans the pfn range. All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG); + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + MEMMAP_HOTPLUG, altmap); set_zone_contiguous(zone); } @@ -897,7 +894,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid, struct zone *zone; zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); - move_pfn_range_to_zone(zone, start_pfn, nr_pages); + move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL); return zone; } @@ -1146,7 +1143,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) } /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, true); + ret = arch_add_memory(nid, start, size, NULL, true); if (ret < 0) goto error; @@ -1888,7 +1885,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); - arch_remove_memory(start, size); + arch_remove_memory(start, size, NULL); try_offline_node(nid); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c7dd9c86e353..81e18ceef579 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5321,9 +5321,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat) * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum memmap_context context) + unsigned long start_pfn, enum memmap_context context, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn)); unsigned long end_pfn = start_pfn + size; pg_data_t *pgdat = NODE_DATA(nid); unsigned long pfn; @@ -5429,7 +5429,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ - memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) + memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL) #endif static int zone_batchsize(struct zone *zone) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 17acf01791fa..bd0276d5f66b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) } /* need to make sure size is all the same during early stage */ -static void * __meminit alloc_block_buf(unsigned long size, int node) +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) { void *ptr; @@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap) } /** - * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation - * @altmap - reserved page pool for the allocation - * @nr_pfns - size (in pages) of the allocation + * altmap_alloc_block_buf - allocate pages from the device page map + * @altmap: device page map + * @size: size (in bytes) of the allocation * - * Allocations are aligned to the size of the request + * Allocations are aligned to the size of the request. */ -static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, - unsigned long nr_pfns) -{ - unsigned long pfn = vmem_altmap_next_pfn(altmap); - unsigned long nr_align; - - nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG); - nr_align = ALIGN(pfn, nr_align) - pfn; - - if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) - return ULONG_MAX; - altmap->alloc += nr_pfns; - altmap->align += nr_align; - return pfn + nr_align; -} - -static void * __meminit altmap_alloc_block_buf(unsigned long size, +void * __meminit altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap) { - unsigned long pfn, nr_pfns; - void *ptr; + unsigned long pfn, nr_pfns, nr_align; if (size & ~PAGE_MASK) { pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n", @@ -141,25 +124,20 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size, return NULL; } + pfn = vmem_altmap_next_pfn(altmap); nr_pfns = size >> PAGE_SHIFT; - pfn = vmem_altmap_alloc(altmap, nr_pfns); - if (pfn < ULONG_MAX) - ptr = __va(__pfn_to_phys(pfn)); - else - ptr = NULL; - pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n", - __func__, pfn, altmap->alloc, altmap->align, nr_pfns); + nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG); + nr_align = ALIGN(pfn, nr_align) - pfn; + if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap)) + return NULL; - return ptr; -} + altmap->alloc += nr_pfns; + altmap->align += nr_align; + pfn += nr_align; -/* need to make sure size is all the same during early stage */ -void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap) -{ - if (altmap) - return altmap_alloc_block_buf(size, altmap); - return alloc_block_buf(size, node); + pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n", + __func__, pfn, altmap->alloc, altmap->align, nr_pfns); + return __va(__pfn_to_phys(pfn)); } void __meminit vmemmap_verify(pte_t *pte, int node, @@ -178,7 +156,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = alloc_block_buf(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); @@ -278,7 +256,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) +struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { unsigned long start; unsigned long end; @@ -288,7 +267,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) start = (unsigned long)map; end = (unsigned long)(map + PAGES_PER_SECTION); - if (vmemmap_populate(start, end, nid)) + if (vmemmap_populate(start, end, nid, altmap)) return NULL; return map; @@ -318,7 +297,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); diff --git a/mm/sparse.c b/mm/sparse.c index 6b8b5e91ceef..7af5e7a92528 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -421,7 +421,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data, } #ifndef CONFIG_SPARSEMEM_VMEMMAP -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) +struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { struct page *map; unsigned long size; @@ -476,7 +477,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); @@ -504,7 +505,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); - map = sparse_mem_map_populate(pnum, nid); + map = sparse_mem_map_populate(pnum, nid, NULL); if (map) return map; @@ -682,17 +683,19 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { /* This will make the necessary allocations eventually. */ - return sparse_mem_map_populate(pnum, nid); + return sparse_mem_map_populate(pnum, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) @@ -700,7 +703,7 @@ static void free_map_bootmem(struct page *memmap) unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, NULL); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #else @@ -725,12 +728,14 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { return __kmalloc_section_memmap(); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { if (is_vmalloc_addr(memmap)) vfree(memmap); @@ -777,7 +782,8 @@ static void free_map_bootmem(struct page *memmap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) +int __meminit sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; @@ -793,12 +799,12 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); + memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap(); if (!usemap) { - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return -ENOMEM; } @@ -820,7 +826,7 @@ out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { kfree(usemap); - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); } return ret; } @@ -847,7 +853,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) +static void free_section_usemap(struct page *memmap, unsigned long *usemap, + struct vmem_altmap *altmap) { struct page *usemap_page; @@ -861,7 +868,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) if (PageSlab(usemap_page) || PageCompound(usemap_page)) { kfree(usemap); if (memmap) - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return; } @@ -875,7 +882,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) } void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL, flags; @@ -893,7 +900,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + free_section_usemap(memmap, usemap, altmap); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index db33b28c5ef3..0392153a0009 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o +nfit-y += acpi_nfit_test.o nfit-y += config_check.o nd_pmem-y := $(NVDIMM_SRC)/pmem.o nd_pmem-y += pmem-dax.o +nd_pmem-y += pmem_test.o nd_pmem-y += config_check.o nd_btt-y := $(NVDIMM_SRC)/btt.o @@ -57,6 +59,7 @@ dax-y += config_check.o device_dax-y := $(DAX_SRC)/device.o device_dax-y += dax-dev.o +device_dax-y += device_dax_test.o device_dax-y += config_check.o dax_pmem-y := $(DAX_SRC)/pmem.o @@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o +libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o obj-m += test/ diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c new file mode 100644 index 000000000000..43521512e577 --- /dev/null +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(acpi_nfit); diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c new file mode 100644 index 000000000000..24b17bf42429 --- /dev/null +++ b/tools/testing/nvdimm/device_dax_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(device_dax); diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c new file mode 100644 index 000000000000..00ca30b23932 --- /dev/null +++ b/tools/testing/nvdimm/libnvdimm_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(libnvdimm); diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c new file mode 100644 index 000000000000..fd38f92275cf --- /dev/null +++ b/tools/testing/nvdimm/pmem_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(pmem); diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..ff9d3a5825e1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t offset = res->start; + resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) return nfit_res->buf + offset - nfit_res->res.start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, pgmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 7217b2b953b5..620fa78b3b1b 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -27,6 +27,7 @@ #include <nfit.h> #include <nd.h> #include "nfit_test.h" +#include "../watermark.h" /* * Generate an NFIT table to describe the following topology: @@ -137,6 +138,14 @@ static u32 handle[] = { static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +struct nfit_test_fw { + enum intel_fw_update_state state; + u32 context; + u64 version; + u32 size_received; + u64 end_time; +}; + struct nfit_test { struct acpi_nfit_desc acpi_desc; struct platform_device pdev; @@ -168,8 +177,11 @@ struct nfit_test { spinlock_t lock; } ars_state; struct device *dimm_dev[NUM_DCR]; + struct nd_intel_smart *smart; + struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; struct work_struct work; + struct nfit_test_fw *fw; }; static struct workqueue_struct *nfit_wq; @@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nd_intel_test_get_fw_info(struct nfit_test *t, + struct nd_intel_fw_info *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE; + nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN; + nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL; + nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME; + nd_cmd->update_cap = 0; + nd_cmd->fis_version = INTEL_FW_FIS_VERSION; + nd_cmd->run_version = 0; + nd_cmd->updated_version = fw->version; + + return 0; +} + +static int nd_intel_test_start_update(struct nfit_test *t, + struct nd_intel_fw_start *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (fw->state != FW_STATE_NEW) { + /* extended status, FW update in progress */ + nd_cmd->status = 0x10007; + return 0; + } + + fw->state = FW_STATE_IN_PROGRESS; + fw->context++; + fw->size_received = 0; + nd_cmd->status = 0; + nd_cmd->context = fw->context; + + dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context); + + return 0; +} + +static int nd_intel_test_send_data(struct nfit_test *t, + struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + + dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status); + dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]); + dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1, + nd_cmd->data[nd_cmd->length-1]); + + if (fw->state != FW_STATE_IN_PROGRESS) { + dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__); + *status = 0x5; + return 0; + } + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + *status = 0x10007; + return 0; + } + + /* + * check offset + len > size of fw storage + * check length is > max send length + */ + if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE || + nd_cmd->length > INTEL_FW_MAX_SEND_LEN) { + *status = 0x3; + dev_dbg(dev, "%s: buffer boundary violation\n", __func__); + return 0; + } + + fw->size_received += nd_cmd->length; + dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n", + __func__, nd_cmd->length, fw->size_received); + *status = 0; + return 0; +} + +static int nd_intel_test_finish_fw(struct nfit_test *t, + struct nd_intel_fw_finish_update *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (fw->state == FW_STATE_UPDATED) { + /* update already done, need cold boot */ + nd_cmd->status = 0x20007; + return 0; + } + + dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n", + __func__, nd_cmd->context, nd_cmd->ctrl_flags); + + switch (nd_cmd->ctrl_flags) { + case 0: /* finish */ + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, + fw->context); + nd_cmd->status = 0x10007; + return 0; + } + nd_cmd->status = 0; + fw->state = FW_STATE_VERIFY; + /* set 1 second of time for firmware "update" */ + fw->end_time = jiffies + HZ; + break; + + case 1: /* abort */ + fw->size_received = 0; + /* successfully aborted status */ + nd_cmd->status = 0x40007; + fw->state = FW_STATE_NEW; + dev_dbg(dev, "%s: abort successful\n", __func__); + break; + + default: /* bad control flag */ + dev_warn(dev, "%s: unknown control flag: %#x\n", + __func__, nd_cmd->ctrl_flags); + return -EINVAL; + } + + return 0; +} + +static int nd_intel_test_finish_query(struct nfit_test *t, + struct nd_intel_fw_finish_query *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + nd_cmd->status = 0x10007; + return 0; + } + + dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context); + + switch (fw->state) { + case FW_STATE_NEW: + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0; + dev_dbg(dev, "%s: new state\n", __func__); + break; + + case FW_STATE_IN_PROGRESS: + /* sequencing error */ + nd_cmd->status = 0x40007; + nd_cmd->updated_fw_rev = 0; + dev_dbg(dev, "%s: sequence error\n", __func__); + break; + + case FW_STATE_VERIFY: + if (time_is_after_jiffies64(fw->end_time)) { + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0x20007; + dev_dbg(dev, "%s: still verifying\n", __func__); + break; + } + + dev_dbg(dev, "%s: transition out verify\n", __func__); + fw->state = FW_STATE_UPDATED; + /* we are going to fall through if it's "done" */ + case FW_STATE_UPDATED: + nd_cmd->status = 0; + /* bogus test version */ + fw->version = nd_cmd->updated_fw_rev = + INTEL_FW_FAKE_VERSION; + dev_dbg(dev, "%s: updated\n", __func__); + break; + + default: /* we should never get here */ + return -EINVAL; + } + + return 0; +} + static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, unsigned int buf_len) { @@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, return 0; } -static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len, + struct nd_intel_smart *smart_data) { - static const struct nd_smart_payload smart_data = { - .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID - | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID - | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, - .health = ND_SMART_NON_CRITICAL_HEALTH, - .temperature = 23 * 16, - .spares = 75, - .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .life_used = 5, - .shutdown_state = 0, - .vendor_size = 0, - }; - if (buf_len < sizeof(*smart)) return -EINVAL; - memcpy(smart->data, &smart_data, sizeof(smart_data)); + memcpy(smart, smart_data, sizeof(*smart)); return 0; } -static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, - unsigned int buf_len) +static int nfit_test_cmd_smart_threshold( + struct nd_intel_smart_threshold *out, + unsigned int buf_len, + struct nd_intel_smart_threshold *smart_t) { - static const struct nd_smart_threshold_payload smart_t_data = { - .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .temperature = 40 * 16, - .spares = 5, - }; - if (buf_len < sizeof(*smart_t)) return -EINVAL; - memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + memcpy(out, smart_t, sizeof(*smart_t)); + return 0; +} + +static void smart_notify(struct device *bus_dev, + struct device *dimm_dev, struct nd_intel_smart *smart, + struct nd_intel_smart_threshold *thresh) +{ + dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n", + __func__, thresh->alarm_control, thresh->spares, + smart->spares, thresh->media_temperature, + smart->media_temperature, thresh->ctrl_temperature, + smart->ctrl_temperature); + if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP) + && smart->spares + <= thresh->spares) + || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP) + && smart->media_temperature + >= thresh->media_temperature) + || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) + && smart->ctrl_temperature + >= thresh->ctrl_temperature)) { + device_lock(bus_dev); + __acpi_nvdimm_notify(dimm_dev, 0x81); + device_unlock(bus_dev); + } +} + +static int nfit_test_cmd_smart_set_threshold( + struct nd_intel_smart_set_threshold *in, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + unsigned int size; + + size = sizeof(*in) - 4; + if (buf_len < size) + return -EINVAL; + memcpy(thresh->data, in, size); + in->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + return 0; } @@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, return 0; } +static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t, + struct nd_intel_lss *nd_cmd, unsigned int buf_len) +{ + struct device *dev = &t->pdev.dev; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + switch (nd_cmd->enable) { + case 0: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n", + __func__); + break; + case 1: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n", + __func__); + break; + default: + dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable); + nd_cmd->status = 0x3; + break; + } + + + return 0; +} + +static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) +{ + int i; + + /* lookup per-dimm data */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + if ((1 << func) & dimm_fail_cmd_flags[i]) + return -EIO; + + return i; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, func = call_pkg->nd_command; if (call_pkg->nd_family != nfit_mem->family) return -ENOTTY; + + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; + + switch (func) { + case ND_INTEL_ENABLE_LSS_STATUS: + return nd_intel_test_cmd_set_lss_status(t, + buf, buf_len); + case ND_INTEL_FW_GET_INFO: + return nd_intel_test_get_fw_info(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_START_UPDATE: + return nd_intel_test_start_update(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_SEND_DATA: + return nd_intel_test_send_data(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_UPDATE: + return nd_intel_test_finish_fw(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_QUERY: + return nd_intel_test_finish_query(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_SMART: + return nfit_test_cmd_smart(buf, buf_len, + &t->smart[i - t->dcr_idx]); + case ND_INTEL_SMART_THRESHOLD: + return nfit_test_cmd_smart_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx]); + case ND_INTEL_SMART_SET_THRESHOLD: + return nfit_test_cmd_smart_set_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); + default: + return -ENOTTY; + } } if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; - /* lookup label space for the given dimm */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == - handle[i]) - break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; - - if ((1 << func) & dimm_fail_cmd_flags[i]) - return -EIO; + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; switch (func) { case ND_CMD_GET_CONFIG_SIZE: @@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i - t->dcr_idx]); break; - case ND_CMD_SMART: - rc = nfit_test_cmd_smart(buf, buf_len); - break; - case ND_CMD_SMART_THRESHOLD: - rc = nfit_test_cmd_smart_threshold(buf, buf_len); - device_lock(&t->pdev.dev); - __acpi_nvdimm_notify(t->dimm_dev[i], 0x81); - device_unlock(&t->pdev.dev); - break; default: return -ENOTTY; } @@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static void smart_init(struct nfit_test *t) +{ + int i; + const struct nd_intel_smart_threshold smart_t_data = { + .alarm_control = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .media_temperature = 40 * 16, + .ctrl_temperature = 30 * 16, + .spares = 5, + }; + const struct nd_intel_smart smart_data = { + .flags = ND_INTEL_SMART_HEALTH_VALID + | ND_INTEL_SMART_SPARES_VALID + | ND_INTEL_SMART_ALARM_VALID + | ND_INTEL_SMART_USED_VALID + | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_MTEMP_VALID, + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, + .media_temperature = 23 * 16, + .ctrl_temperature = 30 * 16, + .pmic_temperature = 40 * 16, + .spares = 75, + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .ait_status = 1, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + .shutdown_count = 100, + }; + + for (i = 0; i < t->num_dcr; i++) { + memcpy(&t->smart[i], &smart_data, sizeof(smart_data)); + memcpy(&t->smart_threshold[i], &smart_t_data, + sizeof(smart_t_data)); + } +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA @@ -881,7 +1250,8 @@ static int nfit_test0_alloc(struct nfit_test *t) window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + (sizeof(struct acpi_nfit_flush_address) - + sizeof(u64) * NUM_HINTS) * NUM_DCR; + + sizeof(u64) * NUM_HINTS) * NUM_DCR + + sizeof(struct acpi_nfit_capabilities); int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -939,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -969,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -993,6 +1365,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; + struct acpi_nfit_capabilities *pcap; unsigned int offset, i; /* @@ -1500,8 +1873,16 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + /* platform capabilities */ + pcap = nfit_buf + offset + flush_hint_size * 4; + pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; + pcap->header.length = sizeof(*pcap); + pcap->highest_capability = 1; + pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | + ACPI_NFIT_CAPABILITY_MEM_FLUSH; + if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4; + offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1642,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1750,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -2054,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(struct nfit_test_dcr *), GFP_KERNEL); nfit_test->dcr_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->smart = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart), GFP_KERNEL); + nfit_test->smart_threshold = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart_threshold), + GFP_KERNEL); + nfit_test->fw = devm_kcalloc(dev, num, + sizeof(struct nfit_test_fw), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr && nfit_test->dcr_dma && nfit_test->flush - && nfit_test->flush_dma) + && nfit_test->flush_dma + && nfit_test->fw) /* pass */; else return -ENOMEM; @@ -2159,6 +2556,11 @@ static __init int nfit_test_init(void) { int rc, i; + pmem_test(); + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); + nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); nfit_wq = create_singlethread_workqueue("nfit"); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 113b44675a71..428344519cdf 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat { } __packed record[0]; } __packed; +#define ND_INTEL_SMART 1 +#define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_ENABLE_LSS_STATUS 10 +#define ND_INTEL_FW_GET_INFO 12 +#define ND_INTEL_FW_START_UPDATE 13 +#define ND_INTEL_FW_SEND_DATA 14 +#define ND_INTEL_FW_FINISH_UPDATE 15 +#define ND_INTEL_FW_FINISH_QUERY 16 +#define ND_INTEL_SMART_SET_THRESHOLD 17 + +#define ND_INTEL_SMART_HEALTH_VALID (1 << 0) +#define ND_INTEL_SMART_SPARES_VALID (1 << 1) +#define ND_INTEL_SMART_USED_VALID (1 << 2) +#define ND_INTEL_SMART_MTEMP_VALID (1 << 3) +#define ND_INTEL_SMART_CTEMP_VALID (1 << 4) +#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) +#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6) +#define ND_INTEL_SMART_PTEMP_VALID (1 << 7) +#define ND_INTEL_SMART_ALARM_VALID (1 << 9) +#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) +#define ND_INTEL_SMART_VENDOR_VALID (1 << 11) +#define ND_INTEL_SMART_SPARE_TRIP (1 << 0) +#define ND_INTEL_SMART_TEMP_TRIP (1 << 1) +#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2) +#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) + +struct nd_intel_smart { + __u32 status; + union { + struct { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u8 spares; + __u8 life_used; + __u8 alarm_flags; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 shutdown_count; + __u8 ait_status; + __u16 pmic_temperature; + __u8 reserved1[8]; + __u8 shutdown_state; + __u32 vendor_size; + __u8 vendor_data[92]; + } __packed; + __u8 data[128]; + }; +} __packed; + +struct nd_intel_smart_threshold { + __u32 status; + union { + struct { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u8 reserved[1]; + } __packed; + __u8 data[8]; + }; +} __packed; + +struct nd_intel_smart_set_threshold { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 status; +} __packed; + +#define INTEL_FW_STORAGE_SIZE 0x100000 +#define INTEL_FW_MAX_SEND_LEN 0xFFEC +#define INTEL_FW_QUERY_INTERVAL 250000 +#define INTEL_FW_QUERY_MAX_TIME 3000000 +#define INTEL_FW_FIS_VERSION 0x0105 +#define INTEL_FW_FAKE_VERSION 0xffffffffabcd + +enum intel_fw_update_state { + FW_STATE_NEW = 0, + FW_STATE_IN_PROGRESS, + FW_STATE_VERIFY, + FW_STATE_UPDATED, +}; + +struct nd_intel_fw_info { + __u32 status; + __u32 storage_size; + __u32 max_send_len; + __u32 query_interval; + __u32 max_query_time; + __u8 update_cap; + __u8 reserved[3]; + __u32 fis_version; + __u64 run_version; + __u64 updated_version; +} __packed; + +struct nd_intel_fw_start { + __u32 status; + __u32 context; +} __packed; + +/* this one has the output first because the variable input data size */ +struct nd_intel_fw_send_data { + __u32 context; + __u32 offset; + __u32 length; + __u8 data[0]; +/* this field is not declared due ot variable data from input */ +/* __u32 status; */ +} __packed; + +struct nd_intel_fw_finish_update { + __u8 ctrl_flags; + __u8 reserved[3]; + __u32 context; + __u32 status; +} __packed; + +struct nd_intel_fw_finish_query { + __u32 context; + __u32 status; + __u64 updated_fw_rev; +} __packed; + +struct nd_intel_lss { + __u8 enable; + __u32 status; +} __packed; + union acpi_object; typedef void *acpi_handle; diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h new file mode 100644 index 000000000000..ed0528757bd4 --- /dev/null +++ b/tools/testing/nvdimm/watermark.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. +#ifndef _TEST_NVDIMM_WATERMARK_H_ +#define _TEST_NVDIMM_WATERMARK_H_ +int pmem_test(void); +int libnvdimm_test(void); +int acpi_nfit_test(void); +int device_dax_test(void); + +/* + * dummy routine for nfit_test to validate it is linking to the properly + * mocked module and not the standard one from the base tree. + */ +#define nfit_test_watermark(x) \ +int x##_test(void) \ +{ \ + pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \ + return 0; \ +} \ +EXPORT_SYMBOL(x##_test) +#endif /* _TEST_NVDIMM_WATERMARK_H_ */ |