summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/arm64/mm/mmu.c 9
-rw-r--r-- arch/ia64/mm/discontig.c 6
-rw-r--r-- arch/ia64/mm/init.c 18
-rw-r--r-- arch/powerpc/mm/init_64.c 17
-rw-r--r-- arch/powerpc/mm/mem.c 11
-rw-r--r-- arch/powerpc/platforms/Kconfig 1
-rw-r--r-- arch/powerpc/sysdev/axonram.c 2
-rw-r--r-- arch/s390/mm/init.c 7
-rw-r--r-- arch/s390/mm/vmem.c 6
-rw-r--r-- arch/sh/mm/init.c 10
-rw-r--r-- arch/sparc/mm/init_64.c 5
-rw-r--r-- arch/x86/mm/init_32.c 9
-rw-r--r-- arch/x86/mm/init_64.c 94
-rw-r--r-- drivers/acpi/nfit/core.c 3
-rw-r--r-- drivers/dax/device.c 2
-rw-r--r-- drivers/dax/pmem.c 20
-rw-r--r-- drivers/dax/super.c 10
-rw-r--r-- drivers/nvdimm/btt.c 2
-rw-r--r-- drivers/nvdimm/bus.c 3
-rw-r--r-- drivers/nvdimm/namespace_devs.c 2
-rw-r--r-- drivers/nvdimm/nd.h 9
-rw-r--r-- drivers/nvdimm/pfn_devs.c 27
-rw-r--r-- drivers/nvdimm/pmem.c 37
-rw-r--r-- drivers/nvdimm/pmem.h 1
-rw-r--r-- drivers/s390/block/Kconfig 1
-rw-r--r-- drivers/s390/block/dcssblk.c 3
-rw-r--r-- fs/Kconfig 7
-rw-r--r-- fs/ext2/super.c 7
-rw-r--r-- fs/ext4/super.c 9
-rw-r--r-- include/linux/memory_hotplug.h 29
-rw-r--r-- include/linux/memremap.h 77
-rw-r--r-- include/linux/mm.h 22
-rw-r--r-- include/linux/pfn_t.h 13
-rw-r--r-- include/uapi/linux/ndctl.h 56
-rw-r--r-- kernel/memremap.c 174
-rw-r--r-- mm/gup.c 7
-rw-r--r-- mm/hmm.c 13
-rw-r--r-- mm/memory.c 16
-rw-r--r-- mm/memory_hotplug.c 39
-rw-r--r-- mm/page_alloc.c 6
-rw-r--r-- mm/sparse-vmemmap.c 67
-rw-r--r-- mm/sparse.c 43
-rw-r--r-- tools/testing/nvdimm/Kbuild 4
-rw-r--r-- tools/testing/nvdimm/acpi_nfit_test.c 8
-rw-r--r-- tools/testing/nvdimm/device_dax_test.c 8
-rw-r--r-- tools/testing/nvdimm/libnvdimm_test.c 8
-rw-r--r-- tools/testing/nvdimm/pmem_test.c 8
-rw-r--r-- tools/testing/nvdimm/test/iomap.c 7
-rw-r--r-- tools/testing/nvdimm/test/nfit.c 484
-rw-r--r-- tools/testing/nvdimm/test/nfit_test.h 134
-rw-r--r-- tools/testing/nvdimm/watermark.h 21
51 files changed, 1059 insertions, 523 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 267d2b79d52d..0b1f13e0b4b3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -654,12 +654,14 @@ int kern_addr_valid(unsigned long addr)
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
return vmemmap_populate_basepages(start, end, node);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long addr = start;
unsigned long next;
@@ -694,7 +696,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
return 0;
}
#endif /* CONFIG_ARM64_64K_PAGES */
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 9b2d994cddf6..5ea0d8d0968b 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
return vmemmap_populate_basepages(start, end, node);
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 7af4e05bb61e..18278b448530 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start),
- MEMMAP_EARLY);
+ MEMMAP_EARLY, NULL);
return 0;
}
@@ -509,9 +509,10 @@ void __meminit
memmap_init (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
- if (!vmem_map)
- memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
- else {
+ if (!vmem_map) {
+ memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
+ NULL);
+ } else {
struct page *start;
struct memmap_init_callback_data args;
@@ -647,13 +648,14 @@ mem_init (void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (ret)
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
__func__, ret);
@@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret;
zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (ret)
pr_warn("%s: Problem encountered in __remove_pages() as"
" ret=%d\n", __func__, ret);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a07722531b32..7a2251d99ed3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmemmap_list = vmem_back;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
- struct vmem_altmap *altmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- /* altmap lookups only work at section boundaries */
- altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
- p = __vmemmap_alloc_block_buf(page_size, node, altmap);
+ if (altmap)
+ p = altmap_alloc_block_buf(page_size, altmap);
+ else
+ p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
return -ENOMEM;
@@ -257,7 +257,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys;
}
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size);
@@ -268,7 +269,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
for (; start < end; start += page_size) {
unsigned long nr_pages, addr;
- struct vmem_altmap *altmap;
struct page *section_base;
struct page *page;
@@ -288,7 +288,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order;
- altmap = to_vmem_altmap((unsigned long) section_base);
if (altmap) {
vmem_altmap_free(altmap, nr_pages);
} else if (PageReserved(page)) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4362b86ef84c..22aa528b78a2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -144,15 +145,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
return -EFAULT;
}
- return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct vmem_altmap *altmap;
struct page *page;
int ret;
@@ -161,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size)
* when querying the zone.
*/
page = pfn_to_page(start_pfn);
- altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
page += vmem_altmap_offset(altmap);
- ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
+ ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
if (ret)
return ret;
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 5a96a2763e4a..2ce89b42a9f4 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -297,6 +297,7 @@ config AXON_RAM
tristate "Axon DDR2 memory device driver"
depends on PPC_IBM_CELL_BLADE && BLOCK
select DAX
+ select FS_DAX_LIMITED
default m
help
It registers one block device per Axon's DDR2 memory bank found
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1b307c80b401..cdbb0e59b3d3 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page
resource_size_t offset = pgoff * PAGE_SIZE;
*kaddr = (void *) bank->io_addr + offset;
- *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
+ *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL);
return (bank->size - offset) / PAGE_SIZE;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 671535e64aba..3fa3e5323612 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init);
#endif /* CONFIG_CMA */
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
@@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
+ rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
if (rc)
vmem_remove_mapping(start, size);
return rc;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
/*
* There is no hardware or firmware interface which could trigger a
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3316d463fc29..db55561c5981 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
/*
* Add a backed mem_map array to the virtual mem_map array.
*/
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long pgt_prot, sgt_prot;
unsigned long address = start;
@@ -296,7 +297,8 @@ out:
return ret;
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf726af5f1a5..ce0bbaa7e404 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
/* We only have ZONE_NORMAL, so this is easy.. */
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
return ret;
}
-EXPORT_SYMBOL_GPL(arch_add_memory);
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 addr)
@@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret;
zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (unlikely(ret))
pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
ret);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 55ba62957e64..995f9490334d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
- int node)
+ int node, struct vmem_altmap *altmap)
{
unsigned long pte_base;
@@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
return 0;
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 135c9a7898c7..79cb066f40c0 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -829,23 +829,24 @@ void __init mem_init(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
zone = page_zone(pfn_to_page(start_pfn));
- return __remove_pages(zone, start_pfn, nr_pages);
+ return __remove_pages(zone, start_pfn, nr_pages, altmap);
}
#endif
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4a837289f2ad..1ab42c852069 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
}
-int add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock)
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap, bool want_memblock)
{
int ret;
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
WARN_ON_ONCE(ret);
/* update max_pfn, max_low_pfn and high_memory */
@@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn,
return ret;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
init_memory_mapping(start, start + size);
- return add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
-EXPORT_SYMBOL_GPL(arch_add_memory);
#define PAGE_INUSE 0xFD
-static void __meminit free_pagetable(struct page *page, int order)
+static void __meminit free_pagetable(struct page *page, int order,
+ struct vmem_altmap *altmap)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
if (altmap) {
vmem_altmap_free(altmap, nr_pages);
@@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order)
free_pages((unsigned long)page_address(page), order);
}
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+ struct vmem_altmap *altmap)
{
pte_t *pte;
int i;
@@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
}
/* free a pte talbe */
- free_pagetable(pmd_page(*pmd), 0);
+ free_pagetable(pmd_page(*pmd), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
+ struct vmem_altmap *altmap)
{
pmd_t *pmd;
int i;
@@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
}
/* free a pmd talbe */
- free_pagetable(pud_page(*pud), 0);
+ free_pagetable(pud_page(*pud), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
+ struct vmem_altmap *altmap)
{
pud_t *pud;
int i;
@@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
}
/* free a pud talbe */
- free_pagetable(p4d_page(*p4d), 0);
+ free_pagetable(p4d_page(*p4d), 0, altmap);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
@@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
* freed when offlining, or simplely not in use.
*/
if (!direct)
- free_pagetable(pte_page(*pte), 0);
+ free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
- free_pagetable(pte_page(*pte), 0);
+ free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
static void __meminit
remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
- bool direct)
+ bool direct, struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte_base;
@@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE));
+ get_order(PMD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE));
+ get_order(PMD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, direct);
- free_pte_table(pte_base, pmd);
+ remove_pte_table(pte_base, addr, next, altmap, direct);
+ free_pte_table(pte_base, pmd, altmap);
}
/* Call free_pmd_table() in remove_pud_table(). */
@@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
static void __meminit
remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pmd_t *pmd_base;
@@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) {
if (!direct)
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE));
+ get_order(PUD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) {
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE));
+ get_order(PUD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
}
pmd_base = pmd_offset(pud, 0);
- remove_pmd_table(pmd_base, addr, next, direct);
- free_pmd_table(pmd_base, pud);
+ remove_pmd_table(pmd_base, addr, next, direct, altmap);
+ free_pmd_table(pmd_base, pud, altmap);
}
if (direct)
@@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
static void __meminit
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pud_t *pud_base;
@@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
BUILD_BUG_ON(p4d_large(*p4d));
pud_base = pud_offset(p4d, 0);
- remove_pud_table(pud_base, addr, next, direct);
+ remove_pud_table(pud_base, addr, next, altmap, direct);
/*
* For 4-level page tables we do not want to free PUDs, but in the
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
if (CONFIG_PGTABLE_LEVELS == 5)
- free_pud_table(pud_base, p4d);
+ free_pud_table(pud_base, p4d, altmap);
}
if (direct)
@@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
/* start and end are both virtual address. */
static void __meminit
-remove_pagetable(unsigned long start, unsigned long end, bool direct)
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next;
unsigned long addr;
@@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
continue;
p4d = p4d_offset(pgd, 0);
- remove_p4d_table(p4d, addr, next, direct);
+ remove_p4d_table(p4d, addr, next, altmap, direct);
}
flush_tlb_all();
}
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
- remove_pagetable(start, end, false);
+ remove_pagetable(start, end, false, altmap);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
- remove_pagetable(start, end, true);
+ remove_pagetable(start, end, true, NULL);
}
-int __ref arch_remove_memory(u64 start, u64 size)
+int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
- struct vmem_altmap *altmap;
struct zone *zone;
int ret;
/* With altmap the first mapped page is offset from @start */
- altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
page += vmem_altmap_offset(altmap);
zone = page_zone(page);
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
WARN_ON_ONCE(ret);
kernel_physical_mapping_remove(start, start + size);
@@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;
- p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (altmap)
+ p = altmap_alloc_block_buf(PMD_SIZE, altmap);
+ else
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) {
pte_t entry;
@@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
return 0;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
- struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
if (boot_cpu_has(X86_FEATURE_PSE))
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index aa9d00db763a..bbe48ad20886 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1883,6 +1883,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
struct kernfs_node *nfit_kernfs;
nvdimm = nfit_mem->nvdimm;
+ if (!nvdimm)
+ continue;
+
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 7b0bf825c4e7..2137dbc29877 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -133,7 +133,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
dax_region->base = addr;
if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
kfree(dax_region);
- return NULL;;
+ return NULL;
}
kref_get(&dax_region->kref);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 8d8c852ba8f2..31b6ecce4c64 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -21,6 +21,7 @@
struct dax_pmem {
struct device *dev;
struct percpu_ref ref;
+ struct dev_pagemap pgmap;
struct completion cmp;
};
@@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev)
struct nd_namespace_common *ndns;
struct nd_dax *nd_dax = to_nd_dax(dev);
struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
- struct vmem_altmap __altmap, *altmap = NULL;
ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns))
return PTR_ERR(ndns);
nsio = to_nd_namespace_io(&ndns->dev);
+ dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+ if (!dax_pmem)
+ return -ENOMEM;
+
/* parse the 'pfn' info block via ->rw_bytes */
rc = devm_nsio_enable(dev, nsio);
if (rc)
return rc;
- altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
- if (IS_ERR(altmap))
- return PTR_ERR(altmap);
+ rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
+ if (rc)
+ return rc;
devm_nsio_disable(dev, nsio);
pfn_sb = nd_pfn->pfn_sb;
@@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev)
return -EBUSY;
}
- dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
- if (!dax_pmem)
- return -ENOMEM;
-
dax_pmem->dev = dev;
init_completion(&dax_pmem->cmp);
rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
@@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev)
if (rc)
return rc;
- addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+ dax_pmem->pgmap.ref = &dax_pmem->ref;
+ addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
@@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev)
return rc;
/* adjust the dax_region resource to the start of data */
+ memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
res.start += le64_to_cpu(pfn_sb->dataoff);
rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 3ec804672601..473af694ad1c 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -15,6 +15,7 @@
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/genhd.h>
+#include <linux/pfn_t.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
@@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
return len < 0 ? len : -EIO;
}
+ if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
+ || pfn_t_devmap(pfn))
+ /* pass */;
+ else {
+ pr_debug("VFS (%s): error: dax support not enabled\n",
+ sb->s_id);
+ return -EOPNOTSUPP;
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index c586bcdb5190..2ef544f10ec8 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -753,6 +753,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
return NULL;
arena->nd_btt = btt->nd_btt;
arena->sector_size = btt->sector_size;
+ mutex_init(&arena->err_lock);
if (!size)
return arena;
@@ -891,7 +892,6 @@ static int discover_arenas(struct btt *btt)
goto out;
}
- mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena);
if (ret)
goto out;
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 0a5e6cd758fe..78eabc3a1ab1 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1142,9 +1142,6 @@ int __init nvdimm_bus_init(void)
{
int rc;
- BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
- BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
-
rc = bus_register(&nvdimm_bus_type);
if (rc)
return rc;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index bb3ba8cf24d4..658ada497be0 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2408,7 +2408,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
static struct device **create_namespaces(struct nd_region *nd_region)
{
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nd_mapping *nd_mapping;
struct device **devs;
int i;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e958f3724c41..8d6375ee0fda 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM)
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap);
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
#else
-static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+ struct dev_pagemap *pgmap)
{
- return ERR_PTR(-ENXIO);
+ return -ENXIO;
}
static inline int devm_nsio_enable(struct device *dev,
struct nd_namespace_io *nsio)
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2adada1a5855..f5c4e8c6e29d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base)
return reserve;
}
-static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
+ struct resource *res = &pgmap->res;
+ struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
res->start += start_pad;
res->end -= end_trunc;
+ pgmap->type = MEMORY_DEVICE_HOST;
+
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < SZ_8K)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
- altmap = NULL;
+ pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- offset) / PAGE_SIZE);
@@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
memcpy(altmap, &__altmap, sizeof(*altmap));
altmap->free = PHYS_PFN(offset - SZ_8K);
altmap->alloc = 0;
+ pgmap->altmap_valid = true;
} else
- return ERR_PTR(-ENXIO);
+ return -ENXIO;
- return altmap;
+ return 0;
}
static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
@@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* Determine the effective resource range and vmem_altmap from an nd_pfn
* instance.
*/
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
int rc;
if (!nd_pfn->uuid || !nd_pfn->ndns)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
rc = nd_pfn_init(nd_pfn);
if (rc)
- return ERR_PTR(rc);
+ return rc;
- /* we need a valid pfn_sb before we can init a vmem_altmap */
- return __nvdimm_setup_pfn(nd_pfn, res, altmap);
+ /* we need a valid pfn_sb before we can init a dev_pagemap */
+ return __nvdimm_setup_pfn(nd_pfn, pgmap);
}
EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8aa542398db4..10041ac4032c 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -299,34 +299,34 @@ static int pmem_attach_disk(struct device *dev,
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
- struct vmem_altmap __altmap, *altmap = NULL;
int nid = dev_to_node(dev), fua, wbc;
struct resource *res = &nsio->res;
+ struct resource bb_res;
struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
- struct resource pfn_res;
struct request_queue *q;
struct device *gendev;
struct gendisk *disk;
void *addr;
+ int rc;
+
+ pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
+ if (!pmem)
+ return -ENOMEM;
/* while nsio_rw_bytes is active, parse a pfn info block if present */
if (is_nd_pfn(dev)) {
nd_pfn = to_nd_pfn(dev);
- altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
- if (IS_ERR(altmap))
- return PTR_ERR(altmap);
+ rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
+ if (rc)
+ return rc;
}
/* we're attaching a block device, disable raw namespace access */
devm_nsio_disable(dev, nsio);
- pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
- if (!pmem)
- return -ENOMEM;
-
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
@@ -352,19 +352,22 @@ static int pmem_attach_disk(struct device *dev,
return -ENOMEM;
pmem->pfn_flags = PFN_DEV;
+ pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) {
- addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
- altmap);
+ addr = devm_memremap_pages(dev, &pmem->pgmap);
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
- pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
+ pmem->pfn_pad = resource_size(res) -
+ resource_size(&pmem->pgmap.res);
pmem->pfn_flags |= PFN_MAP;
- res = &pfn_res; /* for badblocks populate */
- res->start += pmem->data_offset;
+ memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+ bb_res.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
- addr = devm_memremap_pages(dev, &nsio->res,
- &q->q_usage_counter, NULL);
+ memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+ pmem->pgmap.altmap_valid = false;
+ addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
+ memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
@@ -403,7 +406,7 @@ static int pmem_attach_disk(struct device *dev,
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 6a3cd2a10db6..a64ebc78b5df 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -22,6 +22,7 @@ struct pmem_device {
struct badblocks bb;
struct dax_device *dax_dev;
struct gendisk *disk;
+ struct dev_pagemap pgmap;
};
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index bc27d716aa6b..1444333210c7 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -16,6 +16,7 @@ config BLK_DEV_XPRAM
config DCSSBLK
def_tristate m
select DAX
+ select FS_DAX_LIMITED
prompt "DCSSBLK support"
depends on S390 && BLOCK
help
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6aaefb780436..9cae08b36b80 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
dev_sz = dev_info->end - dev_info->start + 1;
*kaddr = (void *) dev_info->start + offset;
- *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
+ *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
+ PFN_DEV|PFN_SPECIAL);
return (dev_sz - offset) / PAGE_SIZE;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index 7aee6d699fd6..b40128bf6d1a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -58,6 +58,13 @@ config FS_DAX_PMD
depends on ZONE_DEVICE
depends on TRANSPARENT_HUGEPAGE
+# Selected by DAX drivers that do not expect filesystem DAX to support
+# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
+# for fork() of processes with MAP_SHARED mappings or support for
+# direct-I/O to a DAX mapping.
+config FS_DAX_LIMITED
+ bool
+
endif # BLOCK
# Posix ACL utility routines
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7646818ab266..38f9222606ee 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -959,8 +959,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
err = bdev_dax_supported(sb, blocksize);
- if (err)
- goto failed_mount;
+ if (err) {
+ ext2_msg(sb, KERN_ERR,
+ "DAX unsupported by block device. Turning off DAX.");
+ sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
+ }
}
/* If the blocksize doesn't match, re-read the thing.. */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7c46693a14d7..18873ea89e08 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3710,11 +3710,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
- goto failed_mount;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
}
err = bdev_dax_supported(sb, blocksize);
- if (err)
- goto failed_mount;
+ if (err) {
+ ext4_msg(sb, KERN_ERR,
+ "DAX unsupported by block device. Turning off DAX.");
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
+ }
}
if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 58e110aee7ab..aba5f86eb038 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,6 +13,7 @@ struct pglist_data;
struct mem_section;
struct memory_block;
struct resource;
+struct vmem_altmap;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void)
#ifdef CONFIG_MEMORY_HOTREMOVE
extern bool is_pageblock_removable_nolock(struct page *page);
-extern int arch_remove_memory(u64 start, u64 size);
+extern int arch_remove_memory(u64 start, u64 size,
+ struct vmem_altmap *altmap);
extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages);
+ unsigned long nr_pages, struct vmem_altmap *altmap);
#endif /* CONFIG_MEMORY_HOTREMOVE */
/* reasonably generic interface to expand the physical pages */
-extern int __add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock);
+extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap, bool want_memblock);
#ifndef CONFIG_ARCH_HAS_ADD_PAGES
static inline int add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock)
+ unsigned long nr_pages, struct vmem_altmap *altmap,
+ bool want_memblock)
{
- return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#else /* ARCH_HAS_ADD_PAGES */
-int add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock);
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap, bool want_memblock);
#endif /* ARCH_HAS_ADD_PAGES */
#ifdef CONFIG_NUMA
@@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap, bool want_memblock);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages);
+ unsigned long nr_pages, struct vmem_altmap *altmap);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat,
+ unsigned long start_pfn, struct vmem_altmap *altmap);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
- unsigned long map_offset);
+ unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 10d23c367048..7b4899c06f49 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,18 +26,6 @@ struct vmem_altmap {
unsigned long alloc;
};
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-#ifdef CONFIG_ZONE_DEVICE
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
-#else
-static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
-{
- return NULL;
-}
-#endif
-
/*
* Specialize ZONE_DEVICE memory into multiple types each having differents
* usage.
@@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
struct dev_pagemap {
dev_page_fault_t page_fault;
dev_page_free_t page_free;
- struct vmem_altmap *altmap;
- const struct resource *res;
+ struct vmem_altmap altmap;
+ bool altmap_valid;
+ struct resource res;
struct percpu_ref *ref;
struct device *dev;
void *data;
@@ -134,15 +123,17 @@ struct dev_pagemap {
};
#ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap);
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
static inline bool is_zone_device_page(const struct page *page);
#else
static inline void *devm_memremap_pages(struct device *dev,
- struct resource *res, struct percpu_ref *ref,
- struct vmem_altmap *altmap)
+ struct dev_pagemap *pgmap)
{
/*
* Fail attempts to call devm_memremap_pages() without
@@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
-static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap)
{
return NULL;
}
-#endif
+
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+ return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+ unsigned long nr_pfns)
+{
+}
+#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
static inline bool is_device_private_page(const struct page *page)
@@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page)
}
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
- */
-static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
- struct dev_pagemap *pgmap)
-{
- const struct resource *res = pgmap ? pgmap->res : NULL;
- resource_size_t phys = PFN_PHYS(pfn);
-
- /*
- * In the cached case we're already holding a live reference so
- * we can simply do a blind increment
- */
- if (res && phys >= res->start && phys <= res->end) {
- percpu_ref_get(pgmap->ref);
- return pgmap;
- }
-
- /* fall back to slow path lookup */
- rcu_read_lock();
- pgmap = find_dev_pagemap(phys);
- if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
- pgmap = NULL;
- rcu_read_unlock();
-
- return pgmap;
-}
-
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ea818ff739cd..09637c353de0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2069,8 +2069,8 @@ static inline void zero_resv_unavail(void) {}
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
- unsigned long, enum memmap_context);
+extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+ enum memmap_context, struct vmem_altmap *);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
@@ -2538,7 +2538,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count,
int nodeid);
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
+ struct vmem_altmap *altmap);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -2546,20 +2547,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
-void *__vmemmap_alloc_block_buf(unsigned long size, int node,
- struct vmem_altmap *altmap);
-static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
-{
- return __vmemmap_alloc_block_buf(size, node, NULL);
-}
-
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
+void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
-int vmemmap_populate(unsigned long start, unsigned long end, int node);
+int vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap);
void vmemmap_populate_print_last(void);
#ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_free(unsigned long start, unsigned long end);
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap);
#endif
void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
unsigned long nr_pages);
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 43b1d7648e82..a03c2642a87c 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -15,8 +15,10 @@
#define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
#define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
#define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
#define PFN_FLAGS_TRACE \
+ { PFN_SPECIAL, "SPECIAL" }, \
{ PFN_SG_CHAIN, "SG_CHAIN" }, \
{ PFN_SG_LAST, "SG_LAST" }, \
{ PFN_DEV, "DEV" }, \
@@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud);
#endif
#endif /* __HAVE_ARCH_PTE_DEVMAP */
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+static inline bool pfn_t_special(pfn_t pfn)
+{
+ return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
+}
+#else
+static inline bool pfn_t_special(pfn_t pfn)
+{
+ return false;
+}
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
#endif /* _LINUX_PFN_T_H_ */
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 3f03567631cb..7e27070b9440 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -15,54 +15,6 @@
#include <linux/types.h>
-struct nd_cmd_smart {
- __u32 status;
- __u8 data[128];
-} __packed;
-
-#define ND_SMART_HEALTH_VALID (1 << 0)
-#define ND_SMART_SPARES_VALID (1 << 1)
-#define ND_SMART_USED_VALID (1 << 2)
-#define ND_SMART_TEMP_VALID (1 << 3)
-#define ND_SMART_CTEMP_VALID (1 << 4)
-#define ND_SMART_ALARM_VALID (1 << 9)
-#define ND_SMART_SHUTDOWN_VALID (1 << 10)
-#define ND_SMART_VENDOR_VALID (1 << 11)
-#define ND_SMART_SPARE_TRIP (1 << 0)
-#define ND_SMART_TEMP_TRIP (1 << 1)
-#define ND_SMART_CTEMP_TRIP (1 << 2)
-#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0)
-#define ND_SMART_CRITICAL_HEALTH (1 << 1)
-#define ND_SMART_FATAL_HEALTH (1 << 2)
-
-struct nd_smart_payload {
- __u32 flags;
- __u8 reserved0[4];
- __u8 health;
- __u8 spares;
- __u8 life_used;
- __u8 alarm_flags;
- __u16 temperature;
- __u16 ctrl_temperature;
- __u8 reserved1[15];
- __u8 shutdown_state;
- __u32 vendor_size;
- __u8 vendor_data[92];
-} __packed;
-
-struct nd_cmd_smart_threshold {
- __u32 status;
- __u8 data[8];
-} __packed;
-
-struct nd_smart_threshold_payload {
- __u8 alarm_control;
- __u8 reserved0;
- __u16 temperature;
- __u8 spares;
- __u8 reserved[3];
-} __packed;
-
struct nd_cmd_dimm_flags {
__u32 status;
__u32 flags;
@@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
#define ND_IOCTL 'N'
-#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\
- struct nd_cmd_smart)
-
-#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
- struct nd_cmd_smart_threshold)
-
#define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
struct nd_cmd_dimm_flags)
@@ -263,7 +209,7 @@ enum nd_driver_flags {
};
enum {
- ND_MIN_NAMESPACE_SIZE = 0x00400000,
+ ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
};
enum ars_masks {
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 403ab9cdb949..4849be5f9b3c 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-struct page_map {
- struct resource res;
- struct percpu_ref *ref;
- struct dev_pagemap pgmap;
- struct vmem_altmap altmap;
-};
-
static unsigned long order_at(struct resource *res, unsigned long pgoff)
{
unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma,
EXPORT_SYMBOL(device_private_entry_fault);
#endif /* CONFIG_DEVICE_PRIVATE */
-static void pgmap_radix_release(struct resource *res)
+static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
{
unsigned long pgoff, order;
mutex_lock(&pgmap_lock);
- foreach_order_pgoff(res, order, pgoff)
+ foreach_order_pgoff(res, order, pgoff) {
+ if (pgoff >= end_pgoff)
+ break;
radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
+ }
mutex_unlock(&pgmap_lock);
synchronize_rcu();
}
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
{
- struct dev_pagemap *pgmap = &page_map->pgmap;
- const struct resource *res = &page_map->res;
- struct vmem_altmap *altmap = pgmap->altmap;
+ const struct resource *res = &pgmap->res;
+ struct vmem_altmap *altmap = &pgmap->altmap;
unsigned long pfn;
pfn = res->start >> PAGE_SHIFT;
- if (altmap)
+ if (pgmap->altmap_valid)
pfn += vmem_altmap_offset(altmap);
return pfn;
}
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
{
- const struct resource *res = &page_map->res;
+ const struct resource *res = &pgmap->res;
return (res->start + resource_size(res)) >> PAGE_SHIFT;
}
@@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map)
#define for_each_device_pfn(pfn, map) \
for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
{
- struct page_map *page_map = data;
- struct resource *res = &page_map->res;
+ struct dev_pagemap *pgmap = data;
+ struct device *dev = pgmap->dev;
+ struct resource *res = &pgmap->res;
resource_size_t align_start, align_size;
- struct dev_pagemap *pgmap = &page_map->pgmap;
unsigned long pfn;
- for_each_device_pfn(pfn, page_map)
+ for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));
if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
/* pages are dead and unused, undo the arch mapping */
align_start = res->start & ~(SECTION_SIZE - 1);
- align_size = ALIGN(resource_size(res), SECTION_SIZE);
+ align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+ - align_start;
mem_hotplug_begin();
- arch_remove_memory(align_start, align_size);
+ arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
+ &pgmap->altmap : NULL);
mem_hotplug_done();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
- pgmap_radix_release(res);
- dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
- "%s: failed to free all reserved pages\n", __func__);
-}
-
-/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
- struct page_map *page_map;
-
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
- return page_map ? &page_map->pgmap : NULL;
+ pgmap_radix_release(res, -1);
+ dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+ "%s: failed to free all reserved pages\n", __func__);
}
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pagemap
*
* Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- * (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ * by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ * must be set to true
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ * time (or devm release event). The expected order of events is that ref has
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
*
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
* this is not enforced.
*/
-void *devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
resource_size_t align_start, align_size, align_end;
+ struct vmem_altmap *altmap = pgmap->altmap_valid ?
+ &pgmap->altmap : NULL;
unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
- struct dev_pagemap *pgmap;
- struct page_map *page_map;
int error, nid, is_ram, i = 0;
+ struct resource *res = &pgmap->res;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (is_ram == REGION_INTERSECTS)
return __va(res->start);
- if (!ref)
+ if (!pgmap->ref)
return ERR_PTR(-EINVAL);
- page_map = devres_alloc_node(devm_memremap_pages_release,
- sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
- if (!page_map)
- return ERR_PTR(-ENOMEM);
- pgmap = &page_map->pgmap;
-
- memcpy(&page_map->res, res, sizeof(*res));
-
pgmap->dev = dev;
- if (altmap) {
- memcpy(&page_map->altmap, altmap, sizeof(*altmap));
- pgmap->altmap = &page_map->altmap;
- }
- pgmap->ref = ref;
- pgmap->res = &page_map->res;
- pgmap->type = MEMORY_DEVICE_HOST;
- pgmap->page_fault = NULL;
- pgmap->page_free = NULL;
- pgmap->data = NULL;
mutex_lock(&pgmap_lock);
error = 0;
align_end = align_start + align_size - 1;
foreach_order_pgoff(res, order, pgoff) {
- struct dev_pagemap *dup;
-
- rcu_read_lock();
- dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
- rcu_read_unlock();
- if (dup) {
- dev_err(dev, "%s: %pr collides with mapping for %s\n",
- __func__, res, dev_name(dup->dev));
- error = -EBUSY;
- break;
- }
error = __radix_tree_insert(&pgmap_radix,
- PHYS_PFN(res->start) + pgoff, order, page_map);
+ PHYS_PFN(res->start) + pgoff, order, pgmap);
if (error) {
dev_err(dev, "%s: failed: %d\n", __func__, error);
break;
@@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
goto err_pfn_remap;
mem_hotplug_begin();
- error = arch_add_memory(nid, align_start, align_size, false);
+ error = arch_add_memory(nid, align_start, align_size, altmap, false);
if (!error)
move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT);
+ align_size >> PAGE_SHIFT, altmap);
mem_hotplug_done();
if (error)
goto err_add_memory;
- for_each_device_pfn(pfn, page_map) {
+ for_each_device_pfn(pfn, pgmap) {
struct page *page = pfn_to_page(pfn);
/*
@@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
*/
list_del(&page->lru);
page->pgmap = pgmap;
- percpu_ref_get(ref);
+ percpu_ref_get(pgmap->ref);
if (!(++i % 1024))
cond_resched();
}
- devres_add(dev, page_map);
+
+ devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
return __va(res->start);
err_add_memory:
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
err_pfn_remap:
err_radix:
- pgmap_radix_release(res);
- devres_free(page_map);
+ pgmap_radix_release(res, pgoff);
+ devres_free(pgmap);
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
@@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
altmap->alloc -= nr_pfns;
}
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to look up the dev_pagemap for
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap)
{
- /*
- * 'memmap_start' is the virtual address for the first "struct
- * page" in this range of the vmemmap array. In the case of
- * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
- * pointer arithmetic, so we can perform this to_vmem_altmap()
- * conversion without concern for the initialization state of
- * the struct page fields.
- */
- struct page *page = (struct page *) memmap_start;
- struct dev_pagemap *pgmap;
+ resource_size_t phys = PFN_PHYS(pfn);
/*
- * Unconditionally retrieve a dev_pagemap associated with the
- * given physical address, this is only for use in the
- * arch_{add|remove}_memory() for setting up and tearing down
- * the memmap.
+ * In the cached case we're already holding a live reference.
*/
+ if (pgmap) {
+ if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+ return pgmap;
+ put_dev_pagemap(pgmap);
+ }
+
+ /* fall back to slow path lookup */
rcu_read_lock();
- pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+ pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+ if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+ pgmap = NULL;
rcu_read_unlock();
- return pgmap ? pgmap->altmap : NULL;
+ return pgmap;
}
#endif /* CONFIG_ZONE_DEVICE */
-
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
void put_zone_device_private_or_public_page(struct page *page)
{
diff --git a/mm/gup.c b/mm/gup.c
index e0d82b6706d7..3affe7544b0c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1410,7 +1410,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON_PAGE(compound_head(page) != head, page);
- put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -1420,6 +1419,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
ret = 1;
pte_unmap:
+ if (pgmap)
+ put_dev_pagemap(pgmap);
pte_unmap(ptem);
return ret;
}
@@ -1459,10 +1460,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
SetPageReferenced(page);
pages[*nr] = page;
get_page(page);
- put_dev_pagemap(pgmap);
(*nr)++;
pfn++;
} while (addr += PAGE_SIZE, addr != end);
+
+ if (pgmap)
+ put_dev_pagemap(pgmap);
return 1;
}
diff --git a/mm/hmm.c b/mm/hmm.c
index ea19742a5d60..320fdc87f064 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -838,10 +838,10 @@ static void hmm_devmem_release(struct device *dev, void *data)
mem_hotplug_begin();
if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
- __remove_pages(zone, start_pfn, npages);
+ __remove_pages(zone, start_pfn, npages, NULL);
else
arch_remove_memory(start_pfn << PAGE_SHIFT,
- npages << PAGE_SHIFT);
+ npages << PAGE_SHIFT, NULL);
mem_hotplug_done();
hmm_devmem_radix_release(resource);
@@ -882,7 +882,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
else
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
- devmem->pagemap.res = devmem->resource;
+ devmem->pagemap.res = *devmem->resource;
devmem->pagemap.page_fault = hmm_devmem_fault;
devmem->pagemap.page_free = hmm_devmem_free;
devmem->pagemap.dev = devmem->device;
@@ -931,17 +931,18 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
* want the linear mapping and thus use arch_add_memory().
*/
if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC)
- ret = arch_add_memory(nid, align_start, align_size, false);
+ ret = arch_add_memory(nid, align_start, align_size, NULL,
+ false);
else
ret = add_pages(nid, align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT, false);
+ align_size >> PAGE_SHIFT, NULL, false);
if (ret) {
mem_hotplug_done();
goto error_add_memory;
}
move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
align_start >> PAGE_SHIFT,
- align_size >> PAGE_SHIFT);
+ align_size >> PAGE_SHIFT, NULL);
mem_hotplug_done();
for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
diff --git a/mm/memory.c b/mm/memory.c
index ca5674cbaff2..46b6c33b7f04 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1897,12 +1897,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(vm_insert_pfn_prot);
+/*
+ * Decide whether @pfn may be inserted into @vma by __vm_insert_mixed().
+ * The accepted cases mirror the abort conditions in vm_normal_page: a
+ * VM_MIXEDMAP vma, a devmap pfn, a special pfn, or the zero pfn.
+ */
+static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
+{
+	return (vma->vm_flags & VM_MIXEDMAP) ||
+		pfn_t_devmap(pfn) ||
+		pfn_t_special(pfn) ||
+		is_zero_pfn(pfn_t_to_pfn(pfn));
+}
+
static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn, bool mkwrite)
{
pgprot_t pgprot = vma->vm_page_prot;
- BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+ BUG_ON(!vm_mixed_ok(vma, pfn));
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c52aa05b106c..12df8a5fadcc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -250,7 +250,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
- bool want_memblock)
+ struct vmem_altmap *altmap, bool want_memblock)
{
int ret;
int i;
@@ -258,7 +258,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
if (pfn_valid(phys_start_pfn))
return -EEXIST;
- ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
+ ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
if (ret < 0)
return ret;
@@ -292,18 +292,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
* add the new pages.
*/
int __ref __add_pages(int nid, unsigned long phys_start_pfn,
- unsigned long nr_pages, bool want_memblock)
+ unsigned long nr_pages, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long i;
int err = 0;
int start_sec, end_sec;
- struct vmem_altmap *altmap;
/* during initialize mem_map, align hot-added range to section */
start_sec = pfn_to_section_nr(phys_start_pfn);
end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
- altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
if (altmap) {
/*
* Validate altmap is within bounds of the total request
@@ -318,7 +317,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
}
for (i = start_sec; i <= end_sec; i++) {
- err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
+ err = __add_section(nid, section_nr_to_pfn(i), altmap,
+ want_memblock);
/*
* EEXIST is finally dealt with by ioresource collision
@@ -334,7 +334,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
out:
return err;
}
-EXPORT_SYMBOL_GPL(__add_pages);
#ifdef CONFIG_MEMORY_HOTREMOVE
/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
@@ -537,7 +536,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
}
static int __remove_section(struct zone *zone, struct mem_section *ms,
- unsigned long map_offset)
+ unsigned long map_offset, struct vmem_altmap *altmap)
{
unsigned long start_pfn;
int scn_nr;
@@ -554,7 +553,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
__remove_zone(zone, start_pfn);
- sparse_remove_one_section(zone, ms, map_offset);
+ sparse_remove_one_section(zone, ms, map_offset, altmap);
return 0;
}
@@ -570,7 +569,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
* calling offline_pages().
*/
int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
- unsigned long nr_pages)
+ unsigned long nr_pages, struct vmem_altmap *altmap)
{
unsigned long i;
unsigned long map_offset = 0;
@@ -578,10 +577,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
/* In the ZONE_DEVICE case device driver owns the memory region */
if (is_dev_zone(zone)) {
- struct page *page = pfn_to_page(phys_start_pfn);
- struct vmem_altmap *altmap;
-
- altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
map_offset = vmem_altmap_offset(altmap);
} else {
@@ -612,7 +607,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
for (i = 0; i < sections_to_remove; i++) {
unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
- ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
+ ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
+ altmap);
map_offset = 0;
if (ret)
break;
@@ -802,8 +798,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
}
-void __ref move_pfn_range_to_zone(struct zone *zone,
- unsigned long start_pfn, unsigned long nr_pages)
+void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages, struct vmem_altmap *altmap)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
@@ -828,7 +824,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone,
* expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe
*/
- memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+ MEMMAP_HOTPLUG, altmap);
set_zone_contiguous(zone);
}
@@ -900,7 +897,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
struct zone *zone;
zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
- move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+ move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
return zone;
}
@@ -1149,7 +1146,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
}
/* call arch's memory hotadd */
- ret = arch_add_memory(nid, start, size, true);
+ ret = arch_add_memory(nid, start, size, NULL, true);
if (ret < 0)
goto error;
@@ -1891,7 +1888,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
memblock_free(start, size);
memblock_remove(start, size);
- arch_remove_memory(start, size);
+ arch_remove_memory(start, size, NULL);
try_offline_node(nid);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 76c9688b6a0a..2bb7f163baca 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5314,9 +5314,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
* done. Non-atomic initialization, single-pass.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn, enum memmap_context context)
+ unsigned long start_pfn, enum memmap_context context,
+ struct vmem_altmap *altmap)
{
- struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
unsigned long end_pfn = start_pfn + size;
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long pfn;
@@ -5417,7 +5417,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
#ifndef __HAVE_ARCH_MEMMAP_INIT
#define memmap_init(size, nid, zone, start_pfn) \
- memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
+ memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL)
#endif
static int zone_batchsize(struct zone *zone)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 17acf01791fa..bd0276d5f66b 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
}
/* need to make sure size is all the same during early stage */
-static void * __meminit alloc_block_buf(unsigned long size, int node)
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
void *ptr;
@@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
}
/**
- * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
- * @altmap - reserved page pool for the allocation
- * @nr_pfns - size (in pages) of the allocation
+ * altmap_alloc_block_buf - allocate pages from the device page map
+ * @altmap: device page map
+ * @size: size (in bytes) of the allocation
*
- * Allocations are aligned to the size of the request
+ * Allocations are aligned to the size of the request.
*/
-static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
- unsigned long nr_pfns)
-{
- unsigned long pfn = vmem_altmap_next_pfn(altmap);
- unsigned long nr_align;
-
- nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
- nr_align = ALIGN(pfn, nr_align) - pfn;
-
- if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
- return ULONG_MAX;
- altmap->alloc += nr_pfns;
- altmap->align += nr_align;
- return pfn + nr_align;
-}
-
-static void * __meminit altmap_alloc_block_buf(unsigned long size,
+void * __meminit altmap_alloc_block_buf(unsigned long size,
struct vmem_altmap *altmap)
{
- unsigned long pfn, nr_pfns;
- void *ptr;
+ unsigned long pfn, nr_pfns, nr_align;
if (size & ~PAGE_MASK) {
pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
@@ -141,25 +124,20 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
return NULL;
}
+ pfn = vmem_altmap_next_pfn(altmap);
nr_pfns = size >> PAGE_SHIFT;
- pfn = vmem_altmap_alloc(altmap, nr_pfns);
- if (pfn < ULONG_MAX)
- ptr = __va(__pfn_to_phys(pfn));
- else
- ptr = NULL;
- pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
- __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
+ nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
+ nr_align = ALIGN(pfn, nr_align) - pfn;
+ if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
+ return NULL;
- return ptr;
-}
+ altmap->alloc += nr_pfns;
+ altmap->align += nr_align;
+ pfn += nr_align;
-/* need to make sure size is all the same during early stage */
-void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
- struct vmem_altmap *altmap)
-{
- if (altmap)
- return altmap_alloc_block_buf(size, altmap);
- return alloc_block_buf(size, node);
+ pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
+ __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
+ return __va(__pfn_to_phys(pfn));
}
void __meminit vmemmap_verify(pte_t *pte, int node,
@@ -178,7 +156,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
pte_t *pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
pte_t entry;
- void *p = alloc_block_buf(PAGE_SIZE, node);
+ void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
return NULL;
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -278,7 +256,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
return 0;
}
-struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
+ struct vmem_altmap *altmap)
{
unsigned long start;
unsigned long end;
@@ -288,7 +267,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
start = (unsigned long)map;
end = (unsigned long)(map + PAGES_PER_SECTION);
- if (vmemmap_populate(start, end, nid))
+ if (vmemmap_populate(start, end, nid, altmap))
return NULL;
return map;
@@ -318,7 +297,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
if (!present_section_nr(pnum))
continue;
- map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
diff --git a/mm/sparse.c b/mm/sparse.c
index 2609aba121e8..2583174b1d62 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -417,7 +417,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
+ struct vmem_altmap *altmap)
{
struct page *map;
unsigned long size;
@@ -472,7 +473,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
if (!present_section_nr(pnum))
continue;
- map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+ map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
if (map_map[pnum])
continue;
ms = __nr_to_section(pnum);
@@ -500,7 +501,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
struct mem_section *ms = __nr_to_section(pnum);
int nid = sparse_early_nid(ms);
- map = sparse_mem_map_populate(pnum, nid);
+ map = sparse_mem_map_populate(pnum, nid, NULL);
if (map)
return map;
@@ -678,17 +679,19 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+ struct vmem_altmap *altmap)
{
/* This will make the necessary allocations eventually. */
- return sparse_mem_map_populate(pnum, nid);
+ return sparse_mem_map_populate(pnum, nid, altmap);
}
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+ struct vmem_altmap *altmap)
{
unsigned long start = (unsigned long)memmap;
unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
- vmemmap_free(start, end);
+ vmemmap_free(start, end, altmap);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
@@ -696,7 +699,7 @@ static void free_map_bootmem(struct page *memmap)
unsigned long start = (unsigned long)memmap;
unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
- vmemmap_free(start, end);
+ vmemmap_free(start, end, NULL);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
@@ -721,12 +724,14 @@ got_map_ptr:
return ret;
}
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+ struct vmem_altmap *altmap)
{
return __kmalloc_section_memmap();
}
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+ struct vmem_altmap *altmap)
{
if (is_vmalloc_addr(memmap))
vfree(memmap);
@@ -773,7 +778,8 @@ static void free_map_bootmem(struct page *memmap)
* set. If this is <=0, then that means that the passed-in
* map was not consumed and must be freed.
*/
-int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
+int __meminit sparse_add_one_section(struct pglist_data *pgdat,
+ unsigned long start_pfn, struct vmem_altmap *altmap)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
struct mem_section *ms;
@@ -789,12 +795,12 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st
ret = sparse_index_init(section_nr, pgdat->node_id);
if (ret < 0 && ret != -EEXIST)
return ret;
- memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
+ memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
if (!memmap)
return -ENOMEM;
usemap = __kmalloc_section_usemap();
if (!usemap) {
- __kfree_section_memmap(memmap);
+ __kfree_section_memmap(memmap, altmap);
return -ENOMEM;
}
@@ -816,7 +822,7 @@ out:
pgdat_resize_unlock(pgdat, &flags);
if (ret <= 0) {
kfree(usemap);
- __kfree_section_memmap(memmap);
+ __kfree_section_memmap(memmap, altmap);
}
return ret;
}
@@ -843,7 +849,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
}
#endif
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+static void free_section_usemap(struct page *memmap, unsigned long *usemap,
+ struct vmem_altmap *altmap)
{
struct page *usemap_page;
@@ -857,7 +864,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
kfree(usemap);
if (memmap)
- __kfree_section_memmap(memmap);
+ __kfree_section_memmap(memmap, altmap);
return;
}
@@ -871,7 +878,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
}
void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
- unsigned long map_offset)
+ unsigned long map_offset, struct vmem_altmap *altmap)
{
struct page *memmap = NULL;
unsigned long *usemap = NULL, flags;
@@ -889,7 +896,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
clear_hwpoisoned_pages(memmap + map_offset,
PAGES_PER_SECTION - map_offset);
- free_section_usemap(memmap, usemap);
+ free_section_usemap(memmap, usemap, altmap);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index db33b28c5ef3..0392153a0009 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
nfit-y := $(ACPI_SRC)/core.o
nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
nfit-y += config_check.o
nd_pmem-y := $(NVDIMM_SRC)/pmem.o
nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
nd_pmem-y += config_check.o
nd_btt-y := $(NVDIMM_SRC)/btt.o
@@ -57,6 +59,7 @@ dax-y += config_check.o
device_dax-y := $(DAX_SRC)/device.o
device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
device_dax-y += config_check.o
dax_pmem-y := $(DAX_SRC)/pmem.o
@@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
libnvdimm-y += config_check.o
obj-m += test/
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000000..43521512e577
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000000..24b17bf42429
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000000..00ca30b23932
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000000..fd38f92275cf
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e1f75a1914a1..ff9d3a5825e1 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
}
EXPORT_SYMBOL(__wrap_devm_memremap);
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
- resource_size_t offset = res->start;
+ resource_size_t offset = pgmap->res.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res.start;
- return devm_memremap_pages(dev, res, ref, altmap);
+ return devm_memremap_pages(dev, pgmap);
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index de1373a7ed4f..620fa78b3b1b 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -27,6 +27,7 @@
#include <nfit.h>
#include <nd.h>
#include "nfit_test.h"
+#include "../watermark.h"
/*
* Generate an NFIT table to describe the following topology:
@@ -137,6 +138,14 @@ static u32 handle[] = {
static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+struct nfit_test_fw { /* emulated firmware-update state, one per test DIMM (t->fw[idx]) */
+ enum intel_fw_update_state state; /* current step of the emulated update sequence */
+ u32 context; /* bumped by each accepted start-update; later commands must echo it */
+ u64 version; /* reported as updated_version; set to INTEL_FW_FAKE_VERSION once updated */
+ u32 size_received; /* running total of payload lengths accepted by send-data */
+ u64 end_time; /* jiffies deadline after which the "verify" phase is considered done */
+};
+
struct nfit_test {
struct acpi_nfit_desc acpi_desc;
struct platform_device pdev;
@@ -168,8 +177,11 @@ struct nfit_test {
spinlock_t lock;
} ars_state;
struct device *dimm_dev[NUM_DCR];
+ struct nd_intel_smart *smart;
+ struct nd_intel_smart_threshold *smart_threshold;
struct badrange badrange;
struct work_struct work;
+ struct nfit_test_fw *fw;
};
static struct workqueue_struct *nfit_wq;
@@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev)
return container_of(pdev, struct nfit_test, pdev);
}
+/*
+ * Emulate the Intel "get firmware info" command for the test DIMM at @idx.
+ *
+ * Fills @nd_cmd with the fixed INTEL_FW_* limits used by the test harness
+ * and reports the version last recorded in the per-DIMM firmware state as
+ * the updated version.
+ *
+ * Returns -EINVAL if @buf_len is too small for @nd_cmd, otherwise 0.
+ */
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+		struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	/* same trace format as the other nd_intel_test_* handlers */
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+	nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+	nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+	nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+	nd_cmd->update_cap = 0;
+	nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+	nd_cmd->run_version = 0;
+	nd_cmd->updated_version = fw->version;
+
+	return 0;
+}
+
+/*
+ * Emulate the Intel "start firmware update" command for the test DIMM at
+ * @idx.
+ *
+ * An update can only be started from FW_STATE_NEW; in that case a fresh
+ * context is issued and the received-byte counter is reset.  From any other
+ * state the command completes with extended status 0x10007.
+ *
+ * Returns -EINVAL if @buf_len is too small for @nd_cmd, otherwise 0.
+ */
+static int nd_intel_test_start_update(struct nfit_test *t,
+		struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct nfit_test_fw *fw = &t->fw[idx];
+	struct device *dev = &t->pdev.dev;
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (fw->state == FW_STATE_NEW) {
+		fw->state = FW_STATE_IN_PROGRESS;
+		fw->size_received = 0;
+		nd_cmd->context = ++fw->context;
+		nd_cmd->status = 0;
+
+		dev_dbg(dev, "%s: context issued: %#x\n", __func__,
+				nd_cmd->context);
+	} else {
+		/* extended status, FW update in progress */
+		nd_cmd->status = 0x10007;
+	}
+
+	return 0;
+}
+
+/*
+ * Emulate the Intel "send firmware data" command for the test DIMM at @idx.
+ *
+ * The status word is not addressed as a fixed member of @nd_cmd: it sits
+ * immediately after the nd_cmd->length payload bytes.  @buf_len therefore
+ * has to cover the header, the payload and that trailing u32 before any of
+ * them is dereferenced.
+ * NOTE(review): assumes callers size @buf_len as input plus output payload,
+ * i.e. including the trailing status word - confirm against nfit_test_ctl().
+ *
+ * Returns -EINVAL if @buf_len cannot hold the command, otherwise 0 with the
+ * outcome stored in the trailing status word:
+ *   0x5     - no update is in progress
+ *   0x10007 - nd_cmd->context does not match the current update context
+ *   0x3     - offset/length exceed the firmware storage size or the maximum
+ *             send length
+ *   0       - data accepted (only the byte count is recorded)
+ */
+static int nd_intel_test_send_data(struct nfit_test *t,
+		struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+	u32 *status;
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	/* the fixed header and the trailing status word must both fit */
+	if (buf_len < sizeof(*nd_cmd) + sizeof(*status))
+		return -EINVAL;
+
+	/* the payload must end inside the buffer with room left for status */
+	if (nd_cmd->length > buf_len - sizeof(*nd_cmd) - sizeof(*status))
+		return -EINVAL;
+
+	/* only now is it safe to locate the status word behind the payload */
+	status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+	dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+	/* an empty payload has no first/last byte to show */
+	if (nd_cmd->length) {
+		dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__,
+				nd_cmd->data[0]);
+		dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__,
+				nd_cmd->length - 1,
+				nd_cmd->data[nd_cmd->length - 1]);
+	}
+
+	if (fw->state != FW_STATE_IN_PROGRESS) {
+		dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+		*status = 0x5;
+		return 0;
+	}
+
+	if (nd_cmd->context != fw->context) {
+		dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+				__func__, nd_cmd->context, fw->context);
+		*status = 0x10007;
+		return 0;
+	}
+
+	/*
+	 * check offset + len > size of fw storage
+	 * check length is > max send length
+	 *
+	 * The sum is widened to 64 bits so a large offset cannot wrap around
+	 * and slip under the storage limit.
+	 */
+	if ((u64)nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+			nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+		*status = 0x3;
+		dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+		return 0;
+	}
+
+	fw->size_received += nd_cmd->length;
+	dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+			__func__, nd_cmd->length, fw->size_received);
+	*status = 0;
+	return 0;
+}
+
+/*
+ * Emulate the Intel "finish firmware update" command for the test DIMM at
+ * @idx.
+ *
+ * nd_cmd->ctrl_flags selects the action:
+ *   0 - finish: requires a matching context (else status 0x10007), moves the
+ *       state machine to FW_STATE_VERIFY and arms a one second deadline that
+ *       the finish-query handler polls.
+ *   1 - abort: discards the received byte count, returns to FW_STATE_NEW and
+ *       reports status 0x40007.
+ * If the update has already completed (FW_STATE_UPDATED) the command reports
+ * status 0x20007 without looking at ctrl_flags.
+ *
+ * Returns -EINVAL if @buf_len is too small for @nd_cmd or ctrl_flags is
+ * unknown, otherwise 0.
+ */
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+		struct nd_intel_fw_finish_update *nd_cmd,
+		unsigned int buf_len, int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	/* validate the buffer before touching nd_cmd, like the sibling handlers */
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (fw->state == FW_STATE_UPDATED) {
+		/* update already done, need cold boot */
+		nd_cmd->status = 0x20007;
+		return 0;
+	}
+
+	dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n",
+			__func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+	switch (nd_cmd->ctrl_flags) {
+	case 0: /* finish */
+		if (nd_cmd->context != fw->context) {
+			dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+					__func__, nd_cmd->context,
+					fw->context);
+			nd_cmd->status = 0x10007;
+			return 0;
+		}
+		nd_cmd->status = 0;
+		fw->state = FW_STATE_VERIFY;
+		/*
+		 * set 1 second of time for firmware "update"
+		 *
+		 * end_time is a u64 that is checked with
+		 * time_is_after_jiffies64(), so take the deadline from the
+		 * 64-bit counter; plain jiffies differs from it on 32-bit.
+		 */
+		fw->end_time = get_jiffies_64() + HZ;
+		break;
+
+	case 1: /* abort */
+		fw->size_received = 0;
+		/* successfully aborted status */
+		nd_cmd->status = 0x40007;
+		fw->state = FW_STATE_NEW;
+		dev_dbg(dev, "%s: abort successful\n", __func__);
+		break;
+
+	default: /* bad control flag */
+		dev_warn(dev, "%s: unknown control flag: %#x\n",
+				__func__, nd_cmd->ctrl_flags);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+ struct nd_intel_fw_finish_query *nd_cmd,
+ unsigned int buf_len, int idx)
+{ /*
+ * Emulate the Intel "finish query" firmware command for the test DIMM at
+ * @idx: report, via nd_cmd->status and nd_cmd->updated_fw_rev, how far the
+ * emulated update in t->fw[idx] has progressed.
+ *
+ * Returns -EINVAL if @buf_len is too small for @nd_cmd or the state is not
+ * one of the handled values, otherwise 0.
+ */
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ if (nd_cmd->context != fw->context) { /* caller is not the owner of the current update */
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context, fw->context);
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+
+ dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+ switch (fw->state) {
+ case FW_STATE_NEW: /* nothing started: success with no new revision */
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: new state\n", __func__);
+ break;
+
+ case FW_STATE_IN_PROGRESS:
+ /* sequencing error */
+ nd_cmd->status = 0x40007;
+ nd_cmd->updated_fw_rev = 0;
+ dev_dbg(dev, "%s: sequence error\n", __func__);
+ break;
+
+ case FW_STATE_VERIFY:
+ if (time_is_after_jiffies64(fw->end_time)) { /* deadline armed by the finish command not reached yet */
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0x20007;
+ dev_dbg(dev, "%s: still verifying\n", __func__);
+ break;
+ }
+
+ dev_dbg(dev, "%s: transition out verify\n", __func__);
+ fw->state = FW_STATE_UPDATED;
+ /* we are going to fall through if it's "done" */
+ case FW_STATE_UPDATED:
+ nd_cmd->status = 0;
+ /* bogus test version */
+ fw->version = nd_cmd->updated_fw_rev =
+ INTEL_FW_FAKE_VERSION; /* also remembered in fw->version for later queries */
+ dev_dbg(dev, "%s: updated\n", __func__);
+ break;
+
+ default: /* we should never get here */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
unsigned int buf_len)
{
@@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
return 0;
}
-static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len)
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+ struct nd_intel_smart *smart_data)
{
- static const struct nd_smart_payload smart_data = {
- .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID
- | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID
- | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID,
- .health = ND_SMART_NON_CRITICAL_HEALTH,
- .temperature = 23 * 16,
- .spares = 75,
- .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
- .life_used = 5,
- .shutdown_state = 0,
- .vendor_size = 0,
- };
-
if (buf_len < sizeof(*smart))
return -EINVAL;
- memcpy(smart->data, &smart_data, sizeof(smart_data));
+ memcpy(smart, smart_data, sizeof(*smart));
return 0;
}
-static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t,
- unsigned int buf_len)
+static int nfit_test_cmd_smart_threshold(
+ struct nd_intel_smart_threshold *out,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *smart_t)
{
- static const struct nd_smart_threshold_payload smart_t_data = {
- .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP,
- .temperature = 40 * 16,
- .spares = 5,
- };
-
if (buf_len < sizeof(*smart_t))
return -EINVAL;
- memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data));
+ memcpy(out, smart_t, sizeof(*smart_t));
+ return 0;
+}
+
+/*
+ * Raise a health notification (event 0x81) on @dimm_dev when any alarm that
+ * is enabled in @thresh->alarm_control has tripped: spares at or below the
+ * threshold, or media/controller temperature at or above its threshold.
+ * The notification is delivered with @bus_dev locked.
+ */
+static void smart_notify(struct device *bus_dev,
+		struct device *dimm_dev, struct nd_intel_smart *smart,
+		struct nd_intel_smart_threshold *thresh)
+{
+	bool spare_trip, mtemp_trip, ctemp_trip;
+
+	dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+			__func__, thresh->alarm_control, thresh->spares,
+			smart->spares, thresh->media_temperature,
+			smart->media_temperature, thresh->ctrl_temperature,
+			smart->ctrl_temperature);
+
+	spare_trip = (thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+		&& smart->spares <= thresh->spares;
+	mtemp_trip = (thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+		&& smart->media_temperature >= thresh->media_temperature;
+	ctemp_trip = (thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+		&& smart->ctrl_temperature >= thresh->ctrl_temperature;
+
+	/* nothing tripped, nothing to report */
+	if (!spare_trip && !mtemp_trip && !ctemp_trip)
+		return;
+
+	device_lock(bus_dev);
+	__acpi_nvdimm_notify(dimm_dev, 0x81);
+	device_unlock(bus_dev);
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+ struct nd_intel_smart_set_threshold *in,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{ /* Store new thresholds from @in into @thresh, then re-evaluate alarms against @smart. */
+ unsigned int size;
+
+ size = sizeof(*in) - 4; /* NOTE(review): presumably skips the trailing 4-byte status field - confirm against the struct layout */
+ if (buf_len < size)
+ return -EINVAL;
+ memcpy(thresh->data, in, size);
+ in->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh); /* new thresholds may already be exceeded */
+
return 0;
}
@@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
return 0;
}
+/*
+ * Emulate the Intel "enable latch system shutdown status" command.
+ *
+ * nd_cmd->enable values 0 and 1 are accepted (status 0); any other value is
+ * reported back with status 0x3.
+ *
+ * Returns -EINVAL if @buf_len is too small for @nd_cmd, otherwise 0.
+ */
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+		struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+	struct device *dev = &t->pdev.dev;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (nd_cmd->enable == 0) {
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+				__func__);
+	} else if (nd_cmd->enable == 1) {
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+				__func__);
+	} else {
+		dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+		nd_cmd->status = 0x3;
+	}
+
+	return 0;
+}
+
+/*
+ * Map @nfit_mem to its index in the handle[] table.
+ *
+ * Returns the index on success, -ENXIO if the DIMM's device handle is not
+ * in handle[], or -EIO if command @func is flagged to fail for that DIMM in
+ * dimm_fail_cmd_flags[].
+ */
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+	int i;
+
+	/* lookup per-dimm data */
+	for (i = 0; i < ARRAY_SIZE(handle); i++)
+		if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+			break;
+	if (i >= ARRAY_SIZE(handle))
+		return -ENXIO;
+
+	/*
+	 * The fail mask is an unsigned long, so build the bit with an
+	 * unsigned long shift and never shift by the type width or more;
+	 * function numbers beyond the mask cannot be flagged.
+	 */
+	if (func < BITS_PER_LONG &&
+			((1UL << func) & dimm_fail_cmd_flags[i]))
+		return -EIO;
+
+	return i;
+}
+
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
func = call_pkg->nd_command;
if (call_pkg->nd_family != nfit_mem->family)
return -ENOTTY;
+
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
+
+ switch (func) {
+ case ND_INTEL_ENABLE_LSS_STATUS:
+ return nd_intel_test_cmd_set_lss_status(t,
+ buf, buf_len);
+ case ND_INTEL_FW_GET_INFO:
+ return nd_intel_test_get_fw_info(t, buf,
+ buf_len, i - t->dcr_idx);
+ case ND_INTEL_FW_START_UPDATE:
+ return nd_intel_test_start_update(t, buf,
+ buf_len, i - t->dcr_idx);
+ case ND_INTEL_FW_SEND_DATA:
+ return nd_intel_test_send_data(t, buf,
+ buf_len, i - t->dcr_idx);
+ case ND_INTEL_FW_FINISH_UPDATE:
+ return nd_intel_test_finish_fw(t, buf,
+ buf_len, i - t->dcr_idx);
+ case ND_INTEL_FW_FINISH_QUERY:
+ return nd_intel_test_finish_query(t, buf,
+ buf_len, i - t->dcr_idx);
+ case ND_INTEL_SMART:
+ return nfit_test_cmd_smart(buf, buf_len,
+ &t->smart[i - t->dcr_idx]);
+ case ND_INTEL_SMART_THRESHOLD:
+ return nfit_test_cmd_smart_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx]);
+ case ND_INTEL_SMART_SET_THRESHOLD:
+ return nfit_test_cmd_smart_set_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
+ default:
+ return -ENOTTY;
+ }
}
if (!test_bit(cmd, &cmd_mask)
|| !test_bit(func, &nfit_mem->dsm_mask))
return -ENOTTY;
- /* lookup label space for the given dimm */
- for (i = 0; i < ARRAY_SIZE(handle); i++)
- if (__to_nfit_memdev(nfit_mem)->device_handle ==
- handle[i])
- break;
- if (i >= ARRAY_SIZE(handle))
- return -ENXIO;
-
- if ((1 << func) & dimm_fail_cmd_flags[i])
- return -EIO;
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
switch (func) {
case ND_CMD_GET_CONFIG_SIZE:
@@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nfit_test_cmd_set_config_data(buf, buf_len,
t->label[i - t->dcr_idx]);
break;
- case ND_CMD_SMART:
- rc = nfit_test_cmd_smart(buf, buf_len);
- break;
- case ND_CMD_SMART_THRESHOLD:
- rc = nfit_test_cmd_smart_threshold(buf, buf_len);
- device_lock(&t->pdev.dev);
- __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
- device_unlock(&t->pdev.dev);
- break;
default:
return -ENOTTY;
}
@@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
NULL,
};
+static void smart_init(struct nfit_test *t)
+{
+ int i;
+ const struct nd_intel_smart_threshold smart_t_data = {
+ .alarm_control = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .media_temperature = 40 * 16,
+ .ctrl_temperature = 30 * 16,
+ .spares = 5,
+ };
+ const struct nd_intel_smart smart_data = {
+ .flags = ND_INTEL_SMART_HEALTH_VALID
+ | ND_INTEL_SMART_SPARES_VALID
+ | ND_INTEL_SMART_ALARM_VALID
+ | ND_INTEL_SMART_USED_VALID
+ | ND_INTEL_SMART_SHUTDOWN_VALID
+ | ND_INTEL_SMART_MTEMP_VALID,
+ .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+ .media_temperature = 23 * 16,
+ .ctrl_temperature = 30 * 16,
+ .pmic_temperature = 40 * 16,
+ .spares = 75,
+ .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .ait_status = 1,
+ .life_used = 5,
+ .shutdown_state = 0,
+ .vendor_size = 0,
+ .shutdown_count = 100,
+ };
+
+ for (i = 0; i < t->num_dcr; i++) {
+ memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
+ memcpy(&t->smart_threshold[i], &smart_t_data,
+ sizeof(smart_t_data));
+ }
+}
+
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -940,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
+ smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -970,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
if (!t->spa_set[1])
return -ENOMEM;
+ smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -1652,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
- set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
- set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -1760,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -2064,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev)
sizeof(struct nfit_test_dcr *), GFP_KERNEL);
nfit_test->dcr_dma = devm_kcalloc(dev, num,
sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->smart = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart), GFP_KERNEL);
+ nfit_test->smart_threshold = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart_threshold),
+ GFP_KERNEL);
+ nfit_test->fw = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_fw), GFP_KERNEL);
if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
&& nfit_test->label_dma && nfit_test->dcr
&& nfit_test->dcr_dma && nfit_test->flush
- && nfit_test->flush_dma)
+ && nfit_test->flush_dma
+ && nfit_test->fw)
/* pass */;
else
return -ENOMEM;
@@ -2169,6 +2556,11 @@ static __init int nfit_test_init(void)
{
int rc, i;
+ pmem_test();
+ libnvdimm_test();
+ acpi_nfit_test();
+ device_dax_test();
+
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
nfit_wq = create_singlethread_workqueue("nfit");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 113b44675a71..428344519cdf 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat {
} __packed record[0];
} __packed;
+#define ND_INTEL_SMART 1
+#define ND_INTEL_SMART_THRESHOLD 2
+#define ND_INTEL_ENABLE_LSS_STATUS 10
+#define ND_INTEL_FW_GET_INFO 12
+#define ND_INTEL_FW_START_UPDATE 13
+#define ND_INTEL_FW_SEND_DATA 14
+#define ND_INTEL_FW_FINISH_UPDATE 15
+#define ND_INTEL_FW_FINISH_QUERY 16
+#define ND_INTEL_SMART_SET_THRESHOLD 17
+
+#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
+#define ND_INTEL_SMART_USED_VALID (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
+
+struct nd_intel_smart {
+ __u32 status;
+ union {
+ struct {
+ __u32 flags;
+ __u8 reserved0[4];
+ __u8 health;
+ __u8 spares;
+ __u8 life_used;
+ __u8 alarm_flags;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 shutdown_count;
+ __u8 ait_status;
+ __u16 pmic_temperature;
+ __u8 reserved1[8];
+ __u8 shutdown_state;
+ __u32 vendor_size;
+ __u8 vendor_data[92];
+ } __packed;
+ __u8 data[128];
+ };
+} __packed;
+
+struct nd_intel_smart_threshold {
+ __u32 status;
+ union {
+ struct {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u8 reserved[1];
+ } __packed;
+ __u8 data[8];
+ };
+} __packed;
+
+struct nd_intel_smart_set_threshold {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 status;
+} __packed;
+
+#define INTEL_FW_STORAGE_SIZE 0x100000
+#define INTEL_FW_MAX_SEND_LEN 0xFFEC
+#define INTEL_FW_QUERY_INTERVAL 250000
+#define INTEL_FW_QUERY_MAX_TIME 3000000
+#define INTEL_FW_FIS_VERSION 0x0105
+#define INTEL_FW_FAKE_VERSION 0xffffffffabcd
+
+enum intel_fw_update_state {
+ FW_STATE_NEW = 0,
+ FW_STATE_IN_PROGRESS,
+ FW_STATE_VERIFY,
+ FW_STATE_UPDATED,
+};
+
+struct nd_intel_fw_info {
+ __u32 status;
+ __u32 storage_size;
+ __u32 max_send_len;
+ __u32 query_interval;
+ __u32 max_query_time;
+ __u8 update_cap;
+ __u8 reserved[3];
+ __u32 fis_version;
+ __u64 run_version;
+ __u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+ __u32 status;
+ __u32 context;
+} __packed;
+
+/* this one has the output first because of the variable input data size */
+struct nd_intel_fw_send_data {
+ __u32 context;
+ __u32 offset;
+ __u32 length;
+ __u8 data[0];
+/* this field is not declared due to variable data from input */
+/* __u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+ __u8 ctrl_flags;
+ __u8 reserved[3];
+ __u32 context;
+ __u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+ __u32 context;
+ __u32 status;
+ __u64 updated_fw_rev;
+} __packed;
+
+struct nd_intel_lss {
+ __u8 enable;
+ __u32 status;
+} __packed;
+
union acpi_object;
typedef void *acpi_handle;
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000000..ed0528757bd4
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * dummy routine for nfit_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define nfit_test_watermark(x) \
+int x##_test(void) \
+{ \
+ pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
+ return 0; \
+} \
+EXPORT_SYMBOL(x##_test)
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */