summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig15
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/mmu_context.h5
-rw-r--r--arch/x86/include/asm/mmzone.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h17
-rw-r--r--arch/x86/include/asm/mmzone_64.h18
-rw-r--r--arch/x86/include/asm/numa.h26
-rw-r--r--arch/x86/include/asm/pgtable.h150
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h9
-rw-r--r--arch/x86/kernel/sys_x86_64.c21
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology.c1
-rw-r--r--arch/x86/mm/numa.c622
-rw-r--r--arch/x86/mm/numa_emulation.c585
-rw-r--r--arch/x86/mm/numa_internal.h24
-rw-r--r--arch/x86/mm/pat/memtype.c25
-rw-r--r--arch/x86/mm/pgtable.c18
-rw-r--r--arch/x86/um/vdso/vma.c12
-rw-r--r--arch/x86/xen/mmu_pv.c7
22 files changed, 204 insertions, 1364 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 273b17a5cb81..2852fcd82cbd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,6 +28,7 @@ config X86_64
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_PER_VMA_LOCK
+ select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
@@ -299,6 +300,7 @@ config X86
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
+ select NUMA_MEMBLKS if NUMA
select PCI_DOMAINS if PCI
select PCI_LOCKLESS_CONFIG if PCI
select PERF_EVENTS
@@ -1601,14 +1603,6 @@ config X86_64_ACPI_NUMA
help
Enable ACPI SRAT based node topology detection.
-config NUMA_EMU
- bool "NUMA emulation"
- depends on NUMA
- help
- Enable NUMA emulation. A flat machine will be split
- into virtual nodes when booted with "numa=fake=N", where N is the
- number of nodes. This is only useful for debugging.
-
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
range 1 10
@@ -1808,6 +1802,7 @@ config X86_PAT
def_bool y
prompt "x86 PAT support" if EXPERT
depends on MTRR
+ select ARCH_USES_PG_ARCH_2
help
Use PAT attributes to setup page level cache control.
@@ -1819,10 +1814,6 @@ config X86_PAT
If unsure, say Y.
-config ARCH_USES_PG_UNCACHED
- def_bool y
- depends on X86_PAT
-
config X86_UMIP
def_bool y
prompt "User Mode Instruction Prevention" if EXPERT
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 944454306ef4..04a35b2c26e9 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -511,7 +511,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
if (init_unaccepted_memory()) {
debug_putstr("Accepting memory... ");
- accept_memory(__pa(output), __pa(output) + needed_size);
+ accept_memory(__pa(output), needed_size);
}
entry_offset = decompress_kernel(output, virt_addr, error);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index b353a7be380c..dd8d1a85f671 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -256,6 +256,6 @@ static inline bool init_unaccepted_memory(void) { return false; }
/* Defined in EFI stub */
extern struct efi_unaccepted_memory *unaccepted_table;
-void accept_memory(phys_addr_t start, phys_addr_t end);
+void accept_memory(phys_addr_t start, unsigned long size);
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a192bdea69e2..6c23d1661b17 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,3 +11,4 @@ generated-y += xen-hypercalls.h
generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 19091ebb8633..2886cb668d7f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -238,11 +238,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
deleted file mode 100644
index c41b41edd691..000000000000
--- a/arch/x86/include/asm/mmzone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/mmzone_32.h>
-#else
-# include <asm/mmzone_64.h>
-#endif
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
deleted file mode 100644
index 2d4515e8b7df..000000000000
--- a/arch/x86/include/asm/mmzone_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
- *
- */
-
-#ifndef _ASM_X86_MMZONE_32_H
-#define _ASM_X86_MMZONE_32_H
-
-#include <asm/smp.h>
-
-#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-#endif /* CONFIG_NUMA */
-
-#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
deleted file mode 100644
index 0c585046f744..000000000000
--- a/arch/x86/include/asm/mmzone_64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* K8 NUMA support */
-/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
-/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_MMZONE_64_H
-#define _ASM_X86_MMZONE_64_H
-
-#ifdef CONFIG_NUMA
-
-#include <linux/mmdebug.h>
-#include <asm/smp.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif
-#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index ef2844d69173..5469d7a7c40f 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -10,8 +10,6 @@
#ifdef CONFIG_NUMA
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
extern int numa_off;
/*
@@ -25,9 +23,6 @@ extern int numa_off;
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
@@ -54,31 +49,20 @@ static inline int numa_cpu_node(int cpu)
extern void numa_set_node(int cpu, int node);
extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
-extern void numa_add_cpu(int cpu);
-extern void numa_remove_cpu(int cpu);
+extern void numa_add_cpu(unsigned int cpu);
+extern void numa_remove_cpu(unsigned int cpu);
extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
-static inline void numa_add_cpu(int cpu) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-void debug_cpumask_set_cpu(int cpu, int node, bool enable);
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
#endif
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-int numa_emu_cmdline(char *str);
-#else /* CONFIG_NUMA_EMU */
-static inline int numa_emu_cmdline(char *str)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_NUMA_EMU */
-
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e39311a89bf4..4c2d080d26b4 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -120,6 +120,34 @@ extern pmdval_t early_pmd_flags;
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v & ~clear);
+}
+
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v & ~clear);
+}
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -174,6 +202,13 @@ static inline int pud_young(pud_t pud)
return pud_flags(pud) & _PAGE_ACCESSED;
}
+static inline bool pud_shstk(pud_t pud)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
+}
+
static inline int pte_write(pte_t pte)
{
/*
@@ -310,6 +345,30 @@ static inline int pud_devmap(pud_t pud)
}
#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+static inline bool pmd_special(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SPECIAL;
+}
+
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+static inline bool pud_special(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SPECIAL;
+}
+
+static inline pud_t pud_mkspecial(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
+
static inline int pgd_devmap(pgd_t pgd)
{
return 0;
@@ -480,20 +539,6 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
}
-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v | set);
-}
-
-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
@@ -588,20 +633,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
#define pmd_mkwrite pmd_mkwrite
-static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v | set);
-}
-
-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pud_t pud_mksaveddirty(pud_t pud)
{
@@ -780,6 +811,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -827,14 +864,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmd_result = __pmd(val);
/*
- * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
- * 1. Marking Write=0 PMDs Dirty=1
- * 2. Marking Dirty=1 PMDs Write=0
- *
- * The first case cannot happen because the _PAGE_CHG_MASK will filter
- * out any Dirty bit passed in newprot. Handle the second case by
- * going through the mksaveddirty exercise. Only do this if the old
- * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
*/
if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result);
@@ -844,6 +875,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd_result;
}
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
+}
+
/*
* mprotect needs to preserve PAT and encryption bits when updating
* vm_page_prot
@@ -1078,8 +1132,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pud_val(pud) & _PAGE_PSE;
}
static inline int pud_bad(pud_t pud)
@@ -1383,10 +1436,28 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1668,6 +1739,9 @@ void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
#define arch_check_zapped_pmd arch_check_zapped_pmd
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 3c4407271d08..7e9db77231ac 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -245,7 +245,6 @@ extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#define HAVE_ARCH_UNMAPPED_AREA_VMFLAGS
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 64df897c0ee3..3918c7a434f5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -31,13 +31,4 @@
#endif /* CONFIG_SPARSEMEM */
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-extern int phys_to_target_node(phys_addr_t start);
-#define phys_to_target_node phys_to_target_node
-extern int memory_add_physaddr_to_nid(u64 start);
-#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-#endif
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 01d7cd85ef97..87f8c9a71c49 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -121,7 +121,7 @@ static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
}
unsigned long
-arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
+arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
@@ -158,7 +158,7 @@ arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned l
}
unsigned long
-arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr0,
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
unsigned long len, unsigned long pgoff,
unsigned long flags, vm_flags_t vm_flags)
{
@@ -228,20 +228,5 @@ bottomup:
* can happen with large stack limits and large mmap()
* allocations.
*/
- return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
-}
-
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
-{
- return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
-}
-
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff, flags, 0);
+ return arch_get_unmapped_area(filp, addr0, len, pgoff, flags, 0);
}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8d3a00e5c528..690fbf48e853 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -57,7 +57,6 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
-obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 9332b36a1091..628833afee37 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
+#include <linux/numa_memblks.h>
#include <asm/io.h>
#include <linux/pci_ids.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 6ce10e3c6228..64e5cdb2460a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/topology.h>
#include <linux/sort.h>
+#include <linux/numa_memblks.h>
#include <asm/e820/api.h>
#include <asm/proto.h>
@@ -22,16 +23,6 @@
#include "numa_internal.h"
int numa_off;
-nodemask_t numa_nodes_parsed __initdata;
-
-struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
-EXPORT_SYMBOL(node_data);
-
-static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
-static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
-
-static int numa_distance_cnt;
-static u8 *numa_distance;
static __init int numa_setup(char *opt)
{
@@ -124,456 +115,27 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}
-static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
- struct numa_meminfo *mi)
-{
- /* ignore zero length blks */
- if (start == end)
- return 0;
-
- /* whine about and ignore invalid blks */
- if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
- return 0;
- }
-
- if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("too many memblk ranges\n");
- return -EINVAL;
- }
-
- mi->blk[mi->nr_blks].start = start;
- mi->blk[mi->nr_blks].end = end;
- mi->blk[mi->nr_blks].nid = nid;
- mi->nr_blks++;
- return 0;
-}
-
-/**
- * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
- * @idx: Index of memblk to remove
- * @mi: numa_meminfo to remove memblk from
- *
- * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
- * decrementing @mi->nr_blks.
- */
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
-{
- mi->nr_blks--;
- memmove(&mi->blk[idx], &mi->blk[idx + 1],
- (mi->nr_blks - idx) * sizeof(mi->blk[0]));
-}
-
-/**
- * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another
- * @dst: numa_meminfo to append block to
- * @idx: Index of memblk to remove
- * @src: numa_meminfo to remove memblk from
- */
-static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
- struct numa_meminfo *src)
-{
- dst->blk[dst->nr_blks++] = src->blk[idx];
- numa_remove_memblk_from(idx, src);
-}
-
-/**
- * numa_add_memblk - Add one numa_memblk to numa_meminfo
- * @nid: NUMA node ID of the new memblk
- * @start: Start address of the new memblk
- * @end: End address of the new memblk
- *
- * Add a new memblk to the default numa_meminfo.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_add_memblk(int nid, u64 start, u64 end)
-{
- return numa_add_memblk_to(nid, start, end, &numa_meminfo);
-}
-
-/* Allocate NODE_DATA for a node on the local memory */
-static void __init alloc_node_data(int nid)
+static int __init numa_register_nodes(void)
{
- const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- u64 nd_pa;
- void *nd;
- int tnid;
-
- /*
- * Allocate node data. Try node-local memory and then any node.
- * Never allocate in DMA zone.
- */
- nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
- nd_size, nid);
- return;
- }
- nd = __va(nd_pa);
-
- /* report and initialize */
- printk(KERN_INFO "NODE_DATA(%d) allocated [mem %#010Lx-%#010Lx]\n", nid,
- nd_pa, nd_pa + nd_size - 1);
- tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (tnid != nid)
- printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
-
- node_data[nid] = nd;
- memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
-
- node_set_online(nid);
-}
-
-/**
- * numa_cleanup_meminfo - Cleanup a numa_meminfo
- * @mi: numa_meminfo to clean up
- *
- * Sanitize @mi by merging and removing unnecessary memblks. Also check for
- * conflicts and clear unused memblks.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
-{
- const u64 low = 0;
- const u64 high = PFN_PHYS(max_pfn);
- int i, j, k;
-
- /* first, trim all entries */
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- /* move / save reserved memory ranges */
- if (!memblock_overlaps_region(&memblock.memory,
- bi->start, bi->end - bi->start)) {
- numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
- continue;
- }
-
- /* make sure all non-reserved blocks are inside the limits */
- bi->start = max(bi->start, low);
-
- /* preserve info for non-RAM areas above 'max_pfn': */
- if (bi->end > high) {
- numa_add_memblk_to(bi->nid, high, bi->end,
- &numa_reserved_meminfo);
- bi->end = high;
- }
-
- /* and there's no empty block */
- if (bi->start >= bi->end)
- numa_remove_memblk_from(i--, mi);
- }
-
- /* merge neighboring / overlapping entries */
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- for (j = i + 1; j < mi->nr_blks; j++) {
- struct numa_memblk *bj = &mi->blk[j];
- u64 start, end;
-
- /*
- * See whether there are overlapping blocks. Whine
- * about but allow overlaps of the same nid. They
- * will be merged below.
- */
- if (bi->end > bj->start && bi->start < bj->end) {
- if (bi->nid != bj->nid) {
- pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->nid, bj->start, bj->end - 1);
- return -EINVAL;
- }
- pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->start, bj->end - 1);
- }
-
- /*
- * Join together blocks on the same node, holes
- * between which don't overlap with memory on other
- * nodes.
- */
- if (bi->nid != bj->nid)
- continue;
- start = min(bi->start, bj->start);
- end = max(bi->end, bj->end);
- for (k = 0; k < mi->nr_blks; k++) {
- struct numa_memblk *bk = &mi->blk[k];
-
- if (bi->nid == bk->nid)
- continue;
- if (start < bk->end && end > bk->start)
- break;
- }
- if (k < mi->nr_blks)
- continue;
- printk(KERN_INFO "NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1, bj->start,
- bj->end - 1, start, end - 1);
- bi->start = start;
- bi->end = end;
- numa_remove_memblk_from(j--, mi);
- }
- }
-
- /* clear unused ones */
- for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
- mi->blk[i].start = mi->blk[i].end = 0;
- mi->blk[i].nid = NUMA_NO_NODE;
- }
-
- return 0;
-}
-
-/*
- * Set nodes, which have memory in @mi, in *@nodemask.
- */
-static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
- const struct numa_meminfo *mi)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
- if (mi->blk[i].start != mi->blk[i].end &&
- mi->blk[i].nid != NUMA_NO_NODE)
- node_set(mi->blk[i].nid, *nodemask);
-}
-
-/**
- * numa_reset_distance - Reset NUMA distance table
- *
- * The current table is freed. The next numa_set_distance() call will
- * create a new one.
- */
-void __init numa_reset_distance(void)
-{
- size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
-
- /* numa_distance could be 1LU marking allocation failure, test cnt */
- if (numa_distance_cnt)
- memblock_free(numa_distance, size);
- numa_distance_cnt = 0;
- numa_distance = NULL; /* enable table creation */
-}
-
-static int __init numa_alloc_distance(void)
-{
- nodemask_t nodes_parsed;
- size_t size;
- int i, j, cnt = 0;
- u64 phys;
-
- /* size the new table and allocate it */
- nodes_parsed = numa_nodes_parsed;
- numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
-
- for_each_node_mask(i, nodes_parsed)
- cnt = i;
- cnt++;
- size = cnt * cnt * sizeof(numa_distance[0]);
-
- phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0,
- PFN_PHYS(max_pfn_mapped));
- if (!phys) {
- pr_warn("Warning: can't allocate distance table!\n");
- /* don't retry until explicitly reset */
- numa_distance = (void *)1LU;
- return -ENOMEM;
- }
-
- numa_distance = __va(phys);
- numa_distance_cnt = cnt;
-
- /* fill with the default distances */
- for (i = 0; i < cnt; i++)
- for (j = 0; j < cnt; j++)
- numa_distance[i * cnt + j] = i == j ?
- LOCAL_DISTANCE : REMOTE_DISTANCE;
- printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
-
- return 0;
-}
-
-/**
- * numa_set_distance - Set NUMA distance from one NUMA to another
- * @from: the 'from' node to set distance
- * @to: the 'to' node to set distance
- * @distance: NUMA distance
- *
- * Set the distance from node @from to @to to @distance. If distance table
- * doesn't exist, one which is large enough to accommodate all the currently
- * known nodes will be created.
- *
- * If such table cannot be allocated, a warning is printed and further
- * calls are ignored until the distance table is reset with
- * numa_reset_distance().
- *
- * If @from or @to is higher than the highest known node or lower than zero
- * at the time of table creation or @distance doesn't make sense, the call
- * is ignored.
- * This is to allow simplification of specific NUMA config implementations.
- */
-void __init numa_set_distance(int from, int to, int distance)
-{
- if (!numa_distance && numa_alloc_distance() < 0)
- return;
-
- if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
- from < 0 || to < 0) {
- pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
- from, to, distance);
- return;
- }
-
- if ((u8)distance != distance ||
- (from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
- from, to, distance);
- return;
- }
-
- numa_distance[from * numa_distance_cnt + to] = distance;
-}
-
-int __node_distance(int from, int to)
-{
- if (from >= numa_distance_cnt || to >= numa_distance_cnt)
- return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
- return numa_distance[from * numa_distance_cnt + to];
-}
-EXPORT_SYMBOL(__node_distance);
-
-/*
- * Mark all currently memblock-reserved physical memory (which covers the
- * kernel's own memory ranges) as hot-unswappable.
- */
-static void __init numa_clear_kernel_node_hotplug(void)
-{
- nodemask_t reserved_nodemask = NODE_MASK_NONE;
- struct memblock_region *mb_region;
- int i;
-
- /*
- * We have to do some preprocessing of memblock regions, to
- * make them suitable for reservation.
- *
- * At this time, all memory regions reserved by memblock are
- * used by the kernel, but those regions are not split up
- * along node boundaries yet, and don't necessarily have their
- * node ID set yet either.
- *
- * So iterate over all memory known to the x86 architecture,
- * and use those ranges to set the nid in memblock.reserved.
- * This will split up the memblock regions along node
- * boundaries and will set the node IDs as well.
- */
- for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = numa_meminfo.blk + i;
- int ret;
-
- ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid);
- WARN_ON_ONCE(ret);
- }
-
- /*
- * Now go over all reserved memblock regions, to construct a
- * node mask of all kernel reserved memory areas.
- *
- * [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
- * numa_meminfo might not include all memblock.reserved
- * memory ranges, because quirks such as trim_snb_memory()
- * reserve specific pages for Sandy Bridge graphics. ]
- */
- for_each_reserved_mem_region(mb_region) {
- int nid = memblock_get_region_node(mb_region);
-
- if (nid != NUMA_NO_NODE)
- node_set(nid, reserved_nodemask);
- }
-
- /*
- * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
- * belonging to the reserved node mask.
- *
- * Note that this will include memory regions that reside
- * on nodes that contain kernel memory - entire nodes
- * become hot-unpluggable:
- */
- for (i = 0; i < numa_meminfo.nr_blks; i++) {
- struct numa_memblk *mb = numa_meminfo.blk + i;
-
- if (!node_isset(mb->nid, reserved_nodemask))
- continue;
-
- memblock_clear_hotplug(mb->start, mb->end - mb->start);
- }
-}
-
-static int __init numa_register_memblks(struct numa_meminfo *mi)
-{
- int i, nid;
-
- /* Account for nodes with cpus and no memory */
- node_possible_map = numa_nodes_parsed;
- numa_nodemask_from_meminfo(&node_possible_map, mi);
- if (WARN_ON(nodes_empty(node_possible_map)))
- return -EINVAL;
-
- for (i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *mb = &mi->blk[i];
- memblock_set_node(mb->start, mb->end - mb->start,
- &memblock.memory, mb->nid);
- }
-
- /*
- * At very early time, the kernel have to use some memory such as
- * loading the kernel image. We cannot prevent this anyway. So any
- * node the kernel resides in should be un-hotpluggable.
- *
- * And when we come here, alloc node data won't fail.
- */
- numa_clear_kernel_node_hotplug();
-
- /*
- * If sections array is gonna be used for pfn -> nid mapping, check
- * whether its granularity is fine enough.
- */
- if (IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS)) {
- unsigned long pfn_align = node_map_pfn_alignment();
-
- if (pfn_align && pfn_align < PAGES_PER_SECTION) {
- pr_warn("Node alignment %LuMB < min %LuMB, rejecting NUMA config\n",
- PFN_PHYS(pfn_align) >> 20,
- PFN_PHYS(PAGES_PER_SECTION) >> 20);
- return -EINVAL;
- }
- }
+ int nid;
if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;
/* Finally register nodes. */
for_each_node_mask(nid, node_possible_map) {
- u64 start = PFN_PHYS(max_pfn);
- u64 end = 0;
-
- for (i = 0; i < mi->nr_blks; i++) {
- if (nid != mi->blk[i].nid)
- continue;
- start = min(mi->blk[i].start, start);
- end = max(mi->blk[i].end, end);
- }
+ unsigned long start_pfn, end_pfn;
- if (start >= end)
+ /*
+ * Note, get_pfn_range_for_nid() depends on
+ * memblock_set_node() having already happened
+ */
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ if (start_pfn >= end_pfn)
continue;
alloc_node_data(nid);
+ node_set_online(nid);
}
/* Dump memblock with node info and return. */
@@ -609,39 +171,11 @@ static int __init numa_init(int (*init_func)(void))
for (i = 0; i < MAX_LOCAL_APIC; i++)
set_apicid_to_node(i, NUMA_NO_NODE);
- nodes_clear(numa_nodes_parsed);
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
- memset(&numa_meminfo, 0, sizeof(numa_meminfo));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
- NUMA_NO_NODE));
- WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
- NUMA_NO_NODE));
- /* In case that parsing SRAT failed. */
- WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
- numa_reset_distance();
-
- ret = init_func();
- if (ret < 0)
- return ret;
-
- /*
- * We reset memblock back to the top-down direction
- * here because if we configured ACPI_NUMA, we have
- * parsed SRAT in init_func(). It is ok to have the
- * reset here even if we did't configure ACPI_NUMA
- * or acpi numa init fails and fallbacks to dummy
- * numa init.
- */
- memblock_set_bottom_up(false);
-
- ret = numa_cleanup_meminfo(&numa_meminfo);
+ ret = numa_memblks_init(init_func, /* memblock_force_top_down */ true);
if (ret < 0)
return ret;
- numa_emulation(&numa_meminfo, numa_distance_cnt);
-
- ret = numa_register_memblks(&numa_meminfo);
+ ret = numa_register_nodes();
if (ret < 0)
return ret;
@@ -782,12 +316,12 @@ void __init init_cpu_to_node(void)
#ifndef CONFIG_DEBUG_PER_CPU_MAPS
# ifndef CONFIG_NUMA_EMU
-void numa_add_cpu(int cpu)
+void numa_add_cpu(unsigned int cpu)
{
cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
-void numa_remove_cpu(int cpu)
+void numa_remove_cpu(unsigned int cpu)
{
cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
@@ -825,7 +359,7 @@ int early_cpu_to_node(int cpu)
return per_cpu(x86_cpu_to_node_map, cpu);
}
-void debug_cpumask_set_cpu(int cpu, int node, bool enable)
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable)
{
struct cpumask *mask;
@@ -857,12 +391,12 @@ static void numa_set_cpumask(int cpu, bool enable)
debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable);
}
-void numa_add_cpu(int cpu)
+void numa_add_cpu(unsigned int cpu)
{
numa_set_cpumask(cpu, true);
}
-void numa_remove_cpu(int cpu)
+void numa_remove_cpu(unsigned int cpu)
{
numa_set_cpumask(cpu, false);
}
@@ -893,113 +427,29 @@ EXPORT_SYMBOL(cpumask_of_node);
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys,
+ unsigned int nr_emu_nids)
{
- int i;
-
- for (i = 0; i < mi->nr_blks; i++)
- if (mi->blk[i].start <= start && mi->blk[i].end > start)
- return mi->blk[i].nid;
- return NUMA_NO_NODE;
-}
-
-int phys_to_target_node(phys_addr_t start)
-{
- int nid = meminfo_to_nid(&numa_meminfo, start);
+ int i, j;
/*
- * Prefer online nodes, but if reserved memory might be
- * hot-added continue the search with reserved ranges.
+ * Transform __apicid_to_node table to use emulated nids by
+ * reverse-mapping phys_nid. The maps should always exist but fall
+ * back to zero just in case.
*/
- if (nid != NUMA_NO_NODE)
- return nid;
-
- return meminfo_to_nid(&numa_reserved_meminfo, start);
-}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
-
-int memory_add_physaddr_to_nid(u64 start)
-{
- int nid = meminfo_to_nid(&numa_meminfo, start);
-
- if (nid == NUMA_NO_NODE)
- nid = numa_meminfo.blk[0].nid;
- return nid;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
-#endif
-
-static int __init cmp_memblk(const void *a, const void *b)
-{
- const struct numa_memblk *ma = *(const struct numa_memblk **)a;
- const struct numa_memblk *mb = *(const struct numa_memblk **)b;
-
- return (ma->start > mb->start) - (ma->start < mb->start);
+ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
+ if (__apicid_to_node[i] == NUMA_NO_NODE)
+ continue;
+ for (j = 0; j < nr_emu_nids; j++)
+ if (__apicid_to_node[i] == emu_nid_to_phys[j])
+ break;
+ __apicid_to_node[i] = j < nr_emu_nids ? j : 0;
+ }
}
-static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
-
-/**
- * numa_fill_memblks - Fill gaps in numa_meminfo memblks
- * @start: address to begin fill
- * @end: address to end fill
- *
- * Find and extend numa_meminfo memblks to cover the physical
- * address range @start-@end
- *
- * RETURNS:
- * 0 : Success
- * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
- */
-
-int __init numa_fill_memblks(u64 start, u64 end)
+u64 __init numa_emu_dma_end(void)
{
- struct numa_memblk **blk = &numa_memblk_list[0];
- struct numa_meminfo *mi = &numa_meminfo;
- int count = 0;
- u64 prev_end;
-
- /*
- * Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. The list is used to make in-place
- * changes that fill out the numa_meminfo memblks.
- */
- for (int i = 0; i < mi->nr_blks; i++) {
- struct numa_memblk *bi = &mi->blk[i];
-
- if (memblock_addrs_overlap(start, end - start, bi->start,
- bi->end - bi->start)) {
- blk[count] = &mi->blk[i];
- count++;
- }
- }
- if (!count)
- return NUMA_NO_MEMBLK;
-
- /* Sort the list of pointers in memblk->start order */
- sort(&blk[0], count, sizeof(blk[0]), cmp_memblk, NULL);
-
- /* Make sure the first/last memblks include start/end */
- blk[0]->start = min(blk[0]->start, start);
- blk[count - 1]->end = max(blk[count - 1]->end, end);
-
- /*
- * Fill any gaps by tracking the previous memblks
- * end address and backfilling to it if needed.
- */
- prev_end = blk[0]->end;
- for (int i = 1; i < count; i++) {
- struct numa_memblk *curr = blk[i];
-
- if (prev_end >= curr->start) {
- if (prev_end < curr->end)
- prev_end = curr->end;
- } else {
- curr->start = prev_end;
- prev_end = curr->end;
- }
- }
- return 0;
+ return PFN_PHYS(MAX_DMA32_PFN);
}
+#endif /* CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
deleted file mode 100644
index 9a9305367fdd..000000000000
--- a/arch/x86/mm/numa_emulation.c
+++ /dev/null
@@ -1,585 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA emulation
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/topology.h>
-#include <linux/memblock.h>
-#include <asm/dma.h>
-
-#include "numa_internal.h"
-
-static int emu_nid_to_phys[MAX_NUMNODES];
-static char *emu_cmdline __initdata;
-
-int __init numa_emu_cmdline(char *str)
-{
- emu_cmdline = str;
- return 0;
-}
-
-static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
-{
- int i;
-
- for (i = 0; i < mi->nr_blks; i++)
- if (mi->blk[i].nid == nid)
- return i;
- return -ENOENT;
-}
-
-static u64 __init mem_hole_size(u64 start, u64 end)
-{
- unsigned long start_pfn = PFN_UP(start);
- unsigned long end_pfn = PFN_DOWN(end);
-
- if (start_pfn < end_pfn)
- return PFN_PHYS(absent_pages_in_range(start_pfn, end_pfn));
- return 0;
-}
-
-/*
- * Sets up nid to range from @start to @end. The return value is -errno if
- * something went wrong, 0 otherwise.
- */
-static int __init emu_setup_memblk(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- int nid, int phys_blk, u64 size)
-{
- struct numa_memblk *eb = &ei->blk[ei->nr_blks];
- struct numa_memblk *pb = &pi->blk[phys_blk];
-
- if (ei->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: Too many emulated memblks, failing emulation\n");
- return -EINVAL;
- }
-
- ei->nr_blks++;
- eb->start = pb->start;
- eb->end = pb->start + size;
- eb->nid = nid;
-
- if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = pb->nid;
-
- pb->start += size;
- if (pb->start >= pb->end) {
- WARN_ON_ONCE(pb->start > pb->end);
- numa_remove_memblk_from(phys_blk, pi);
- }
-
- printk(KERN_INFO "Faking node %d at [mem %#018Lx-%#018Lx] (%LuMB)\n",
- nid, eb->start, eb->end - 1, (eb->end - eb->start) >> 20);
- return 0;
-}
-
-/*
- * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr.
- *
- * Returns zero on success or negative on error.
- */
-static int __init split_nodes_interleave(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, int nr_nodes)
-{
- nodemask_t physnode_mask = numa_nodes_parsed;
- u64 size;
- int big;
- int nid = 0;
- int i, ret;
-
- if (nr_nodes <= 0)
- return -1;
- if (nr_nodes > MAX_NUMNODES) {
- pr_info("numa=fake=%d too large, reducing to %d\n",
- nr_nodes, MAX_NUMNODES);
- nr_nodes = MAX_NUMNODES;
- }
-
- /*
- * Calculate target node size. x86_32 freaks on __udivdi3() so do
- * the division in ulong number of pages and convert back.
- */
- size = max_addr - addr - mem_hole_size(addr, max_addr);
- size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
-
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the remainder.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- pr_err("Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
-
- /*
- * Continue to fill physical nodes with fake nodes until there is no
- * memory left on any of them.
- */
- while (!nodes_empty(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
- u64 start, limit, end;
- int phys_blk;
-
- phys_blk = emu_find_memblk_by_nid(i, pi);
- if (phys_blk < 0) {
- node_clear(i, physnode_mask);
- continue;
- }
- start = pi->blk[phys_blk].start;
- limit = pi->blk[phys_blk].end;
- end = start + size;
-
- if (nid < big)
- end += FAKE_NODE_MIN_SIZE;
-
- /*
- * Continue to add memory to this fake node if its
- * non-reserved memory is less than the per-node size.
- */
- while (end - start - mem_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > limit) {
- end = limit;
- break;
- }
- }
-
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (limit - end - mem_hole_size(end, limit) < size)
- end = limit;
-
- ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
- phys_blk,
- min(end, limit) - start);
- if (ret < 0)
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * Returns the end address of a node so that there is at least `size' amount of
- * non-reserved memory or `max_addr' is reached.
- */
-static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-{
- u64 end = start + size;
-
- while (end - start - mem_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
- }
- return end;
-}
-
-static u64 uniform_size(u64 max_addr, u64 base, u64 hole, int nr_nodes)
-{
- unsigned long max_pfn = PHYS_PFN(max_addr);
- unsigned long base_pfn = PHYS_PFN(base);
- unsigned long hole_pfns = PHYS_PFN(hole);
-
- return PFN_PHYS((max_pfn - base_pfn - hole_pfns) / nr_nodes);
-}
-
-/*
- * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'.
- *
- * Returns zero on success or negative on error.
- */
-static int __init split_nodes_size_interleave_uniform(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, u64 size,
- int nr_nodes, struct numa_memblk *pblk,
- int nid)
-{
- nodemask_t physnode_mask = numa_nodes_parsed;
- int i, ret, uniform = 0;
- u64 min_size;
-
- if ((!size && !nr_nodes) || (nr_nodes && !pblk))
- return -1;
-
- /*
- * In the 'uniform' case split the passed in physical node by
- * nr_nodes, in the non-uniform case, ignore the passed in
- * physical block and try to create nodes of at least size
- * @size.
- *
- * In the uniform case, split the nodes strictly by physical
- * capacity, i.e. ignore holes. In the non-uniform case account
- * for holes and treat @size as a minimum floor.
- */
- if (!nr_nodes)
- nr_nodes = MAX_NUMNODES;
- else {
- nodes_clear(physnode_mask);
- node_set(pblk->nid, physnode_mask);
- uniform = 1;
- }
-
- if (uniform) {
- min_size = uniform_size(max_addr, addr, 0, nr_nodes);
- size = min_size;
- } else {
- /*
- * The limit on emulated nodes is MAX_NUMNODES, so the
- * size per node is increased accordingly if the
- * requested size is too small. This creates a uniform
- * distribution of node sizes across the entire machine
- * (but not necessarily over physical nodes).
- */
- min_size = uniform_size(max_addr, addr,
- mem_hole_size(addr, max_addr), nr_nodes);
- }
- min_size = ALIGN(max(min_size, FAKE_NODE_MIN_SIZE), FAKE_NODE_MIN_SIZE);
- if (size < min_size) {
- pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
- size >> 20, min_size >> 20);
- size = min_size;
- }
- size = ALIGN_DOWN(size, FAKE_NODE_MIN_SIZE);
-
- /*
- * Fill physical nodes with fake nodes of size until there is no memory
- * left on any of them.
- */
- while (!nodes_empty(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
- u64 start, limit, end;
- int phys_blk;
-
- phys_blk = emu_find_memblk_by_nid(i, pi);
- if (phys_blk < 0) {
- node_clear(i, physnode_mask);
- continue;
- }
-
- start = pi->blk[phys_blk].start;
- limit = pi->blk[phys_blk].end;
-
- if (uniform)
- end = start + size;
- else
- end = find_end_of_node(start, limit, size);
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- mem_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if ((limit - end - mem_hole_size(end, limit) < size)
- && !uniform)
- end = limit;
-
- ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
- phys_blk,
- min(end, limit) - start);
- if (ret < 0)
- return ret;
- }
- }
- return nid;
-}
-
-static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
- struct numa_meminfo *pi,
- u64 addr, u64 max_addr, u64 size)
-{
- return split_nodes_size_interleave_uniform(ei, pi, addr, max_addr, size,
- 0, NULL, 0);
-}
-
-static int __init setup_emu2phys_nid(int *dfl_phys_nid)
-{
- int i, max_emu_nid = 0;
-
- *dfl_phys_nid = NUMA_NO_NODE;
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
- if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
- max_emu_nid = i;
- if (*dfl_phys_nid == NUMA_NO_NODE)
- *dfl_phys_nid = emu_nid_to_phys[i];
- }
- }
-
- return max_emu_nid;
-}
-
-/**
- * numa_emulation - Emulate NUMA nodes
- * @numa_meminfo: NUMA configuration to massage
- * @numa_dist_cnt: The size of the physical NUMA distance table
- *
- * Emulate NUMA nodes according to the numa=fake kernel parameter.
- * @numa_meminfo contains the physical memory configuration and is modified
- * to reflect the emulated configuration on success. @numa_dist_cnt is
- * used to determine the size of the physical distance table.
- *
- * On success, the following modifications are made.
- *
- * - @numa_meminfo is updated to reflect the emulated nodes.
- *
- * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
- * emulated nodes.
- *
- * - NUMA distance table is rebuilt to represent distances between emulated
- * nodes. The distances are determined considering how emulated nodes
- * are mapped to physical nodes and match the actual distances.
- *
- * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
- * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
- *
- * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
- * identity mapping and no other modification is made.
- */
-void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
-{
- static struct numa_meminfo ei __initdata;
- static struct numa_meminfo pi __initdata;
- const u64 max_addr = PFN_PHYS(max_pfn);
- u8 *phys_dist = NULL;
- size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
- int max_emu_nid, dfl_phys_nid;
- int i, j, ret;
-
- if (!emu_cmdline)
- goto no_emu;
-
- memset(&ei, 0, sizeof(ei));
- pi = *numa_meminfo;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- emu_nid_to_phys[i] = NUMA_NO_NODE;
-
- /*
- * If the numa=fake command-line contains a 'M' or 'G', it represents
- * the fixed node size. Otherwise, if it is just a single number N,
- * split the system RAM into N fake nodes.
- */
- if (strchr(emu_cmdline, 'U')) {
- nodemask_t physnode_mask = numa_nodes_parsed;
- unsigned long n;
- int nid = 0;
-
- n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
- ret = -1;
- for_each_node_mask(i, physnode_mask) {
- /*
- * The reason we pass in blk[0] is due to
- * numa_remove_memblk_from() called by
- * emu_setup_memblk() will delete entry 0
- * and then move everything else up in the pi.blk
- * array. Therefore we should always be looking
- * at blk[0].
- */
- ret = split_nodes_size_interleave_uniform(&ei, &pi,
- pi.blk[0].start, pi.blk[0].end, 0,
- n, &pi.blk[0], nid);
- if (ret < 0)
- break;
- if (ret < n) {
- pr_info("%s: phys: %d only got %d of %ld nodes, failing\n",
- __func__, i, ret, n);
- ret = -1;
- break;
- }
- nid = ret;
- }
- } else if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
- u64 size;
-
- size = memparse(emu_cmdline, &emu_cmdline);
- ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
- } else {
- unsigned long n;
-
- n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
- ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
- }
- if (*emu_cmdline == ':')
- emu_cmdline++;
-
- if (ret < 0)
- goto no_emu;
-
- if (numa_cleanup_meminfo(&ei) < 0) {
- pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
- goto no_emu;
- }
-
- /* copy the physical distance table */
- if (numa_dist_cnt) {
- u64 phys;
-
- phys = memblock_phys_alloc_range(phys_size, PAGE_SIZE, 0,
- PFN_PHYS(max_pfn_mapped));
- if (!phys) {
- pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
- goto no_emu;
- }
- phys_dist = __va(phys);
-
- for (i = 0; i < numa_dist_cnt; i++)
- for (j = 0; j < numa_dist_cnt; j++)
- phys_dist[i * numa_dist_cnt + j] =
- node_distance(i, j);
- }
-
- /*
- * Determine the max emulated nid and the default phys nid to use
- * for unmapped nodes.
- */
- max_emu_nid = setup_emu2phys_nid(&dfl_phys_nid);
-
- /* commit */
- *numa_meminfo = ei;
-
- /* Make sure numa_nodes_parsed only contains emulated nodes */
- nodes_clear(numa_nodes_parsed);
- for (i = 0; i < ARRAY_SIZE(ei.blk); i++)
- if (ei.blk[i].start != ei.blk[i].end &&
- ei.blk[i].nid != NUMA_NO_NODE)
- node_set(ei.blk[i].nid, numa_nodes_parsed);
-
- /*
- * Transform __apicid_to_node table to use emulated nids by
- * reverse-mapping phys_nid. The maps should always exist but fall
- * back to zero just in case.
- */
- for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
- if (__apicid_to_node[i] == NUMA_NO_NODE)
- continue;
- for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
- if (__apicid_to_node[i] == emu_nid_to_phys[j])
- break;
- __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
- }
-
- /* make sure all emulated nodes are mapped to a physical node */
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
- if (emu_nid_to_phys[i] == NUMA_NO_NODE)
- emu_nid_to_phys[i] = dfl_phys_nid;
-
- /* transform distance table */
- numa_reset_distance();
- for (i = 0; i < max_emu_nid + 1; i++) {
- for (j = 0; j < max_emu_nid + 1; j++) {
- int physi = emu_nid_to_phys[i];
- int physj = emu_nid_to_phys[j];
- int dist;
-
- if (get_option(&emu_cmdline, &dist) == 2)
- ;
- else if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
- dist = physi == physj ?
- LOCAL_DISTANCE : REMOTE_DISTANCE;
- else
- dist = phys_dist[physi * numa_dist_cnt + physj];
-
- numa_set_distance(i, j, dist);
- }
- }
-
- /* free the copied physical distance table */
- memblock_free(phys_dist, phys_size);
- return;
-
-no_emu:
- /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
- for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
- emu_nid_to_phys[i] = i;
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-void numa_add_cpu(int cpu)
-{
- int physnid, nid;
-
- nid = early_cpu_to_node(cpu);
- BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
-
- physnid = emu_nid_to_phys[nid];
-
- /*
- * Map the cpu to each emulated node that is allocated on the physical
- * node of the cpu's apic id.
- */
- for_each_online_node(nid)
- if (emu_nid_to_phys[nid] == physnid)
- cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
-}
-
-void numa_remove_cpu(int cpu)
-{
- int i;
-
- for_each_online_node(i)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
-}
-#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
-static void numa_set_cpumask(int cpu, bool enable)
-{
- int nid, physnid;
-
- nid = early_cpu_to_node(cpu);
- if (nid == NUMA_NO_NODE) {
- /* early_cpu_to_node() already emits a warning and trace */
- return;
- }
-
- physnid = emu_nid_to_phys[nid];
-
- for_each_online_node(nid) {
- if (emu_nid_to_phys[nid] != physnid)
- continue;
-
- debug_cpumask_set_cpu(cpu, nid, enable);
- }
-}
-
-void numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, true);
-}
-
-void numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, false);
-}
-#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
index 86860f279662..11e1ff370c10 100644
--- a/arch/x86/mm/numa_internal.h
+++ b/arch/x86/mm/numa_internal.h
@@ -5,30 +5,6 @@
#include <linux/types.h>
#include <asm/numa.h>
-struct numa_memblk {
- u64 start;
- u64 end;
- int nid;
-};
-
-struct numa_meminfo {
- int nr_blks;
- struct numa_memblk blk[NR_NODE_MEMBLKS];
-};
-
-void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
-int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
-void __init numa_reset_distance(void);
-
void __init x86_numa_init(void);
-#ifdef CONFIG_NUMA_EMU
-void __init numa_emulation(struct numa_meminfo *numa_meminfo,
- int numa_dist_cnt);
-#else
-static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
- int numa_dist_cnt)
-{ }
-#endif
-
#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index bdc2a240c2aa..f73b5ce270b3 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -104,7 +104,7 @@ __setup("debugpat", pat_debug_setup);
#ifdef CONFIG_X86_PAT
/*
- * X86 PAT uses page flags arch_1 and uncached together to keep track of
+ * X86 PAT uses page flags arch_1 and arch_2 together to keep track of
* memory type of pages that have backing page struct.
*
* X86 PAT supports 4 different memory types:
@@ -118,9 +118,9 @@ __setup("debugpat", pat_debug_setup);
#define _PGMT_WB 0
#define _PGMT_WC (1UL << PG_arch_1)
-#define _PGMT_UC_MINUS (1UL << PG_uncached)
-#define _PGMT_WT (1UL << PG_uncached | 1UL << PG_arch_1)
-#define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_UC_MINUS (1UL << PG_arch_2)
+#define _PGMT_WT (1UL << PG_arch_2 | 1UL << PG_arch_1)
+#define _PGMT_MASK (1UL << PG_arch_2 | 1UL << PG_arch_1)
#define _PGMT_CLEAR_MASK (~_PGMT_MASK)
static inline enum page_cache_mode get_page_memtype(struct page *pg)
@@ -951,23 +951,20 @@ static void free_pfn_range(u64 paddr, unsigned long size)
static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
resource_size_t *phys)
{
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ struct follow_pfnmap_args args = { .vma = vma, .address = vma->vm_start };
- if (follow_pte(vma, vma->vm_start, &ptep, &ptl))
+ if (follow_pfnmap_start(&args))
return -EINVAL;
- pte = ptep_get(ptep);
-
/* Never return PFNs of anon folios in COW mappings. */
- if (vm_normal_folio(vma, vma->vm_start, pte)) {
- pte_unmap_unlock(ptep, ptl);
+ if (!args.special) {
+ follow_pfnmap_end(&args);
return -EINVAL;
}
- *prot = pgprot_val(pte_pgprot(pte));
- *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
- pte_unmap_unlock(ptep, ptl);
+ *prot = pgprot_val(args.pgprot);
+ *phys = (resource_size_t)args.pfn << PAGE_SHIFT;
+ follow_pfnmap_end(&args);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f5931499c2d6..5745a354a241 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -641,6 +641,18 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
}
#endif
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return old;
+}
+#endif
+
/**
* reserve_top_address - reserves a hole in the top of kernel address space
* @reserve - size of hole to reserve
@@ -926,3 +938,9 @@ void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
pmd_shstk(pmd));
}
+
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+ /* See note in arch_check_zapped_pte() */
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pud_shstk(pud));
+}
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 76d9f6ce7a3d..f238f7b33cdd 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -52,8 +52,11 @@ subsys_initcall(init_vdso);
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- int err;
+ struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
+ static struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ };
if (!vdso_enabled)
return 0;
@@ -61,12 +64,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (mmap_write_lock_killable(mm))
return -EINTR;
- err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
+ vdso_mapping.pages = vdsop;
+ vma = _install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdsop);
+ &vdso_mapping);
mmap_write_unlock(mm);
- return err;
+ return IS_ERR(vma) ? PTR_ERR(vma) : 0;
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 839e6613753d..55a4996d0c04 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -665,7 +665,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
{
spinlock_t *ptl = NULL;
-#if USE_SPLIT_PTE_PTLOCKS
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS)
ptl = ptlock_ptr(page_ptdesc(page));
spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif
@@ -1553,7 +1553,8 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
__set_pfn_prot(pfn, PAGE_KERNEL_RO);
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned)
+ if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS) &&
+ !pinned)
__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
xen_mc_issue(XEN_LAZY_MMU);
@@ -1581,7 +1582,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
if (pinned) {
xen_mc_batch();
- if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
+ if (level == PT_PTE && IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS))
__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
__set_pfn_prot(pfn, PAGE_KERNEL);