Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                   |   2
-rw-r--r--  arch/x86/include/asm/io.h          |   1
-rw-r--r--  arch/x86/include/asm/pgtable.h     |  20
-rw-r--r--  arch/x86/include/asm/pgtable_64.h  |   1
-rw-r--r--  arch/x86/kernel/amd_gart_64.c      |   2
-rw-r--r--  arch/x86/kernel/cpu/sgx/driver.c   |   2
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c     |   1
-rw-r--r--  arch/x86/kernel/irq_64.c           |   1
-rw-r--r--  arch/x86/kernel/setup.c            |   2
-rw-r--r--  arch/x86/kernel/sys_x86_64.c       |  42
-rw-r--r--  arch/x86/mm/fault.c                |  24
-rw-r--r--  arch/x86/mm/hugetlbpage.c          |  35
-rw-r--r--  arch/x86/mm/init.c                 |  47
-rw-r--r--  arch/x86/mm/mmap.c                 |   4
-rw-r--r--  arch/x86/mm/numa_32.c              |   1
-rw-r--r--  arch/x86/mm/pat/memtype.c          |  26
-rw-r--r--  arch/x86/mm/pgtable.c              |   6
17 files changed, 91 insertions, 126 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bc47bc9841ff..671bc6fd557c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -224,7 +224,7 @@ config X86
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EISA
 	select HAVE_EXIT_THREAD
-	select HAVE_FAST_GUP
+	select HAVE_GUP_FAST
 	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_RETVAL	if HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 4b99ed326b17..1d60427379c9 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -42,6 +42,7 @@
 #include <asm/early_ioremap.h>
 #include <asm/pgtable_types.h>
 #include <asm/shared/io.h>
+#include <asm/special_insns.h>
 
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 315535ffb258..65b8e5bb902c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -234,6 +234,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
+#define pud_pfn pud_pfn
 static inline unsigned long pud_pfn(pud_t pud)
 {
 	phys_addr_t pfn = pud_val(pud);
@@ -387,23 +388,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
 static inline int pte_uffd_wp(pte_t pte)
 {
-	bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
-
-#ifdef CONFIG_DEBUG_VM
-	/*
-	 * Having write bit for wr-protect-marked present ptes is fatal,
-	 * because it means the uffd-wp bit will be ignored and write will
-	 * just go through.
-	 *
-	 * Use any chance of pgtable walking to verify this (e.g., when
-	 * page swapped out or being migrated for all purposes). It means
-	 * something is already wrong. Tell the admin even before the
-	 * process crashes. We also nail it with wrong pgtable setup.
-	 */
-	WARN_ON_ONCE(wp && pte_write(pte));
-#endif
-
-	return wp;
+	return pte_flags(pte) & _PAGE_UFFD_WP;
 }
 
 static inline pte_t pte_mkuffd_wp(pte_t pte)
@@ -1200,7 +1185,6 @@ static inline int pgd_none(pgd_t pgd)
 extern int direct_gbpages;
 void init_mem_mapping(void);
 void early_alloc_pgt_buf(void);
-extern void memblock_find_dma_reserve(void);
 void __init poking_init(void);
 unsigned long init_memory_mapping(unsigned long start,
 				  unsigned long end, pgprot_t prot);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e9db77231ac..3c4407271d08 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -245,6 +245,7 @@ extern void cleanup_highmap(void);
 
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#define HAVE_ARCH_UNMAPPED_AREA_VMFLAGS
 
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 2ae98f754e59..c884deca839b 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -676,7 +676,7 @@ static const struct dma_map_ops gart_dma_ops = {
 	.get_sgtable			= dma_common_get_sgtable,
 	.dma_supported			= dma_direct_supported,
 	.get_required_mask		= dma_direct_get_required_mask,
-	.alloc_pages			= dma_direct_alloc_pages,
+	.alloc_pages_op			= dma_direct_alloc_pages,
 	.free_pages			= dma_direct_free_pages,
 };
 
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 262f5fb18d74..22b65a5f5ec6 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -113,7 +113,7 @@ static unsigned long sgx_get_unmapped_area(struct file *file,
 	if (flags & MAP_FIXED)
 		return addr;
 
-	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 166692f2d501..27892e57c4ef 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -13,6 +13,7 @@
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
 #include <linux/sysfs.h>
+#include <linux/vmalloc.h>
 #include <asm/sgx.h>
 #include "driver.h"
 #include "encl.h"
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index fe0c859873d1..ade0043ce56e 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/sched/task_stack.h>
+#include <linux/vmalloc.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/softirq_stack.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 55a1fc332e20..05c5aa951da7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1108,8 +1108,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	arch_reserve_crashkernel();
 
-	memblock_find_dma_reserve();
-
 	if (!early_xdbc_setup_hardware())
 		early_xdbc_register_console();
 
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index cb9fa1d5c66f..01d7cd85ef97 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -112,13 +112,21 @@ static void find_start_end(unsigned long addr, unsigned long flags,
 	*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 }
 
+static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
+{
+	if (vm_flags & VM_SHADOW_STACK)
+		return PAGE_SIZE;
+
+	return 0;
+}
+
 unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-		unsigned long len, unsigned long pgoff, unsigned long flags)
+arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
+			       unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 	unsigned long begin, end;
 
 	if (flags & MAP_FIXED)
@@ -137,12 +145,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			return addr;
 	}
 
-	info.flags = 0;
 	info.length = len;
 	info.low_limit = begin;
 	info.high_limit = end;
-	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
+	info.start_gap = stack_guard_placement(vm_flags);
 	if (filp) {
 		info.align_mask = get_align_mask();
 		info.align_offset += get_align_bits();
@@ -151,14 +158,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 
 unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
-			  const unsigned long len, const unsigned long pgoff,
-			  const unsigned long flags)
+arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr0,
+				       unsigned long len, unsigned long pgoff,
+				       unsigned long flags, vm_flags_t vm_flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
@@ -192,6 +199,7 @@ get_unmapped_area:
 
 	info.low_limit = PAGE_SIZE;
 	info.high_limit = get_mmap_base(0);
+	info.start_gap = stack_guard_placement(vm_flags);
 
 	/*
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
@@ -203,7 +211,6 @@ get_unmapped_area:
 	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
-	info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	if (filp) {
 		info.align_mask = get_align_mask();
@@ -223,3 +230,18 @@ bottomup:
 	 */
 	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 }
+
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
+		const unsigned long len, const unsigned long pgoff,
+		const unsigned long flags)
+{
+	return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff, flags, 0);
+}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f26ecabc9424..e6c469b323cc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,7 @@
 #include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <linux/mm_types.h>
 #include <linux/mm.h>			/* find_and_lock_vma() */
+#include <linux/vmalloc.h>
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -835,14 +836,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, u32 pkey, int si_code)
+	   unsigned long address, struct mm_struct *mm,
+	   struct vm_area_struct *vma, u32 pkey, int si_code)
 {
-	struct mm_struct *mm = current->mm;
 	/*
 	 * Something tried to access memory that isn't in our memory map..
 	 * Fix it, but check if it's kernel or user first..
 	 */
-	mmap_read_unlock(mm);
+	if (mm)
+		mmap_read_unlock(mm);
+	else
+		vma_end_read(vma);
 
 	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
 }
@@ -866,7 +870,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
 static noinline void
 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
-		      unsigned long address, struct vm_area_struct *vma)
+		      unsigned long address, struct mm_struct *mm,
+		      struct vm_area_struct *vma)
 {
 	/*
 	 * This OSPKE check is not strictly necessary at runtime.
@@ -896,9 +901,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 		 */
 		u32 pkey = vma_pkey(vma);
 
-		__bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+		__bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
 	} else {
-		__bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+		__bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
 	}
 }
 
@@ -1326,8 +1331,9 @@ void do_user_addr_fault(struct pt_regs *regs,
 		goto lock_mmap;
 
 	if (unlikely(access_error(error_code, vma))) {
-		vma_end_read(vma);
-		goto lock_mmap;
+		bad_area_access_error(regs, error_code, address, NULL, vma);
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		return;
 	}
 	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1363,7 +1369,7 @@ retry:
 	 * we can handle it..
 	 */
 	if (unlikely(access_error(error_code, vma))) {
-		bad_area_access_error(regs, error_code, address, vma);
+		bad_area_access_error(regs, error_code, address, mm, vma);
 		return;
 	}
 
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 5804bbae4f01..807a5859a3c4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,41 +19,14 @@
 #include <asm/tlbflush.h>
 #include <asm/elf.h>
 
-/*
- * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
- * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
- * Otherwise, returns 0.
- */
-int pmd_huge(pmd_t pmd)
-{
-	return !pmd_none(pmd) &&
-		(pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
-}
-
-/*
- * pud_huge() returns 1 if @pud is hugetlb related entry, that is normal
- * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
- * Otherwise, returns 0.
- */
-int pud_huge(pud_t pud)
-{
-#if CONFIG_PGTABLE_LEVELS > 2
-	return !pud_none(pud) &&
-		(pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
-#else
-	return 0;
-#endif
-}
-
 #ifdef CONFIG_HUGETLB_PAGE
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
 	struct hstate *h = hstate_file(file);
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 
-	info.flags = 0;
 	info.length = len;
 	info.low_limit = get_mmap_base(1);
 
@@ -65,7 +38,6 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-	info.align_offset = 0;
 	return vm_unmapped_area(&info);
 }
 
@@ -74,7 +46,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 		unsigned long pgoff, unsigned long flags)
 {
 	struct hstate *h = hstate_file(file);
-	struct vm_unmapped_area_info info;
+	struct vm_unmapped_area_info info = {};
 
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
@@ -89,7 +61,6 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-	info.align_offset = 0;
 	addr = vm_unmapped_area(&info);
 
 	/*
@@ -141,7 +112,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 
 get_unmapped_area:
-	if (mm->get_unmapped_area == arch_get_unmapped_area)
+	if (!test_bit(MMF_TOPDOWN, &mm->flags))
 		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
 				pgoff, flags);
 	else
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a51f22c90a7a..eb503f53c319 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -987,53 +987,6 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
-/*
- * Calculate the precise size of the DMA zone (first 16 MB of RAM),
- * and pass it to the MM layer - to help it set zone watermarks more
- * accurately.
- *
- * Done on 64-bit systems only for the time being, although 32-bit systems
- * might benefit from this as well.
- */
-void __init memblock_find_dma_reserve(void)
-{
-#ifdef CONFIG_X86_64
-	u64 nr_pages = 0, nr_free_pages = 0;
-	unsigned long start_pfn, end_pfn;
-	phys_addr_t start_addr, end_addr;
-	int i;
-	u64 u;
-
-	/*
-	 * Iterate over all memory ranges (free and reserved ones alike),
-	 * to calculate the total number of pages in the first 16 MB of RAM:
-	 */
-	nr_pages = 0;
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
-		start_pfn = min(start_pfn, MAX_DMA_PFN);
-		end_pfn = min(end_pfn, MAX_DMA_PFN);
-
-		nr_pages += end_pfn - start_pfn;
-	}
-
-	/*
-	 * Iterate over free memory ranges to calculate the number of free
-	 * pages in the DMA zone, while not counting potential partial
-	 * pages at the beginning or the end of the range:
-	 */
-	nr_free_pages = 0;
-	for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
-		start_pfn = min_t(unsigned long, PFN_UP(start_addr), MAX_DMA_PFN);
-		end_pfn = min_t(unsigned long, PFN_DOWN(end_addr), MAX_DMA_PFN);
-
-		if (start_pfn < end_pfn)
-			nr_free_pages += end_pfn - start_pfn;
-	}
-
-	set_dma_reserve(nr_pages - nr_free_pages);
-#endif
-}
-
 void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c90c20904a60..a2cabb1c81e1 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -129,9 +129,9 @@ static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	if (mmap_is_legacy())
-		mm->get_unmapped_area = arch_get_unmapped_area;
+		clear_bit(MMF_TOPDOWN, &mm->flags);
 	else
-		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+		set_bit(MMF_TOPDOWN, &mm->flags);
 
 	arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
 			arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 025fd7ea5d69..65fda406e6f2 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,7 @@
 
 #include <linux/memblock.h>
 #include <linux/init.h>
+#include <linux/vmalloc.h>
 #include <asm/pgtable_areas.h>
 
 #include "numa_internal.h"
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 36b603d0cdde..bdc2a240c2aa 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -39,6 +39,7 @@
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/fs.h>
 #include <linux/rbtree.h>
 
@@ -947,6 +948,29 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 	memtype_free(paddr, paddr + size);
 }
 
+static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
+		       resource_size_t *phys)
+{
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	if (follow_pte(vma, vma->vm_start, &ptep, &ptl))
+		return -EINVAL;
+
+	pte = ptep_get(ptep);
+
+	/* Never return PFNs of anon folios in COW mappings. */
+	if (vm_normal_folio(vma, vma->vm_start, pte)) {
+		pte_unmap_unlock(ptep, ptl);
+		return -EINVAL;
+	}
+
+	*prot = pgprot_val(pte_pgprot(pte));
+	*phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+
 static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
 		pgprot_t *pgprot)
 {
@@ -964,7 +988,7 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
 	 * detect the PFN. If we need the cachemode as well, we're out of luck
 	 * for now and have to fail fork().
 	 */
-	if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+	if (!follow_phys(vma, &prot, paddr)) {
 		if (pgprot)
 			*pgprot = __pgprot(prot);
 		return 0;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d007591b8059..93e54ba91fbf 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -631,6 +631,8 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
 pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
 			 pmd_t *pmdp)
 {
+	VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+
 	/*
 	 * No flush is necessary. Once an invalid PTE is established, the PTE's
 	 * access and dirty bits cannot be updated.
@@ -731,7 +733,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 		return 0;
 
 	/* Bail out if we are we on a populated non-leaf entry: */
-	if (pud_present(*pud) && !pud_huge(*pud))
+	if (pud_present(*pud) && !pud_leaf(*pud))
 		return 0;
 
 	set_pte((pte_t *)pud, pfn_pte(
@@ -760,7 +762,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 	}
 
 	/* Bail out if we are we on a populated non-leaf entry: */
-	if (pmd_present(*pmd) && !pmd_huge(*pmd))
+	if (pmd_present(*pmd) && !pmd_leaf(*pmd))
 		return 0;
 
 	set_pte((pte_t *)pmd, pfn_pte(
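Note on the recurring pattern above: the mm->get_unmapped_area function pointer is being retired. Placement policy now lives in the MMF_TOPDOWN mm flag, toggled in arch_pick_mmap_layout(), and callers such as the SGX driver go through the mm_get_unmapped_area() helper. The sketch below is illustrative only, not part of this patch: only mm_get_unmapped_area(), MMF_TOPDOWN and mm->flags come from the series, while the example_ hook name and surrounding boilerplate are made up.

/*
 * Illustrative sketch (assumed, not from this diff): what a driver's
 * get_unmapped_area hook looks like after the conversion.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched.h>

static unsigned long example_get_unmapped_area(struct file *file,
					       unsigned long addr,
					       unsigned long len,
					       unsigned long pgoff,
					       unsigned long flags)
{
	if (flags & MAP_FIXED)
		return addr;

	/*
	 * Previously: current->mm->get_unmapped_area(file, addr, ...).
	 * Now the helper decides between bottom-up and top-down placement
	 * from test_bit(MMF_TOPDOWN, &current->mm->flags), which
	 * arch_pick_mmap_layout() sets or clears instead of installing
	 * one of two function pointers.
	 */
	return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
}

The hugetlb_get_unmapped_area() hunk above makes the same decision directly with test_bit(MMF_TOPDOWN, &mm->flags).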