diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 11:09:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-05-14 11:09:39 -0700 |
commit | 103916ffe24969a4c938ccfe89e956fe7d9339fd (patch) | |
tree | b09758778cc29b1aca5ac8e2eae6322b00d6f093 /arch/arm64 | |
parent | 1338acfe629ddd955fd524fc01e26bca4f1bb22b (diff) | |
parent | 54e1a2aa61a7bf4af2799baf7ab2dc2712844245 (diff) | |
download | linux-stable-103916ffe24969a4c938ccfe89e956fe7d9339fd.tar.gz linux-stable-103916ffe24969a4c938ccfe89e956fe7d9339fd.tar.bz2 linux-stable-103916ffe24969a4c938ccfe89e956fe7d9339fd.zip |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Will Deacon:
"The most interesting parts are probably the mm changes from Ryan which
optimise the creation of the linear mapping at boot and (separately)
implement write-protect support for userfaultfd.
Outside of our usual directories, the Kbuild-related changes under
scripts/ have been acked by Masahiro whilst the drivers/acpi/ parts
have been acked by Rafael and the addition of cpumask_any_and_but()
has been acked by Yury.
ACPI:
- Support for the Firmware ACPI Control Structure (FACS) signature
feature which is used to reboot out of hibernation on some systems
Kbuild:
- Support for building Flat Image Tree (FIT) images, where the kernel
Image is compressed alongside a set of devicetree blobs
Memory management:
- Optimisation of our early page-table manipulation for creation of
the linear mapping
- Support for userfaultfd write protection, which brings along some
nice cleanups to our handling of invalid but present ptes
- Extend our use of range TLBI invalidation at EL1
Perf and PMUs:
- Ensure that the 'pmu->parent' pointer is correctly initialised by
PMU drivers
- Avoid allocating 'cpumask_t' types on the stack in some PMU drivers
- Fix parsing of the CPU PMU "version" field in assembly code, as it
doesn't follow the usual architectural rules
- Add best-effort unwinding support for USER_STACKTRACE
- Minor driver fixes and cleanups
Selftests:
- Minor cleanups to the arm64 selftests (missing NULL check, unused
variable)
Miscellaneous:
- Add a command-line alias for disabling 32-bit application support
- Add part number for Neoverse-V2 CPUs
- Minor fixes and cleanups"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (64 commits)
arm64/mm: Fix pud_user_accessible_page() for PGTABLE_LEVELS <= 2
arm64/mm: Add uffd write-protect support
arm64/mm: Move PTE_PRESENT_INVALID to overlay PTE_NG
arm64/mm: Remove PTE_PROT_NONE bit
arm64/mm: generalize PMD_PRESENT_INVALID for all levels
arm64: simplify arch_static_branch/_jump function
arm64: Add USER_STACKTRACE support
arm64: Add the arm64.no32bit_el0 command line option
drivers/perf: hisi: hns3: Actually use devm_add_action_or_reset()
drivers/perf: hisi: hns3: Fix out-of-bound access when valid event group
drivers/perf: hisi_pcie: Fix out-of-bound access when valid event group
kselftest: arm64: Add a null pointer check
arm64: defer clearing DAIF.D
arm64: assembler: update stale comment for disable_step_tsk
arm64/sysreg: Update PIE permission encodings
kselftest/arm64: Remove unused parameters in abi test
perf/arm-spe: Assign parents for event_source device
perf/arm-smmuv3: Assign parents for event_source device
perf/arm-dsu: Assign parents for event_source device
perf/arm-dmc620: Assign parents for event_source device
...
Diffstat (limited to 'arch/arm64')
-rw-r--r-- | arch/arm64/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm64/Makefile | 11 | ||||
-rw-r--r-- | arch/arm64/boot/.gitignore | 1 | ||||
-rw-r--r-- | arch/arm64/boot/Makefile | 6 | ||||
-rw-r--r-- | arch/arm64/include/asm/assembler.h | 13 | ||||
-rw-r--r-- | arch/arm64/include/asm/cputype.h | 2 | ||||
-rw-r--r-- | arch/arm64/include/asm/el2_setup.h | 9 | ||||
-rw-r--r-- | arch/arm64/include/asm/irqflags.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/jump_label.h | 28 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable-prot.h | 19 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable.h | 114 | ||||
-rw-r--r-- | arch/arm64/include/asm/sysreg.h | 24 | ||||
-rw-r--r-- | arch/arm64/include/asm/tlbflush.h | 33 | ||||
-rw-r--r-- | arch/arm64/kernel/acpi.c | 10 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_callchain.c | 118 | ||||
-rw-r--r-- | arch/arm64/kernel/pi/idreg-override.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/setup.c | 11 | ||||
-rw-r--r-- | arch/arm64/kernel/smp.c | 7 | ||||
-rw-r--r-- | arch/arm64/kernel/stacktrace.c | 120 | ||||
-rw-r--r-- | arch/arm64/mm/mmu.c | 101 | ||||
-rw-r--r-- | arch/arm64/mm/proc.S | 10 |
21 files changed, 384 insertions, 258 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b11c98b3e84..a04059c31aba 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -255,9 +255,11 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK + select USER_STACKTRACE_SUPPORT help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0e075d3c546b..b8b1d4f4a572 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -154,6 +154,10 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot +BOOT_TARGETS := Image vmlinuz.efi image.fit + +PHONY += $(BOOT_TARGETS) + ifeq ($(CONFIG_EFI_ZBOOT),) KBUILD_IMAGE := $(boot)/Image.gz else @@ -162,8 +166,10 @@ endif all: $(notdir $(KBUILD_IMAGE)) -vmlinuz.efi: Image -Image vmlinuz.efi: vmlinux +image.fit: dtbs + +vmlinuz.efi image.fit: Image +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ Image.%: Image @@ -215,6 +221,7 @@ virtconfig: define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' + echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index af5dc61f8b43..abaae9de1bdd 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -2,3 +2,4 @@ Image Image.gz vmlinuz* +image.fit diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index a5a787371117..607a67a649c4 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,8 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo \ + Image.zst image.fit $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) @@ -39,6 +40,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/Image.zst: $(obj)/Image FORCE $(call if_changed,zstd) +$(obj)/image.fit: $(obj)/Image $(obj)/dts/dtbs-list FORCE + $(call if_changed,fit) + EFI_ZBOOT_PAYLOAD := Image EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64 EFI_ZBOOT_MACH_TYPE := ARM64 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ab8b396428da..bc0b0d75acef 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -50,16 +50,12 @@ msr daif, \flags .endm - .macro enable_dbg - msr daifclr, #8 - .endm - .macro disable_step_tsk, flgs, tmp tbz \flgs, #TIF_SINGLESTEP, 9990f mrs \tmp, mdscr_el1 bic \tmp, \tmp, #DBG_MDSCR_SS msr mdscr_el1, \tmp - isb // Synchronise with enable_dbg + isb // Take effect before a subsequent clear of DAIF.D 9990: .endm @@ -480,9 +476,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 52f076afeb96..936389e9aecb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,7 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -159,6 +160,7 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842..e4546b29dd0c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -59,13 +59,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 0a7186a93882..d4d7451c2c12 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,7 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#include <asm/alternative.h> #include <asm/barrier.h> #include <asm/ptrace.h> #include <asm/sysreg.h> diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 6aafbb789991..4e753908b801 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,17 +15,23 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +#define JUMP_TABLE_ENTRY(key, label) \ + ".pushsection __jump_table, \"aw\"\n\t" \ + ".align 3\n\t" \ + ".long 1b - ., %l["#label"] - .\n\t" \ + ".quad %c0 - .\n\t" \ + ".popsection\n\t" \ + : : "i"(key) : : label + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; + asm goto( "1: nop \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: @@ -35,15 +41,11 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { + char *k = &((char *)key)[branch]; asm goto( "1: b %l[l_yes] \n\t" - " .pushsection __jump_table, \"aw\" \n\t" - " .align 3 \n\t" - " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" - " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); - + JUMP_TABLE_ENTRY(k, l_yes) + ); return false; l_yes: return true; diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index dd9ee67d1d87..b11cfb9fdd37 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -18,14 +18,21 @@ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* - * This bit indicates that the entry is present i.e. pmd_page() - * still points to a valid huge page in memory even if the pmd - * has been invalidated. + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be + * interpreted according to the HW layout by SW but any attempted HW access to + * the address will result in a fault. pte_present() returns true. */ -#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define PTE_PRESENT_INVALID (PTE_NG) /* only when !PTE_VALID */ + +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define PTE_UFFD_WP (_AT(pteval_t, 1) << 58) /* uffd-wp tracking */ +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 1) << 3) /* only for swp ptes */ +#else +#define PTE_UFFD_WP (_AT(pteval_t, 0)) +#define PTE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) @@ -103,7 +110,7 @@ static inline bool __pure lpa2_is_enabled(void) __val; \ }) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_SHARED) #define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index afdd56d26ad7..bde9fd179388 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) @@ -132,6 +132,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +#define pte_present_invalid(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID) /* * Execute-only user mappings do not have the PTE_USER bit set. All valid * kernel mappings have the PTE_UXN bit set. @@ -261,6 +263,13 @@ static inline pte_t pte_mkpresent(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_VALID)); } +static inline pte_t pte_mkinvalid(pte_t pte) +{ + pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID)); + pte = clear_pte_bit(pte, __pgprot(PTE_VALID)); + return pte; +} + static inline pmd_t pmd_mkcont(pmd_t pmd) { return __pmd(pmd_val(pmd) | PMD_SECT_CONT); @@ -271,9 +280,31 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } -static inline void __set_pte(pte_t *ptep, pte_t pte) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_UFFD_WP); +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(set_pte_bit(pte, __pgprot(PTE_UFFD_WP))); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +static inline void __set_pte_nosync(pte_t *ptep, pte_t pte) { WRITE_ONCE(*ptep, pte); +} + +static inline void __set_pte(pte_t *ptep, pte_t pte) +{ + __set_pte_nosync(ptep, pte); /* * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -463,13 +494,39 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & PTE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_SWP_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; + /* + * pte_present_invalid() tells us that the pte is invalid from HW + * perspective but present from SW perspective, so the fields are to be + * interpretted as per the HW layout. The second 2 checks are the unique + * encoding that we use for PROT_NONE. It is insufficient to only use + * the first check because we share the same encoding scheme with pmds + * which support pmd_mkinvalid(), so can be present-invalid without + * being PROT_NONE. + */ + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -478,12 +535,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -#define pmd_present_invalid(pmd) (!!(pmd_val(pmd) & PMD_PRESENT_INVALID)) - -static inline int pmd_present(pmd_t pmd) -{ - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); -} +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) /* * THP definitions. @@ -508,14 +560,16 @@ static inline int pmd_trans_huge(pmd_t pmd) #define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) - -static inline pmd_t pmd_mkinvalid(pmd_t pmd) -{ - pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID)); - pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID)); - - return pmd; -} +#define pmd_mkinvalid(pmd) pte_pmd(pte_mkinvalid(pmd_pte(pmd))) +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pmd_uffd_wp(pmd) pte_uffd_wp(pmd_pte(pmd)) +#define pmd_mkuffd_wp(pmd) pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))) +#define pmd_clear_uffd_wp(pmd) pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))) +#define pmd_swp_uffd_wp(pmd) pte_swp_uffd_wp(pmd_pte(pmd)) +#define pmd_swp_mkuffd_wp(pmd) pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))) +#define pmd_swp_clear_uffd_wp(pmd) \ + pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))) +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) @@ -760,6 +814,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else +#define pud_valid(pud) false #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) #define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ @@ -1005,6 +1060,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr) static inline bool pgtable_l5_enabled(void) { return false; } +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) + /* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */ #define p4d_set_fixmap(addr) NULL #define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp) @@ -1027,8 +1084,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | - PTE_ATTRINDX_MASK; + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | + PTE_GP | PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); @@ -1076,17 +1133,17 @@ static inline int pgd_devmap(pgd_t pgd) #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif @@ -1248,15 +1305,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * Encode and decode a swap entry: * bits 0-1: present (must be zero) * bits 2: remember PG_anon_exclusive - * bits 3-7: swap type - * bits 8-57: swap offset - * bit 58: PTE_PROT_NONE (must be zero) + * bit 3: remember uffd-wp state + * bits 6-10: swap type + * bit 11: PTE_PRESENT_INVALID (must be zero) + * bits 12-61: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 6 #define __SWP_TYPE_BITS 5 -#define __SWP_OFFSET_BITS 50 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) -#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_SHIFT 12 +#define __SWP_OFFSET_BITS 50 #define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e8999592f3a..af3b206fa423 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1036,18 +1036,18 @@ * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension). */ -#define PIE_NONE_O 0x0 -#define PIE_R_O 0x1 -#define PIE_X_O 0x2 -#define PIE_RX_O 0x3 -#define PIE_RW_O 0x5 -#define PIE_RWnX_O 0x6 -#define PIE_RWX_O 0x7 -#define PIE_R 0x8 -#define PIE_GCS 0x9 -#define PIE_RX 0xa -#define PIE_RW 0xc -#define PIE_RWX 0xe +#define PIE_NONE_O UL(0x0) +#define PIE_R_O UL(0x1) +#define PIE_X_O UL(0x2) +#define PIE_RX_O UL(0x3) +#define PIE_RW_O UL(0x5) +#define PIE_RWnX_O UL(0x6) +#define PIE_RWX_O UL(0x7) +#define PIE_R UL(0x8) +#define PIE_GCS UL(0x9) +#define PIE_RX UL(0xa) +#define PIE_RW UL(0xc) +#define PIE_RWX UL(0xe) #define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a75de2665d84..95fbc8c05607 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -142,17 +142,24 @@ static inline unsigned long get_trans_granule(void) * EL1, Inner Shareable". * */ -#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ - ({ \ - unsigned long __ta = (baddr); \ - unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ - __ta &= GENMASK_ULL(36, 0); \ - __ta |= __ttl << 37; \ - __ta |= (unsigned long)(num) << 39; \ - __ta |= (unsigned long)(scale) << 44; \ - __ta |= get_trans_granule() << 46; \ - __ta |= (unsigned long)(asid) << 48; \ - __ta; \ +#define TLBIR_ASID_MASK GENMASK_ULL(63, 48) +#define TLBIR_TG_MASK GENMASK_ULL(47, 46) +#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44) +#define TLBIR_NUM_MASK GENMASK_ULL(43, 39) +#define TLBIR_TTL_MASK GENMASK_ULL(38, 37) +#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0) + +#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = 0; \ + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ + __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \ + __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \ + __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \ + __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \ + __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \ + __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \ + __ta; \ }) /* These macros are used by the TLBI RANGE feature. */ @@ -439,11 +446,11 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma, * When not uses TLB range ops, we can handle up to * (MAX_DVM_OPS - 1) pages; * When uses TLB range ops, we can handle up to - * (MAX_TLBI_RANGE_PAGES - 1) pages. + * MAX_TLBI_RANGE_PAGES pages. */ if ((!system_supports_tlb_range() && (end - start) >= (MAX_DVM_OPS * stride)) || - pages >= MAX_TLBI_RANGE_PAGES) { + pages > MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index dba8fcec7f33..e0e7b93c16cc 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -26,6 +26,7 @@ #include <linux/libfdt.h> #include <linux/smp.h> #include <linux/serial_core.h> +#include <linux/suspend.h> #include <linux/pgtable.h> #include <acpi/ghes.h> @@ -227,6 +228,15 @@ done: if (earlycon_acpi_spcr_enable) early_init_dt_scan_chosen_stdout(); } else { +#ifdef CONFIG_HIBERNATION + struct acpi_table_header *facs = NULL; + acpi_get_table(ACPI_SIG_FACS, 1, &facs); + if (facs) { + swsusp_hardware_signature = + ((struct acpi_table_facs *)facs)->hardware_signature; + acpi_put_table(facs); + } +#endif acpi_parse_spcr(earlycon_acpi_spcr_enable, true); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 6d157f32187b..e8ed5673f481 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -10,94 +10,12 @@ #include <asm/pointer_auth.h> -struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. - */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) -{ - struct frame_tail buftail; - unsigned long err; - unsigned long lr; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - lr = ptrauth_strip_user_insn_pac(buftail.lr); - - perf_callchain_store(entry, lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail >= buftail.fp) - return NULL; - - return buftail.fp; -} - -#ifdef CONFIG_COMPAT -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct compat_frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct compat_frame_tail { - compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ - u32 sp; - u32 lr; -} __attribute__((packed)); - -static struct compat_frame_tail __user * -compat_user_backtrace(struct compat_frame_tail __user *tail, - struct perf_callchain_entry_ctx *entry) +static bool callchain_trace(void *data, unsigned long pc) { - struct compat_frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail + 1 >= (struct compat_frame_tail __user *) - compat_ptr(buftail.fp)) - return NULL; + struct perf_callchain_entry_ctx *entry = data; - return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; + return perf_callchain_store(entry, pc) == 0; } -#endif /* CONFIG_COMPAT */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) @@ -107,35 +25,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, return; } - perf_callchain_store(entry, regs->pc); - - if (!compat_user_mode(regs)) { - /* AARCH64 mode */ - struct frame_tail __user *tail; - - tail = (struct frame_tail __user *)regs->regs[29]; - - while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0x7)) - tail = user_backtrace(tail, entry); - } else { -#ifdef CONFIG_COMPAT - /* AARCH32 compat mode */ - struct compat_frame_tail __user *tail; - - tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - - while ((entry->nr < entry->max_stack) && - tail && !((unsigned long)tail & 0x3)) - tail = compat_user_backtrace(tail, entry); -#endif - } -} - -static bool callchain_trace(void *data, unsigned long pc) -{ - struct perf_callchain_entry_ctx *entry = data; - return perf_callchain_store(entry, pc) == 0; + arch_stack_walk_user(callchain_trace, entry, regs); } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/arm64/kernel/pi/idreg-override.c b/arch/arm64/kernel/pi/idreg-override.c index aad399796e81..48c1aa456af9 100644 --- a/arch/arm64/kernel/pi/idreg-override.c +++ b/arch/arm64/kernel/pi/idreg-override.c @@ -108,6 +108,7 @@ static const struct ftr_set_desc pfr0 __prel64_initconst = { .override = &id_aa64pfr0_override, .fields = { FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter), + FIELD("el0", ID_AA64PFR0_EL1_EL0_SHIFT, NULL), {} }, }; @@ -223,6 +224,7 @@ static const struct { { "nokaslr", "arm64_sw.nokaslr=1" }, { "rodata=off", "arm64_sw.rodataoff=1" }, { "arm64.nolva", "id_aa64mmfr2.varange=0" }, + { "arm64.no32bit_el0", "id_aa64pfr0.el0=1" }, }; static int __init parse_hexdigit(const char *p, u64 *v) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 65a052bf741f..a096e2451044 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -298,8 +298,15 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) dynamic_scs_init(); /* - * Unmask SError as soon as possible after initializing earlycon so - * that we can report any SErrors immediately. + * The primary CPU enters the kernel with all DAIF exceptions masked. + * + * We must unmask Debug and SError before preemption or scheduling is + * possible to ensure that these are consistently unmasked across + * threads, and we want to unmask SError as soon as possible after + * initializing earlycon so that we can report any SErrors immediately. + * + * IRQ and FIQ will be unmasked after the root irqchip has been + * detected and initialized. */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4ced34f62dab..31c8b3094dd7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -264,6 +264,13 @@ asmlinkage notrace void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Secondary CPUs enter the kernel with all DAIF exceptions masked. + * + * As with setup_arch() we must unmask Debug and SError exceptions, and + * as the root irqchip has already been detected and initialized we can + * unmask IRQ and FIQ at the same time. + */ local_daif_restore(DAIF_PROCCTX); /* diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 684c26511696..6b3258860377 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -324,3 +324,123 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) dump_backtrace(NULL, tsk, loglvl); barrier(); } + +/* + * The struct defined for userspace stack frame in AARCH64 mode. + */ +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +unwind_user_frame(struct frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct frame_tail buftail; + unsigned long err; + unsigned long lr; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + lr = ptrauth_strip_user_insn_pac(buftail.lr); + + if (!consume_entry(cookie, lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +unwind_compat_user_frame(struct compat_frame_tail __user *tail, void *cookie, + stack_trace_consume_fn consume_entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + if (!consume_entry(cookie, buftail.lr)) + return NULL; + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + + +void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, + const struct pt_regs *regs) +{ + if (!consume_entry(cookie, regs->pc)) + return; + + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + while (tail && !((unsigned long)tail & 0x7)) + tail = unwind_user_frame(tail, cookie, consume_entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + while (tail && !((unsigned long)tail & 0x3)) + tail = unwind_compat_user_frame(tail, cookie, consume_entry); +#endif + } +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 495b732d5af3..c927e9312f10 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot); static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; - void *ptr; phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); - /* - * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE - * slot will be free, so we can (ab)use the FIX_PTE slot to initialise - * any level of table. - */ - ptr = pte_set_fixmap(phys); - - memset(ptr, 0, PAGE_SIZE); - - /* - * Implicit barriers also ensure the zeroed page is visible to the page - * table walker - */ - pte_clear_fixmap(); - return phys; } @@ -172,16 +156,25 @@ bool pgattr_change_is_safe(u64 old, u64 new) return ((old ^ new) & ~mask) == 0; } -static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) +static void init_clear_pgtable(void *table) { - pte_t *ptep; + clear_page(table); - ptep = pte_set_fixmap_offset(pmdp, addr); + /* Ensure the zeroing is observed by page table walks. */ + dsb(ishst); +} + +static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) +{ do { pte_t old_pte = __ptep_get(ptep); - __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); + /* + * Required barriers to make this visible to the table walker + * are deferred to the end of alloc_init_cont_pte(). + */ + __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we @@ -192,8 +185,6 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys += PAGE_SIZE; } while (ptep++, addr += PAGE_SIZE, addr != end); - - pte_clear_fixmap(); } static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, @@ -204,6 +195,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, { unsigned long next; pmd_t pmd = READ_ONCE(*pmdp); + pte_t *ptep; BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { @@ -214,10 +206,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + ptep = pte_set_fixmap(pte_phys); + init_clear_pgtable(ptep); + ptep += pte_index(addr); __pmd_populate(pmdp, pte_phys, pmdval); - pmd = READ_ONCE(*pmdp); + } else { + BUG_ON(pmd_bad(pmd)); + ptep = pte_set_fixmap_offset(pmdp, addr); } - BUG_ON(pmd_bad(pmd)); do { pgprot_t __prot = prot; @@ -229,20 +225,26 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pte(pmdp, addr, next, phys, __prot); + init_pte(ptep, addr, next, phys, __prot); + ptep += pte_index(next) - pte_index(addr); phys += next - addr; } while (addr = next, addr != end); + + /* + * Note: barriers and maintenance necessary to clear the fixmap slot + * ensure that all previous pgtable writes are visible to the table + * walker. + */ + pte_clear_fixmap(); } -static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, +static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; - pmd_t *pmdp; - pmdp = pmd_set_fixmap_offset(pudp, addr); do { pmd_t old_pmd = READ_ONCE(*pmdp); @@ -268,8 +270,6 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, } phys += next - addr; } while (pmdp++, addr = next, addr != end); - - pmd_clear_fixmap(); } static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, @@ -279,6 +279,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, { unsigned long next; pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; /* * Check for initial section mappings in the pgd/pud. @@ -292,10 +293,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); + pmdp = pmd_set_fixmap(pmd_phys); + init_clear_pgtable(pmdp); + pmdp += pmd_index(addr); __pud_populate(pudp, pmd_phys, pudval); - pud = READ_ONCE(*pudp); + } else { + BUG_ON(pud_bad(pud)); + pmdp = pmd_set_fixmap_offset(pudp, addr); } - BUG_ON(pud_bad(pud)); do { pgprot_t __prot = prot; @@ -307,10 +312,13 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags); + init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + pmdp += pmd_index(next) - pmd_index(addr); phys += next - addr; } while (addr = next, addr != end); + + pmd_clear_fixmap(); } static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, @@ -330,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); + pudp = pud_set_fixmap(pud_phys); + init_clear_pgtable(pudp); + pudp += pud_index(addr); __p4d_populate(p4dp, pud_phys, p4dval); - p4d = READ_ONCE(*p4dp); + } else { + BUG_ON(p4d_bad(p4d)); + pudp = pud_set_fixmap_offset(p4dp, addr); } - BUG_ON(p4d_bad(p4d)); - pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -385,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = pgtable_alloc(P4D_SHIFT); + p4dp = p4d_set_fixmap(p4d_phys); + init_clear_pgtable(p4dp); + p4dp += p4d_index(addr); __pgd_populate(pgdp, p4d_phys, pgdval); - pgd = READ_ONCE(*pgdp); + } else { + BUG_ON(pgd_bad(pgd)); + p4dp = p4d_set_fixmap_offset(pgdp, addr); } - BUG_ON(pgd_bad(pgd)); - p4dp = p4d_set_fixmap_offset(pgdp, addr); do { p4d_t old_p4d = READ_ONCE(*p4dp); @@ -457,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, static phys_addr_t __pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); - BUG_ON(!ptr); + /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); + BUG_ON(!ptr); return __pa(ptr); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 9d40f3ffd8d2..f4bc6c5bac06 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -135,14 +135,6 @@ SYM_FUNC_START(cpu_do_resume) msr tcr_el1, x8 msr vbar_el1, x9 - - /* - * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking - * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug - * exception. Mask them until local_daif_restore() in cpu_suspend() - * resets them. - */ - disable_daif msr mdscr_el1, x10 msr sctlr_el1, x12 @@ -466,8 +458,6 @@ SYM_FUNC_START(__cpu_setup) msr cpacr_el1, xzr // Reset cpacr_el1 mov x1, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x1 // access to the DCC from EL0 - isb // Unmask debug exceptions now, - enable_dbg // since this is per-cpu reset_pmuserenr_el0 x1 // Disable PMU access from EL0 reset_amuserenr_el0 x1 // Disable AMU access from EL0 |