diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2020-05-13 12:14:05 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2020-05-13 12:14:05 -0400 |
commit | 4aef2ec9022b217f74d0f4c9b84081f07cc223d9 (patch) | |
tree | edf9bb9ca1f8ab6345c156a7e87aaed28939f66c /arch/x86 | |
parent | 7c67f54661fcc8d141fb11abbab1739f32e13b03 (diff) | |
parent | 37486135d3a7b03acc7755b63627a130437f066a (diff) | |
download | linux-stable-4aef2ec9022b217f74d0f4c9b84081f07cc223d9.tar.gz linux-stable-4aef2ec9022b217f74d0f4c9b84081f07cc223d9.tar.bz2 linux-stable-4aef2ec9022b217f74d0f4c9b84081f07cc223d9.zip |
Merge branch 'kvm-amd-fixes' into HEAD
Diffstat (limited to 'arch/x86')
86 files changed, 1228 insertions, 504 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore index 5a82bac5e0bc..677111acbaa3 100644 --- a/arch/x86/.gitignore +++ b/arch/x86/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only boot/compressed/vmlinux tools/test_get_len tools/insn_sanity diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1edf788d301c..1d6104ea8af0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,6 +149,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 + select HAVE_ARCH_USERFAULTFD_WP if USERFAULTFD select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_ASM_MODVERSIONS @@ -1660,6 +1661,7 @@ config X86_PMEM_LEGACY depends on PHYS_ADDR_T_64BIT depends on BLK_DEV select X86_PMEM_LEGACY_DEVICE + select NUMA_KEEP_MEMINFO if NUMA select LIBNVDIMM help Treat memory marked using the non-standard e820 type of 12 as used @@ -2930,3 +2932,5 @@ config HAVE_ATOMIC_IOMAP source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" + +source "arch/x86/Kconfig.assembler" diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler new file mode 100644 index 000000000000..13de0db38d4e --- /dev/null +++ b/arch/x86/Kconfig.assembler @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + +config AS_AVX512 + def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5) + help + Supported by binutils >= 2.25 and LLVM integrated assembler + +config AS_SHA1_NI + def_bool $(as-instr,sha1msg1 %xmm0$(comma)%xmm1) + help + Supported by binutils >= 2.24 and LLVM integrated assembler + +config AS_SHA256_NI + def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) + help + Supported by binutils >= 2.24 and LLVM integrated assembler diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 513a55562d75..b65ec63c7db7 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -177,28 +177,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif -# Stackpointer is addressed different for 32 bit and 64 bit x86 -sp-$(CONFIG_X86_32) := esp -sp-$(CONFIG_X86_64) := rsp - -# do binutils support CFI? -cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) -# is .cfi_signal_frame supported too? -cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) -cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) - -# does binutils support specific instructions? -asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) -avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) -avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) -avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1) -sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) -sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) -adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1) - -KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) -KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr) - KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) # diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 09d25dd09307..9cc7f1357b9b 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only bootsect bzImage cpustr.h diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index 4a46fab7162e..25805199a506 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only relocs vmlinux.bin.all vmlinux.relocs diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore index 378eac25d311..ae91f4d0d78b 100644 --- a/arch/x86/boot/tools/.gitignore +++ b/arch/x86/boot/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only build diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 5b602beb0b72..550904591e94 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -136,7 +136,6 @@ CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y @@ -286,7 +285,6 @@ CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_BOOT_PARAMS=y -CONFIG_OPTIMIZE_INLINING=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index f3d1f36103b1..614961009075 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -134,7 +134,6 @@ CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y @@ -283,7 +282,6 @@ CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_BOOT_PARAMS=y -CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_ORC=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore index 30be0400a439..580c839bb177 100644 --- a/arch/x86/crypto/.gitignore +++ b/arch/x86/crypto/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only poly1305-x86_64-cryptogams.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 8c2e9eadee8a..a31de0c6ccde 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -1,131 +1,97 @@ # SPDX-License-Identifier: GPL-2.0 # -# Arch-specific CryptoAPI modules. -# +# x86 crypto algorithms OBJECT_FILES_NON_STANDARD := y -avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no) -sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) -sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) -adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no) - obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o +twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o +twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o +obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o +twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o + obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o +serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o +serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o +serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o +obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o +serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o +des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o + obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o +camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o camellia_aesni_avx_glue.o +obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o +camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o + obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o -obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o -obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o -obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o -obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o -obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o -obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o -obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o -obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o -obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o -obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o -obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o -obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o +obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o +cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o + +obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o +cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o +aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o -obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o -obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o +obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o +chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o +chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o -# These modules require the assembler to support ADX. -ifeq ($(adx_supported),yes) - obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o -endif - -# These modules require assembler to support AVX. -ifeq ($(avx_supported),yes) - obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \ - camellia-aesni-avx-x86_64.o - obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o - obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o - obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o - obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o - obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o -endif - -# These modules require assembler to support AVX2. -ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o - obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o -endif +obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o +aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o -twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o -serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o +sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o -des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o -camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o -blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o -twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o -chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o -serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o +sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o +sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o -aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o +obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o +sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o +obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o blake2s-x86_64-y := blake2s-core.o blake2s-glue.o -poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o -ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),) -targets += poly1305-x86_64-cryptogams.S -endif - -ifeq ($(avx_supported),yes) - camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ - camellia_aesni_avx_glue.o - cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o - cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o - twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o \ - twofish_avx_glue.o - serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o \ - serpent_avx_glue.o -endif - -ifeq ($(avx2_supported),yes) - camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o - chacha-x86_64-y += chacha-avx2-x86_64.o - serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o - - nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o -endif - -ifeq ($(avx512_supported),yes) - chacha-x86_64-y += chacha-avx512vl-x86_64.o -endif -aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o -aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o -sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o -ifeq ($(avx2_supported),yes) -sha1-ssse3-y += sha1_avx2_x86_64_asm.o -endif -ifeq ($(sha1_ni_supported),yes) -sha1-ssse3-y += sha1_ni_asm.o -endif + +obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o + +obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o -sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o -ifeq ($(sha256_ni_supported),yes) -sha256-ssse3-y += sha256_ni_asm.o -endif -sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o + +obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o +obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o +poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o +targets += poly1305-x86_64-cryptogams.S + +obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o +nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o +obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o +nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o + +obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o + quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index bfa1c0b3e5b4..0cea33295287 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -886,7 +886,6 @@ _less_than_8_bytes_left_\@: _partial_block_done_\@: .endm # PARTIAL_BLOCK -#ifdef CONFIG_AS_AVX ############################################################################### # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) # Input: A and B (128-bits each, bit-reflected) @@ -1869,9 +1868,6 @@ key_256_finalize: ret SYM_FUNC_END(aesni_gcm_finalize_avx_gen2) -#endif /* CONFIG_AS_AVX */ - -#ifdef CONFIG_AS_AVX2 ############################################################################### # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) # Input: A and B (128-bits each, bit-reflected) @@ -2839,5 +2835,3 @@ key_256_finalize4: FUNC_RESTORE ret SYM_FUNC_END(aesni_gcm_finalize_avx_gen4) - -#endif /* CONFIG_AS_AVX2 */ diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 75b6ea20491e..ad8a7188a2bf 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -185,7 +185,6 @@ static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = { .finalize = &aesni_gcm_finalize, }; -#ifdef CONFIG_AS_AVX asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, void *keys, u8 *out, unsigned int num_bytes); asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, @@ -234,9 +233,6 @@ static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = { .finalize = &aesni_gcm_finalize_avx_gen2, }; -#endif - -#ifdef CONFIG_AS_AVX2 /* * asmlinkage void aesni_gcm_init_avx_gen4() * gcm_data *my_ctx_data, context data @@ -279,8 +275,6 @@ static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = { .finalize = &aesni_gcm_finalize_avx_gen4, }; -#endif - static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) { @@ -476,7 +470,6 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } -#ifdef CONFIG_AS_AVX static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv) { @@ -493,7 +486,6 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, else aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); } -#endif static int ctr_crypt(struct skcipher_request *req) { @@ -711,14 +703,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, if (!enc) left -= auth_tag_len; -#ifdef CONFIG_AS_AVX2 if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4) gcm_tfm = &aesni_gcm_tfm_avx_gen2; -#endif -#ifdef CONFIG_AS_AVX if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2) gcm_tfm = &aesni_gcm_tfm_sse; -#endif /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length && @@ -1076,31 +1064,24 @@ static int __init aesni_init(void) if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; #ifdef CONFIG_X86_64 -#ifdef CONFIG_AS_AVX2 if (boot_cpu_has(X86_FEATURE_AVX2)) { pr_info("AVX2 version of gcm_enc/dec engaged.\n"); aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4; } else -#endif -#ifdef CONFIG_AS_AVX if (boot_cpu_has(X86_FEATURE_AVX)) { pr_info("AVX version of gcm_enc/dec engaged.\n"); aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2; - } else -#endif - { + } else { pr_info("SSE version of gcm_enc/dec engaged.\n"); aesni_gcm_tfm = &aesni_gcm_tfm_sse; } aesni_ctr_enc_tfm = aesni_ctr_enc; -#ifdef CONFIG_AS_AVX if (boot_cpu_has(X86_FEATURE_AVX)) { /* optimize performance of ctr mode encryption transform */ aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; pr_info("AES CTR mode by8 optimization enabled\n"); } #endif -#endif err = crypto_register_alg(&aesni_cipher_alg); if (err) diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S index 24910b766bdd..2ca79974f819 100644 --- a/arch/x86/crypto/blake2s-core.S +++ b/arch/x86/crypto/blake2s-core.S @@ -46,7 +46,6 @@ SIGMA2: #endif /* CONFIG_AS_AVX512 */ .text -#ifdef CONFIG_AS_SSSE3 SYM_FUNC_START(blake2s_compress_ssse3) testq %rdx,%rdx je .Lendofloop @@ -174,7 +173,6 @@ SYM_FUNC_START(blake2s_compress_ssse3) .Lendofloop: ret SYM_FUNC_END(blake2s_compress_ssse3) -#endif /* CONFIG_AS_SSSE3 */ #ifdef CONFIG_AS_AVX512 SYM_FUNC_START(blake2s_compress_avx512) diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 68a74953efaf..b412c21ee06e 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -79,8 +79,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } } - if (IS_ENABLED(CONFIG_AS_AVX2) && - static_branch_likely(&chacha_use_avx2)) { + if (static_branch_likely(&chacha_use_avx2)) { while (bytes >= CHACHA_BLOCK_SIZE * 8) { chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); bytes -= CHACHA_BLOCK_SIZE * 8; @@ -288,8 +287,7 @@ static int __init chacha_simd_mod_init(void) static_branch_enable(&chacha_use_simd); - if (IS_ENABLED(CONFIG_AS_AVX2) && - boot_cpu_has(X86_FEATURE_AVX) && + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { static_branch_enable(&chacha_use_avx2); diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 7a6b5380a46f..137edcf038cb 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -404,10 +404,6 @@ ___ &end_function("poly1305_emit_x86_64"); if ($avx) { -if($kernel) { - $code .= "#ifdef CONFIG_AS_AVX\n"; -} - ######################################################################## # Layout of opaque area is following. # @@ -1516,16 +1512,8 @@ $code.=<<___; ___ &end_function("poly1305_emit_avx"); -if ($kernel) { - $code .= "#endif\n"; -} - if ($avx>1) { -if ($kernel) { - $code .= "#ifdef CONFIG_AS_AVX2\n"; -} - my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) = map("%ymm$_",(0..15)); my $S4=$MASK; @@ -2816,10 +2804,6 @@ ___ poly1305_blocks_avxN(0); &end_function("poly1305_blocks_avx2"); -if($kernel) { - $code .= "#endif\n"; -} - ####################################################################### if ($avx>2) { # On entry we have input length divisible by 64. But since inner loop diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 79bb58737d52..6dfec19f7d57 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -94,7 +94,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE || PAGE_SIZE % POLY1305_BLOCK_SIZE); - if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) || + if (!static_branch_likely(&poly1305_use_avx) || (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) || !crypto_simd_usable()) { convert_to_base2_64(ctx); @@ -108,7 +108,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, kernel_fpu_begin(); if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512)) poly1305_blocks_avx512(ctx, inp, bytes, padbit); - else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2)) + else if (static_branch_likely(&poly1305_use_avx2)) poly1305_blocks_avx2(ctx, inp, bytes, padbit); else poly1305_blocks_avx(ctx, inp, bytes, padbit); @@ -123,7 +123,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len, static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { - if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx)) + if (!static_branch_likely(&poly1305_use_avx)) poly1305_emit_x86_64(ctx, mac, nonce); else poly1305_emit_avx(ctx, mac, nonce); @@ -261,11 +261,10 @@ static struct shash_alg alg = { static int __init poly1305_simd_mod_init(void) { - if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) && + if (boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx); - if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx2); if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) && diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 12e2d19d7402..d25668d2a1e9 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -467,8 +467,6 @@ W_PRECALC_SSSE3 */ SHA1_VECTOR_ASM sha1_transform_ssse3 -#ifdef CONFIG_AS_AVX - .macro W_PRECALC_AVX .purgem W_PRECALC_00_15 @@ -553,5 +551,3 @@ W_PRECALC_AVX * const u8 *data, int blocks); */ SHA1_VECTOR_ASM sha1_transform_avx - -#endif diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index d70b40ad594c..a801ffc10cbb 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -114,7 +114,6 @@ static void unregister_sha1_ssse3(void) crypto_unregister_shash(&sha1_ssse3_alg); } -#ifdef CONFIG_AS_AVX asmlinkage void sha1_transform_avx(struct sha1_state *state, const u8 *data, int blocks); @@ -175,13 +174,6 @@ static void unregister_sha1_avx(void) crypto_unregister_shash(&sha1_avx_alg); } -#else /* CONFIG_AS_AVX */ -static inline int register_sha1_avx(void) { return 0; } -static inline void unregister_sha1_avx(void) { } -#endif /* CONFIG_AS_AVX */ - - -#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ asmlinkage void sha1_transform_avx2(struct sha1_state *state, @@ -253,11 +245,6 @@ static void unregister_sha1_avx2(void) crypto_unregister_shash(&sha1_avx2_alg); } -#else -static inline int register_sha1_avx2(void) { return 0; } -static inline void unregister_sha1_avx2(void) { } -#endif - #ifdef CONFIG_AS_SHA1_NI asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data, int rounds); diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index fcbc30f58c38..4739cd31b9db 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -47,7 +47,6 @@ # This code schedules 1 block at a time, with 4 lanes per block ######################################################################## -#ifdef CONFIG_AS_AVX #include <linux/linkage.h> ## assume buffers not aligned @@ -498,5 +497,3 @@ _SHUF_00BA: # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF - -#endif diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 499d9ec129de..11ff60c29c8b 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -48,7 +48,6 @@ # This code schedules 2 blocks at a time, with 4 lanes per block ######################################################################## -#ifdef CONFIG_AS_AVX2 #include <linux/linkage.h> ## assume buffers not aligned @@ -767,5 +766,3 @@ _SHUF_00BA: .align 32 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF - -#endif diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 03ad657c04bd..6394b5fe8db6 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -144,7 +144,6 @@ static void unregister_sha256_ssse3(void) ARRAY_SIZE(sha256_ssse3_algs)); } -#ifdef CONFIG_AS_AVX asmlinkage void sha256_transform_avx(struct sha256_state *state, const u8 *data, int blocks); @@ -221,12 +220,6 @@ static void unregister_sha256_avx(void) ARRAY_SIZE(sha256_avx_algs)); } -#else -static inline int register_sha256_avx(void) { return 0; } -static inline void unregister_sha256_avx(void) { } -#endif - -#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) asmlinkage void sha256_transform_rorx(struct sha256_state *state, const u8 *data, int blocks); @@ -301,11 +294,6 @@ static void unregister_sha256_avx2(void) ARRAY_SIZE(sha256_avx2_algs)); } -#else -static inline int register_sha256_avx2(void) { return 0; } -static inline void unregister_sha256_avx2(void) { } -#endif - #ifdef CONFIG_AS_SHA256_NI asmlinkage void sha256_ni_transform(struct sha256_state *digest, const u8 *data, int rounds); diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 90ea945ba5e6..63470fd6ae32 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -47,7 +47,6 @@ # ######################################################################## -#ifdef CONFIG_AS_AVX #include <linux/linkage.h> .text @@ -424,4 +423,3 @@ K512: .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -#endif diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 3dd886b14e7d..3a44bdcfd583 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -49,7 +49,6 @@ # This code schedules 1 blocks at a time, with 4 lanes per block ######################################################################## -#ifdef CONFIG_AS_AVX2 #include <linux/linkage.h> .text @@ -749,5 +748,3 @@ PSHUFFLE_BYTE_FLIP_MASK: MASK_YMM_LO: .octa 0x00000000000000000000000000000000 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF - -#endif diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 1c444f41037c..82cc1b3ced1d 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -142,7 +142,6 @@ static void unregister_sha512_ssse3(void) ARRAY_SIZE(sha512_ssse3_algs)); } -#ifdef CONFIG_AS_AVX asmlinkage void sha512_transform_avx(struct sha512_state *state, const u8 *data, int blocks); static bool avx_usable(void) @@ -218,12 +217,7 @@ static void unregister_sha512_avx(void) crypto_unregister_shashes(sha512_avx_algs, ARRAY_SIZE(sha512_avx_algs)); } -#else -static inline int register_sha512_avx(void) { return 0; } -static inline void unregister_sha512_avx(void) { } -#endif -#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) asmlinkage void sha512_transform_rorx(struct sha512_state *state, const u8 *data, int blocks); @@ -298,10 +292,6 @@ static void unregister_sha512_avx2(void) crypto_unregister_shashes(sha512_avx2_algs, ARRAY_SIZE(sha512_avx2_algs)); } -#else -static inline int register_sha512_avx2(void) { return 0; } -static inline void unregister_sha512_avx2(void) { } -#endif static int __init sha512_ssse3_mod_init(void) { diff --git a/arch/x86/entry/vdso/.gitignore b/arch/x86/entry/vdso/.gitignore index aae8ffdd5880..37a6129d597b 100644 --- a/arch/x86/entry/vdso/.gitignore +++ b/arch/x86/entry/vdso/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds vdsox32.lds vdso32-syscall-syms.lds diff --git a/arch/x86/entry/vdso/vdso32/.gitignore b/arch/x86/entry/vdso/vdso32/.gitignore index e45fba9d0ced..5167384843b9 100644 --- a/arch/x86/entry/vdso/vdso32/.gitignore +++ b/arch/x86/entry/vdso/vdso32/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso32.lds diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 1e82bd43286c..84a4a73f77f7 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define BUILD_VDSO32 -#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE -#undef CONFIG_OPTIMIZE_INLINING -#endif - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 1ba72c563313..cf76d6631afa 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1476,6 +1476,12 @@ static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = { .mmio_init = tgl_l_uncore_mmio_init, }; +static const struct intel_uncore_init_fun icx_uncore_init __initconst = { + .cpu_init = icx_uncore_cpu_init, + .pci_init = icx_uncore_pci_init, + .mmio_init = icx_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun snr_uncore_init __initconst = { .cpu_init = snr_uncore_cpu_init, .pci_init = snr_uncore_pci_init, @@ -1511,6 +1517,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index b30429f8a53a..0da4a4605536 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -550,6 +550,9 @@ void skx_uncore_cpu_init(void); int snr_uncore_pci_init(void); void snr_uncore_cpu_init(void); void snr_uncore_mmio_init(void); +int icx_uncore_pci_init(void); +void icx_uncore_cpu_init(void); +void icx_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 01023f0d935b..07652fa20ebb 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -382,6 +382,42 @@ #define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 #define SNR_IMC_MMIO_MEM0_MASK 0x7FF +/* ICX CHA */ +#define ICX_C34_MSR_PMON_CTR0 0xb68 +#define ICX_C34_MSR_PMON_CTL0 0xb61 +#define ICX_C34_MSR_PMON_BOX_CTL 0xb60 +#define ICX_C34_MSR_PMON_BOX_FILTER0 0xb65 + +/* ICX IIO */ +#define ICX_IIO_MSR_PMON_CTL0 0xa58 +#define ICX_IIO_MSR_PMON_CTR0 0xa51 +#define ICX_IIO_MSR_PMON_BOX_CTL 0xa50 + +/* ICX IRP */ +#define ICX_IRP0_MSR_PMON_CTL0 0xa4d +#define ICX_IRP0_MSR_PMON_CTR0 0xa4b +#define ICX_IRP0_MSR_PMON_BOX_CTL 0xa4a + +/* ICX M2PCIE */ +#define ICX_M2PCIE_MSR_PMON_CTL0 0xa46 +#define ICX_M2PCIE_MSR_PMON_CTR0 0xa41 +#define ICX_M2PCIE_MSR_PMON_BOX_CTL 0xa40 + +/* ICX UPI */ +#define ICX_UPI_PCI_PMON_CTL0 0x350 +#define ICX_UPI_PCI_PMON_CTR0 0x320 +#define ICX_UPI_PCI_PMON_BOX_CTL 0x318 +#define ICX_UPI_CTL_UMASK_EXT 0xffffff + +/* ICX M3UPI*/ +#define ICX_M3UPI_PCI_PMON_CTL0 0xd8 +#define ICX_M3UPI_PCI_PMON_CTR0 0xa8 +#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0 + +/* ICX IMC */ +#define ICX_NUMBER_IMC_CHN 2 +#define ICX_IMC_MEM_STRIDE 0x4 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); @@ -390,6 +426,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -4551,3 +4588,477 @@ void snr_uncore_mmio_init(void) } /* end of SNR uncore support */ + +/* ICX uncore support */ + +static unsigned icx_cha_msr_offsets[] = { + 0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310, + 0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e, + 0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a, + 0x428, 0x436, 0x444, 0x452, 0x460, 0x46e, 0x47c, 0x0, 0xe, + 0x1c, 0x2a, 0x38, 0x46, +}; + +static int icx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + bool tie_en = !!(event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN); + + if (tie_en) { + reg1->reg = ICX_C34_MSR_PMON_BOX_FILTER0 + + icx_cha_msr_offsets[box->pmu->pmu_idx]; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + } + + return 0; +} + +static struct intel_uncore_ops icx_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = icx_cha_hw_config, +}; + +static struct intel_uncore_type icx_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .perf_ctr_bits = 48, + .event_ctl = ICX_C34_MSR_PMON_CTL0, + .perf_ctr = ICX_C34_MSR_PMON_CTR0, + .box_ctl = ICX_C34_MSR_PMON_BOX_CTL, + .msr_offsets = icx_cha_msr_offsets, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .constraints = skx_uncore_chabox_constraints, + .ops = &icx_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static unsigned icx_msr_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static struct event_constraint icx_uncore_iio_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x02, 0x3), + UNCORE_EVENT_CONSTRAINT(0x03, 0x3), + UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), + UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IIO_MSR_PMON_CTL0, + .perf_ctr = ICX_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = ICX_IIO_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_iio_constraints, + .ops = &skx_uncore_iio_ops, + .format_group = &snr_uncore_iio_format_group, +}; + +static struct intel_uncore_type icx_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_IRP0_MSR_PMON_CTL0, + .perf_ctr = ICX_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_IRP0_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m2pcie_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x14, 0x3), + UNCORE_EVENT_CONSTRAINT(0x23, 0x3), + UNCORE_EVENT_CONSTRAINT(0x2d, 0x3), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = ICX_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = ICX_M2PCIE_MSR_PMON_CTR0, + .box_ctl = ICX_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offsets = icx_msr_offsets, + .constraints = icx_uncore_m2pcie_constraints, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum perf_uncore_icx_iio_freerunning_type_id { + ICX_IIO_MSR_IOCLK, + ICX_IIO_MSR_BW_IN, + + ICX_IIO_FREERUNNING_TYPE_MAX, +}; + +static unsigned icx_iio_clk_freerunning_box_offsets[] = { + 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, +}; + +static unsigned icx_iio_bw_freerunning_box_offsets[] = { + 0x0, 0x10, 0x20, 0x90, 0xa0, 0xb0, +}; + +static struct freerunning_counters icx_iio_freerunning[] = { + [ICX_IIO_MSR_IOCLK] = { 0xa55, 0x1, 0x20, 1, 48, icx_iio_clk_freerunning_box_offsets }, + [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, +}; + +static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type icx_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 6, + .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, + .freerunning = icx_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = icx_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_msr_uncores[] = { + &skx_uncore_ubox, + &icx_uncore_chabox, + &icx_uncore_iio, + &icx_uncore_irp, + &icx_uncore_m2pcie, + &skx_uncore_pcu, + &icx_uncore_iio_free_running, + NULL, +}; + +/* + * To determine the number of CHAs, it should read CAPID6(Low) and CAPID7 (High) + * registers which located at Device 30, Function 3 + */ +#define ICX_CAPID6 0x9c +#define ICX_CAPID7 0xa0 + +static u64 icx_count_chabox(void) +{ + struct pci_dev *dev = NULL; + u64 caps = 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x345b, dev); + if (!dev) + goto out; + + pci_read_config_dword(dev, ICX_CAPID6, (u32 *)&caps); + pci_read_config_dword(dev, ICX_CAPID7, (u32 *)&caps + 1); +out: + pci_dev_put(dev); + return hweight64(caps); +} + +void icx_uncore_cpu_init(void) +{ + u64 num_boxes = icx_count_chabox(); + + if (WARN_ON(num_boxes > ARRAY_SIZE(icx_cha_msr_offsets))) + return; + icx_uncore_chabox.num_boxes = num_boxes; + uncore_msr_uncores = icx_msr_uncores; +} + +static struct intel_uncore_type icx_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 4, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +static struct attribute *icx_upi_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext4.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group icx_upi_uncore_format_group = { + .name = "format", + .attrs = icx_upi_uncore_formats_attr, +}; + +static struct intel_uncore_type icx_uncore_upi = { + .name = "upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0, + .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = ICX_UPI_CTL_UMASK_EXT, + .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, + .ops = &skx_upi_uncore_pci_ops, + .format_group = &icx_upi_uncore_format_group, +}; + +static struct event_constraint icx_uncore_m3upi_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x1c, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1d, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1e, 0x1), + UNCORE_EVENT_CONSTRAINT(0x1f, 0x1), + UNCORE_EVENT_CONSTRAINT(0x40, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4e, 0x7), + UNCORE_EVENT_CONSTRAINT(0x4f, 0x7), + UNCORE_EVENT_CONSTRAINT(0x50, 0x7), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type icx_uncore_m3upi = { + .name = "m3upi", + .num_counters = 4, + .num_boxes = 3, + .perf_ctr_bits = 48, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, + .constraints = icx_uncore_m3upi_constraints, + .ops = &ivbep_uncore_pci_ops, + .format_group = &skx_uncore_format_group, +}; + +enum { + ICX_PCI_UNCORE_M2M, + ICX_PCI_UNCORE_UPI, + ICX_PCI_UNCORE_M3UPI, +}; + +static struct intel_uncore_type *icx_pci_uncores[] = { + [ICX_PCI_UNCORE_M2M] = &icx_uncore_m2m, + [ICX_PCI_UNCORE_UPI] = &icx_uncore_upi, + [ICX_PCI_UNCORE_M3UPI] = &icx_uncore_m3upi, + NULL, +}; + +static const struct pci_device_id icx_uncore_pci_ids[] = { + { /* M2M 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, ICX_PCI_UNCORE_M2M, 0), + }, + { /* M2M 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 0, ICX_PCI_UNCORE_M2M, 1), + }, + { /* M2M 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, ICX_PCI_UNCORE_M2M, 2), + }, + { /* M2M 3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, ICX_PCI_UNCORE_M2M, 3), + }, + { /* UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(2, 1, ICX_PCI_UNCORE_UPI, 0), + }, + { /* UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(3, 1, ICX_PCI_UNCORE_UPI, 1), + }, + { /* UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 1, ICX_PCI_UNCORE_UPI, 2), + }, + { /* M3UPI Link 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(5, 1, ICX_PCI_UNCORE_M3UPI, 0), + }, + { /* M3UPI Link 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(6, 1, ICX_PCI_UNCORE_M3UPI, 1), + }, + { /* M3UPI Link 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(7, 1, ICX_PCI_UNCORE_M3UPI, 2), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver icx_uncore_pci_driver = { + .name = "icx_uncore", + .id_table = icx_uncore_pci_ids, +}; + +int icx_uncore_pci_init(void) +{ + /* ICX UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3450, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = icx_pci_uncores; + uncore_pci_driver = &icx_uncore_pci_driver; + return 0; +} + +static void icx_uncore_imc_init_box(struct intel_uncore_box *box) +{ + unsigned int box_ctl = box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * (box->pmu->pmu_idx % ICX_NUMBER_IMC_CHN); + int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + __snr_uncore_mmio_init_box(box, box_ctl, mem_offset); +} + +static struct intel_uncore_ops icx_uncore_mmio_ops = { + .init_box = icx_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct intel_uncore_type icx_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 8, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = hswep_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .ops = &icx_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_icx_imc_freerunning_type_id { + ICX_IMC_DCLK, + ICX_IMC_DDR, + ICX_IMC_DDRT, + + ICX_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters icx_imc_freerunning[] = { + [ICX_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [ICX_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, + [ICX_IMC_DDRT] = { 0x22a0, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + + INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE + + SNR_IMC_MMIO_MEM0_OFFSET; + + __snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset); +} + +static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = { + .init_box = icx_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type icx_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 5, + .num_boxes = 4, + .num_freerunning_types = ICX_IMC_FREERUNNING_TYPE_MAX, + .freerunning = icx_imc_freerunning, + .ops = &icx_uncore_imc_freerunning_ops, + .event_descs = icx_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *icx_mmio_uncores[] = { + &icx_uncore_imc, + &icx_uncore_imc_free_running, + NULL, +}; + +void icx_uncore_mmio_init(void) +{ + uncore_mmio_uncores = icx_mmio_uncores; +} + +/* end of ICX uncore support */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b0da5320bcff..624f5d9b0f79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -20,6 +20,7 @@ #include <linux/mm.h> #include <linux/hyperv.h> #include <linux/slab.h> +#include <linux/kernel.h> #include <linux/cpuhotplug.h> #include <linux/syscore_ops.h> #include <clocksource/hyperv_timer.h> @@ -419,11 +420,14 @@ void hyperv_cleanup(void) } EXPORT_SYMBOL_GPL(hyperv_cleanup); -void hyperv_report_panic(struct pt_regs *regs, long err) +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { static bool panic_reported; u64 guest_id; + if (in_die && !panic_on_oops) + return; + /* * We prefer to report panic on 'die' chain as we have proper * registers to report, but if we miss it (e.g. on BUG()) we need diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index ff6f3ca649b3..dd17c2da1af5 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int sig); extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); +extern bool handle_guest_split_lock(unsigned long ip); #else static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} static inline void switch_to_sld(unsigned long tifn) {} @@ -51,5 +52,10 @@ static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { return false; } + +static inline bool handle_guest_split_lock(unsigned long ip) +{ + return false; +} #endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index f71a0cce9373..430fca13bb56 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,15 +6,6 @@ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif -/* - * Macros for dwarf2 CFI unwind table entries. - * See "as.info" for details on these pseudo ops. Unfortunately - * they are only supported in very new binutils, so define them - * away for older version. - */ - -#ifdef CONFIG_AS_CFI - #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -30,13 +21,6 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape -#ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame -#else -#define CFI_SIGNAL_FRAME -#endif - -#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) #ifndef BUILD_VDSO /* * Emit CFI data in .debug_frame sections, not .eh_frame sections. @@ -53,33 +37,5 @@ */ .cfi_sections .eh_frame, .debug_frame #endif -#endif - -#else - -/* - * Due to the structure of pre-exisiting code, don't use assembler line - * comment character # to ignore the arguments. Instead, use a dummy macro. - */ -.macro cfi_ignore a=0, b=0, c=0, d=0 -.endm - -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore -#define CFI_DEF_CFA_REGISTER cfi_ignore -#define CFI_DEF_CFA_OFFSET cfi_ignore -#define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_ESCAPE cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore - -#endif #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cdcf48d52a12..8391c115c0ec 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -178,8 +178,10 @@ extern void efi_free_boot_services(void); extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); +/* kexec external ABI */ struct efi_setup_data { u64 fw_vendor; + u64 __unused; u64 tables; u64 smbios; u64 reserved[8]; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 92abc1e42bfc..29336574d0bc 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -376,6 +376,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_RETARGET_INTERRUPT 0x007e #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 @@ -405,6 +406,8 @@ enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_ALL, }; +#define HV_PARTITION_ID_SELF ((u64)-1) + #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) #define HV_HYPERCALL_FAST_BIT BIT(16) #define HV_HYPERCALL_VARHEAD_OFFSET 17 @@ -909,4 +912,42 @@ struct hv_tlb_flush_ex { struct hv_partition_assist_pg { u32 tlb_lock_count; }; + +union hv_msi_entry { + u64 as_uint64; + struct { + u32 address; + u32 data; + } __packed; +}; + +struct hv_interrupt_entry { + u32 source; /* 1 for MSI(-X) */ + u32 reserved1; + union hv_msi_entry msi_entry; +} __packed; + +/* + * flags for hv_device_interrupt_target.flags + */ +#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 +#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 + +struct hv_device_interrupt_target { + u32 vector; + u32 flags; + union { + u64 vp_mask; + struct hv_vpset vp_set; + }; +} __packed; + +/* HvRetargetDeviceInterrupt hypercall */ +struct hv_retarget_device_interrupt { + u64 partition_id; /* use "self" */ + u64 device_id; + struct hv_interrupt_entry int_entry; + u64 reserved2; + struct hv_device_interrupt_target int_target; +} __packed __aligned(8); #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a239a297be33..b3a5da27c2a5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -583,6 +583,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; @@ -1098,8 +1099,6 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - u64 (*get_dr6)(struct kvm_vcpu *vcpu); - void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); @@ -1463,6 +1462,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); @@ -1682,8 +1682,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 6685e1218959..7063b5a43220 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index edc2c581704a..1c42ecbe75cb 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/nmi.h> +#include <linux/msi.h> #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> @@ -242,6 +243,13 @@ bool hv_vcpu_is_preempted(int vcpu); static inline void hv_apic_init(void) {} #endif +static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, + struct msi_desc *msi_desc) +{ + msi_entry->address = msi_desc->msg.address_lo; + msi_entry->data = msi_desc->msg.data; +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48..a506a411474d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) @@ -73,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index afda66a6d325..4d02e64af1b3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,7 @@ #include <asm/x86_init.h> #include <asm/fpu/xstate.h> #include <asm/fpu/api.h> +#include <asm-generic/pgtable_uffd.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -313,6 +314,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pte_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_UFFD_WP; +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_UFFD_WP); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pte_t pte_mkclean(pte_t pte) { return pte_clear_flags(pte, _PAGE_DIRTY); @@ -392,6 +410,23 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline int pmd_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_UFFD_WP; +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_UFFD_WP); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + static inline pmd_t pmd_mkold(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_ACCESSED); @@ -825,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); @@ -1043,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else @@ -1374,6 +1415,38 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte_set_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline int pte_swp_uffd_wp(pte_t pte) +{ + return pte_flags(pte) & _PAGE_SWP_UFFD_WP; +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SWP_UFFD_WP); +} + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_UFFD_WP); +} + +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_UFFD_WP; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_UFFD_WP); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0b6c4042942a..df1373415f11 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -189,7 +189,7 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names - * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|X|SD|0| <- swp entry + * | TYPE (59-63) | ~OFFSET (9-58) |0|0|X|X| X| X|F|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above @@ -197,9 +197,15 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); * erratum where they can be incorrectly set by hardware on * non-present PTEs. * + * SD Bits 1-4 are not used in non-present format and available for + * special use described below: + * * SD (1) in swp entry is used to store soft dirty bit, which helps us * remember soft dirty over page migration * + * F (2) in swp entry is used to record when a pagetable is + * writeprotected by userfaultfd WP support. + * * Bit 7 in swp entry should be 0 because pmd_present checks not only P, * but also L and G. * diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 65c2ecd730c5..b6606fe6cfdf 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -32,6 +32,7 @@ #define _PAGE_BIT_SPECIAL _PAGE_BIT_SOFTW1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1 +#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 @@ -100,6 +101,14 @@ #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define _PAGE_UFFD_WP (_AT(pteval_t, 1) << _PAGE_BIT_UFFD_WP) +#define _PAGE_SWP_UFFD_WP _PAGE_USER +#else +#define _PAGE_UFFD_WP (_AT(pteval_t, 0)) +#define _PAGE_SWP_UFFD_WP (_AT(pteval_t, 0)) +#endif + #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) #define _PAGE_DEVMAP (_AT(u64, 1) << _PAGE_BIT_DEVMAP) @@ -118,7 +127,8 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ + _PAGE_UFFD_WP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 950532ccbc4a..ec2c0a094b5d 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index d61ddf3d052b..0c4e5b5e3852 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -11,8 +11,6 @@ * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines */ -#ifdef CONFIG_AS_AVX - #include <linux/compiler.h> #include <asm/fpu/api.h> @@ -170,11 +168,4 @@ do { \ #define AVX_SELECT(FASTEST) \ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST) -#else - -#define AVX_XOR_SPEED {} - -#define AVX_SELECT(FASTEST) (FASTEST) - -#endif #endif diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore index 08f4fd731469..ef66569e7e22 100644 --- a/arch/x86/kernel/.gitignore +++ b/arch/x86/kernel/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only vsyscall.lds vsyscall_32.lds vmlinux.lds diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1ae5439a9a85..683ed9e12e6b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(acpi_disabled); #define PREFIX "ACPI: " int acpi_noirq; /* skip ACPI IRQ initialization */ -int acpi_nobgrt; /* skip ACPI BGRT */ +static int acpi_nobgrt; /* skip ACPI BGRT */ int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index caf2edccbad2..49ae4e1ac9cd 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -161,7 +161,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 4e5f50236048..16133819415c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -744,7 +744,8 @@ int __init gart_iommu_init(void) start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn)) - init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT, + PAGE_KERNEL); pr_info("PCI-DMA: using GART IOMMU.\n"); iommu_size = check_iommu_size(info.aper_base, aper_size); diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore index 667df55a4399..0bca7ef7426a 100644 --- a/arch/x86/kernel/cpu/.gitignore +++ b/arch/x86/kernel/cpu/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only capflags.c diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9a26e972cdea..a19a680542ce 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -21,6 +21,7 @@ #include <asm/elf.h> #include <asm/cpu_device_id.h> #include <asm/cmdline.h> +#include <asm/traps.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -1066,13 +1067,10 @@ static void split_lock_init(void) split_lock_verify_msr(sld_state != sld_off); } -bool handle_user_split_lock(struct pt_regs *regs, long error_code) +static void split_lock_warn(unsigned long ip) { - if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) - return false; - pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", - current->comm, current->pid, regs->ip); + current->comm, current->pid, ip); /* * Disable the split lock detection for this task so it can make @@ -1081,6 +1079,31 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code) */ sld_update_msr(false); set_tsk_thread_flag(current, TIF_SLD); +} + +bool handle_guest_split_lock(unsigned long ip) +{ + if (sld_state == sld_warn) { + split_lock_warn(ip); + return true; + } + + pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n", + current->comm, current->pid, + sld_state == sld_fatal ? "fatal" : "bogus", ip); + + current->thread.error_code = 0; + current->thread.trap_nr = X86_TRAP_AC; + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + return false; +} +EXPORT_SYMBOL_GPL(handle_guest_split_lock); + +bool handle_user_split_lock(struct pt_regs *regs, long error_code) +{ + if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) + return false; + split_lock_warn(regs->ip); return true; } @@ -1096,35 +1119,53 @@ void switch_to_sld(unsigned long tifn) sld_update_msr(!(tifn & _TIF_SLD)); } -#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY} - /* - * The following processors have the split lock detection feature. But - * since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot - * be enumerated. Enable it by family and model matching on these - * processors. + * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should + * only be trusted if it is confirmed that a CPU model implements a + * specific feature at a particular bit position. + * + * The possible driver data field values: + * + * - 0: CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. + * + * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use + * bit 5 to enumerate the per-core split-lock detection feature. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X), - SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), {} }; void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) { - u64 ia32_core_caps = 0; + const struct x86_cpu_id *m; + u64 ia32_core_caps; - if (c->x86_vendor != X86_VENDOR_INTEL) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; - if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) { - /* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */ + + m = x86_match_cpu(split_lock_cpu_ids); + if (!m) + return; + + switch (m->driver_data) { + case 0: + break; + case 1: + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) + return; rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - } else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) { - /* Enumerate split lock detection by family and model. */ - if (x86_match_cpu(split_lock_cpu_ids)) - ia32_core_caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT; + if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) + return; + break; + default: + return; } - if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) - split_lock_setup(); + split_lock_setup(); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index caa032ce3fe3..ebf34c7bc8bc 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -227,8 +227,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -263,6 +263,16 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 89049b343c7a..d8cc5223b7ce 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -578,6 +578,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 181c992f448c..3dd13f3a8b23 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -601,5 +601,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 064e9ef44cd6..5a359d9fcc05 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1859,6 +1859,19 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * Enable or disable the MBA software controller * which helps user specify bandwidth in MBps. @@ -3072,7 +3085,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = rdtgroup_ctrl_remove(kn, rdtgrp); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e6b545047f38..4b3fa6cd3106 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -16,6 +16,7 @@ #include <linux/pci.h> #include <linux/root_dev.h> #include <linux/sfi.h> +#include <linux/hugetlb.h> #include <linux/tboot.h> #include <linux/usb/xhci-dbgp.h> @@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); + if (boot_cpu_has(X86_FEATURE_GBPAGES)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + /* * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 4d732a444711..8d5cbe1bbb3b 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -81,7 +81,7 @@ #define UMIP_INST_SLDT 3 /* 0F 00 /0 */ #define UMIP_INST_STR 4 /* 0F 00 /1 */ -const char * const umip_insns[5] = { +static const char * const umip_insns[5] = { [UMIP_INST_SGDT] = "SGDT", [UMIP_INST_SIDT] = "SIDT", [UMIP_INST_SMSW] = "SMSW", diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 9fea0757db92..d8154e0684b6 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -107,8 +107,4 @@ config KVM_MMU_AUDIT This option adds a R/W kVM module parameter 'mmu_audit', which allows auditing of KVM MMU events at runtime. -# OK, it's a little counter-intuitive to do this, but it puts it neatly under -# the virtualization menu. -source "drivers/vhost/Kconfig" - endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a789759b7261..4a3081e9f4b5 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,6 +3,10 @@ ccflags-y += -Iarch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror +ifeq ($(CONFIG_FRAME_POINTER),y) +OBJECT_FILES_NON_STANDARD_vmenter.o := y +endif + KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2f96ff9e60ee..f9d3b919823c 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1426,7 +1426,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, * analyze it here, flush TLB regardless of the specified address space. */ kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, - vcpu_mask, &hv_vcpu->tlb_flush); + NULL, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: /* We always do full TLB flush, set rep_done = rep_cnt. */ diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 750ff0b29404..d057376bd3d3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, } /* - * AMD SVM AVIC accelerate EOI write and do not trap, - * in-kernel IOAPIC will not be able to receive the EOI. - * In this case, we do lazy update of the pending EOI when - * trying to set IOAPIC irq. + * AMD SVM AVIC accelerate EOI write iff the interrupt is edge + * triggered, in which case the in-kernel IOAPIC will not be able + * to receive the EOI. In this case, we do a lazy update of the + * pending EOI when trying to set IOAPIC irq. */ - if (kvm_apicv_activated(ioapic->kvm)) + if (edge && kvm_apicv_activated(ioapic->kvm)) ioapic_lazy_update_eoi(ioapic, irq); /* diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a7c3b3030e59..1429f506fe9e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <asm/msr-index.h> +#include <asm/debugreg.h> #include "kvm_emulate.h" #include "trace.h" @@ -271,7 +272,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.rsp = nested_vmcb->save.rsp; svm->vmcb->save.rip = nested_vmcb->save.rip; svm->vmcb->save.dr7 = nested_vmcb->save.dr7; - svm->vmcb->save.dr6 = nested_vmcb->save.dr6; + svm->vcpu.arch.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; @@ -490,7 +491,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; nested_vmcb->save.dr7 = vmcb->save.dr7; - nested_vmcb->save.dr6 = vmcb->save.dr6; + nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; nested_vmcb->control.int_ctl = vmcb->control.int_ctl; @@ -614,26 +615,45 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) /* DB exceptions for our internal use must not cause vmexit */ static int nested_svm_intercept_db(struct vcpu_svm *svm) { - unsigned long dr6; + unsigned long dr6 = svm->vmcb->save.dr6; + + /* Always catch it and pass it to userspace if debugging. */ + if (svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return NESTED_EXIT_HOST; /* if we're not singlestepping, it's not ours */ if (!svm->nmi_singlestep) - return NESTED_EXIT_DONE; + goto reflected_db; /* if it's not a singlestep exception, it's not ours */ - if (kvm_get_dr(&svm->vcpu, 6, &dr6)) - return NESTED_EXIT_DONE; if (!(dr6 & DR6_BS)) - return NESTED_EXIT_DONE; + goto reflected_db; /* if the guest is singlestepping, it should get the vmexit */ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { disable_nmi_singlestep(svm); - return NESTED_EXIT_DONE; + goto reflected_db; } /* it's ours, the nested hypervisor must not see this one */ return NESTED_EXIT_HOST; + +reflected_db: + /* + * Synchronize guest DR6 here just like in kvm_deliver_exception_payload; + * it will be moved into the nested VMCB by nested_svm_vmexit. Once + * exceptions will be moved to svm_check_nested_events, all this stuff + * will just go away and we could just return NESTED_EXIT_HOST + * unconditionally. db_interception will queue the exception, which + * will be processed by svm_check_nested_events if a nested vmexit is + * required, and we will just use kvm_deliver_exception_payload to copy + * the payload to DR6 before vmexit. + */ + WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT); + svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM); + svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1; + return NESTED_EXIT_DONE; } static int nested_svm_intercept_ioio(struct vcpu_svm *svm) @@ -690,6 +710,9 @@ static int nested_svm_intercept(struct vcpu_svm *svm) if (svm->nested.intercept_exceptions & excp_bits) { if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) vmexit = nested_svm_intercept_db(svm); + else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR && + svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP) + vmexit = NESTED_EXIT_HOST; else vmexit = NESTED_EXIT_DONE; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c86f7278509b..b627564e41f9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1673,17 +1673,14 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } -static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) { - return to_svm(vcpu)->vmcb->save.dr6; -} - -static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) -{ - struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; - svm->vmcb->save.dr6 = value; - mark_dirty(svm->vmcb, VMCB_DR); + if (unlikely(value != vmcb->save.dr6)) { + vmcb->save.dr6 = value; + mark_dirty(vmcb, VMCB_DR); + } } static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) @@ -1694,9 +1691,12 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); get_debugreg(vcpu->arch.db[3], 3); - vcpu->arch.dr6 = svm_get_dr6(vcpu); + /* + * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here, + * because db_interception might need it. We can do it before vmentry. + */ + vcpu->arch.dr6 = svm->vmcb->save.dr6; vcpu->arch.dr7 = svm->vmcb->save.dr7; - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; set_dr_intercepts(svm); } @@ -1740,7 +1740,8 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->nmi_singlestep) { - kvm_queue_exception(&svm->vcpu, DB_VECTOR); + u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1; + kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload); return 1; } @@ -1753,6 +1754,8 @@ static int db_interception(struct vcpu_svm *svm) if (svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6; + kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; kvm_run->debug.arch.exception = DB_VECTOR; @@ -3349,6 +3352,15 @@ static enum exit_fastpath_completion svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; + /* + * Run with all-zero DR6 unless needed, so that we can get the exact cause + * of a #DB. + */ + if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + svm_set_dr6(svm, vcpu->arch.dr6); + else + svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM); + clgi(); kvm_load_guest_xsave_state(vcpu); @@ -3967,8 +3979,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, - .get_dr6 = svm_get_dr6, - .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b516c24494e3..b644bbf85460 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5236,7 +5236,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) roots_to_free = KVM_MMU_ROOTS_ALL; break; default: - BUG_ON(1); + BUG(); break; } diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 87f3f24fef37..51d1a82742fd 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit) /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ + or $1, %_ASM_AX + pop %_ASM_AX .Lvmexit_skip_rsb: #endif diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 455cd2c8dbce..46aa3ca01929 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1381,7 +1381,6 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx_vcpu_pi_load(vcpu, cpu); - vmx->host_pkru = read_pkru(); vmx->host_debugctlmsr = get_debugctlmsr(); } @@ -4645,6 +4644,26 @@ static int handle_machine_check(struct kvm_vcpu *vcpu) return 1; } +/* + * If the host has split lock detection disabled, then #AC is + * unconditionally injected into the guest, which is the pre split lock + * detection behaviour. + * + * If the host has split lock detection enabled then #AC is + * only injected into the guest when: + * - Guest CPL == 3 (user mode) + * - Guest has #AC detection enabled in CR0 + * - Guest EFLAGS has AC bit set + */ +static inline bool guest_inject_ac(struct kvm_vcpu *vcpu) +{ + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + return true; + + return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) && + (kvm_get_rflags(vcpu) & X86_EFLAGS_AC); +} + static int handle_exception_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4710,22 +4729,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { - case AC_VECTOR: - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); - return 1; case DB_VECTOR: dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); /* fall through */ case BP_VECTOR: @@ -4741,6 +4755,20 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; kvm_run->debug.arch.exception = ex_no; break; + case AC_VECTOR: + if (guest_inject_ac(vcpu)) { + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; + } + + /* + * Handle split lock. Depending on detection mode this will + * either warn and disable split lock detection for this + * task or force SIGBUS on it. + */ + if (handle_guest_split_lock(kvm_rip_read(vcpu))) + return 1; + fallthrough; default: kvm_run->exit_reason = KVM_EXIT_EXCEPTION; kvm_run->ex.exception = ex_no; @@ -4955,16 +4983,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) * guest debugging itself. */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; + vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1; vcpu->run->debug.arch.dr7 = dr7; vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD); return 1; } } @@ -4995,15 +5021,6 @@ static int handle_dr(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.dr6; -} - -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ -} - static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { get_debugreg(vcpu->arch.db[0], 0); @@ -6615,11 +6632,6 @@ static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_load_guest_xsave_state(vcpu); - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - pt_guest_enter(vmx); if (vcpu_to_pmu(vcpu)->version) @@ -6704,18 +6716,6 @@ static enum exit_fastpath_completion vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_exit(vmx); - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - kvm_load_host_xsave_state(vcpu); vmx->nested.nested_run_pending = 0; @@ -7785,8 +7785,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, - .get_dr6 = vmx_get_dr6, - .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c0b77ac8dc6..542a00008caa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -568,11 +568,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) } EXPORT_SYMBOL_GPL(kvm_requeue_exception); -static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, - unsigned long payload) +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, + unsigned long payload) { kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false); } +EXPORT_SYMBOL_GPL(kvm_queue_exception_p); static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code, unsigned long payload) @@ -845,11 +846,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) vcpu->arch.ia32_xss != host_xss) wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } + + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) && + vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { + vcpu->arch.pkru = rdpkru(); + if (vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.host_pkru); + } + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -935,19 +950,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); __reserved_bits; \ }) -static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) -{ - u64 reserved_bits = __cr4_reserved_bits(cpu_has, c); - - if (kvm_cpu_cap_has(X86_FEATURE_LA57)) - reserved_bits &= ~X86_CR4_LA57; - - if (kvm_cpu_cap_has(X86_FEATURE_UMIP)) - reserved_bits &= ~X86_CR4_UMIP; - - return reserved_bits; -} - static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) @@ -1067,12 +1069,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) } } -static void kvm_update_dr6(struct kvm_vcpu *vcpu) -{ - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6); -} - static void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -1112,7 +1108,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); - kvm_update_dr6(vcpu); break; case 5: /* fall through */ @@ -1148,10 +1143,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) case 4: /* fall through */ case 6: - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - *val = vcpu->arch.dr6; - else - *val = kvm_x86_ops.get_dr6(vcpu); + *val = vcpu->arch.dr6; break; case 5: /* fall through */ @@ -3075,6 +3067,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_PERF_CTL: case MSR_AMD64_DC_CFG: case MSR_F15H_EX_CFG: + /* + * Intel Sandy Bridge CPUs must support the RAPL (running average power + * limit) MSRs. Just return 0, as we do not want to expose the host + * data here. Do not conditionalize this on CPUID, as KVM does not do + * so for existing CPU-specific MSRs. + */ + case MSR_RAPL_POWER_UNIT: + case MSR_PP0_ENERGY_STATUS: /* Power plane 0 (core) */ + case MSR_PP1_ENERGY_STATUS: /* Power plane 1 (graphics uncore) */ + case MSR_PKG_ENERGY_STATUS: /* Total package */ + case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */ msr_info->data = 0; break; case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: @@ -3389,6 +3392,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -3574,6 +3578,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops.vcpu_load(vcpu, cpu); + /* Save host pkru register if supported */ + vcpu->arch.host_pkru = read_pkru(); + /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -4025,7 +4032,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; - kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; kvm_update_dr7(vcpu); @@ -5064,10 +5070,13 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof(u.ps))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit_out; r = kvm_vm_ioctl_set_pit(kvm, &u.ps); +set_pit_out: + mutex_unlock(&kvm->lock); break; } case KVM_GET_PIT2: { @@ -5087,10 +5096,13 @@ set_identity_unlock: r = -EFAULT; if (copy_from_user(&u.ps2, argp, sizeof(u.ps2))) goto out; + mutex_lock(&kvm->lock); r = -ENXIO; if (!kvm->arch.vpit) - goto out; + goto set_pit2_out; r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2); +set_pit2_out: + mutex_unlock(&kvm->lock); break; } case KVM_REINJECT_CONTROL: { @@ -5854,6 +5866,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, { struct kvm_host_map map; struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + u64 page_line_mask; gpa_t gpa; char *kaddr; bool exchanged; @@ -5868,7 +5881,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto emul_write; - if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) + /* + * Emulate the atomic as a straight write to avoid #AC if SLD is + * enabled in the host and the access splits a cache line. + */ + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + page_line_mask = ~(cache_line_size() - 1); + else + page_line_mask = PAGE_MASK; + + if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) goto emul_write; if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) @@ -6659,7 +6681,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; return 0; @@ -6719,9 +6741,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.db); if (dr6 != 0) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); *r = 1; return true; } @@ -8042,7 +8062,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, zalloc_cpumask_var(&cpus, GFP_ATOMIC); kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, - vcpu_bitmap, cpus); + NULL, vcpu_bitmap, cpus); free_cpumask_var(cpus); } @@ -8072,6 +8092,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { + struct kvm_vcpu *except; unsigned long old, new, expected; if (!kvm_x86_ops.check_apicv_inhibit_reasons || @@ -8096,7 +8117,17 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) trace_kvm_apicv_update_request(activate, bit); if (kvm_x86_ops.pre_update_apicv_exec_ctrl) kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate); - kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + + /* + * Sending request to update APICV for all other vcpus, + * while update the calling vcpu immediately instead of + * waiting for another #VMEXIT to handle the request. + */ + except = kvm_get_running_vcpu(); + kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE, + except); + if (except) + kvm_vcpu_update_apicv(except); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); @@ -8418,7 +8449,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops.sync_dirty_debug_regs(vcpu); kvm_update_dr0123(vcpu); - kvm_update_dr6(vcpu); kvm_update_dr7(vcpu); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } @@ -9480,7 +9510,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = DR6_INIT; - kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); @@ -9662,7 +9691,9 @@ int kvm_arch_hardware_setup(void *opaque) if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) supported_xss = 0; - cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); +#undef __kvm_cpu_cap_has if (kvm_has_tsc_control) { /* @@ -9694,7 +9725,8 @@ int kvm_arch_check_processor_compat(void *opaque) WARN_ON(!irqs_disabled()); - if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits) + if (__cr4_reserved_bits(cpu_has, c) != + __cr4_reserved_bits(cpu_has, &boot_cpu_data)) return -EIO; return ops->check_processor_compatibility(); diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore index 8df89f0a3fe6..8ae0f93ecbfd 100644 --- a/arch/x86/lib/.gitignore +++ b/arch/x86/lib/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only inat-tables.c diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 859519f5b342..a51df516b87b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1222,7 +1222,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) return 1; /* read, not present: */ - if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + if (unlikely(!vma_is_accessible(vma))) return 1; return 0; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e7bb483557c9..1bba16c5742b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -467,7 +467,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) * the physical memory. To access them they are temporarily mapped. */ unsigned long __ref init_memory_mapping(unsigned long start, - unsigned long end) + unsigned long end, pgprot_t prot) { struct map_range mr[NR_RANGE_MR]; unsigned long ret = 0; @@ -481,7 +481,8 @@ unsigned long __ref init_memory_mapping(unsigned long start, for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, - mr[i].page_size_mask); + mr[i].page_size_mask, + prot); add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); @@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping( */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= min(end, (u64)pgt_buf_top<<PAGE_SHIFT); - init_memory_mapping(start, end); + init_memory_mapping(start, end, PAGE_KERNEL); mapped_ram_size += end - start; can_use_brk_pgt = true; } @@ -661,7 +662,7 @@ void __init init_mem_mapping(void) #endif /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); + init_memory_mapping(0, ISA_END_ADDRESS, PAGE_KERNEL); /* Init the trampoline, possibly with KASLR memory offset */ init_trampoline(); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de73992b8432..4222a010057a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -257,7 +257,8 @@ static inline int __is_kernel_text(unsigned long addr) unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, + pgprot_t prot) { int use_pse = page_size_mask == (1<<PG_LEVEL_2M); unsigned long last_map_addr = end; @@ -819,12 +820,24 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; - return __add_pages(nid, start_pfn, nr_pages, restrictions); + /* + * The page tables were already mapped at boot so if the caller + * requests a different mapping type then we must change all the + * pages with __set_memory_prot(). + */ + if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { + ret = __set_memory_prot(start, nr_pages, params->pgprot); + if (ret) + return ret; + } + + return __add_pages(nid, start_pfn, nr_pages, params); } void arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a14711d3a93..3b289c2f75cd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -585,7 +585,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, */ static unsigned long __meminit phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t _prot, bool init) { unsigned long pages = 0, paddr_next; unsigned long paddr_last = paddr_end; @@ -595,7 +595,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { pud_t *pud; pmd_t *pmd; - pgprot_t prot = PAGE_KERNEL; + pgprot_t prot = _prot; vaddr = (unsigned long)__va(paddr); pud = pud_page + pud_index(vaddr); @@ -644,9 +644,12 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<<PG_LEVEL_1G)) { pages++; spin_lock(&init_mm.page_table_lock); + + prot = __pgprot(pgprot_val(prot) | __PAGE_KERNEL_LARGE); + set_pte_init((pte_t *)pud, pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, - PAGE_KERNEL_LARGE), + prot), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t prot, bool init) { unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; @@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, - page_size_mask, init); + page_size_mask, prot, init); for (; vaddr < vaddr_end; vaddr = vaddr_next) { p4d_t *p4d = p4d_page + p4d_index(vaddr); @@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); p4d_populate_init(&init_mm, p4d, pud, init); @@ -722,7 +725,7 @@ static unsigned long __meminit __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, unsigned long page_size_mask, - bool init) + pgprot_t prot, bool init) { bool pgd_changed = false; unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; @@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start, paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask, - init); + prot, init); continue; } p4d = alloc_low_page(); paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) @@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long __meminit kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, true); + page_size_mask, prot, true); } /* @@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start, unsigned long page_size_mask) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, false); + page_size_mask, PAGE_KERNEL, + false); } #ifndef CONFIG_NUMA @@ -843,11 +847,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -858,14 +862,14 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size); + init_memory_mapping(start, start + size, params->pgprot); - return add_pages(nid, start_pfn, nr_pages, restrictions); + return add_pages(nid, start_pfn, nr_pages, params); } #define PAGE_INUSE 0xFD diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index eeae142062ed..3f37b5c80bb3 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void); unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask); + unsigned long page_size_mask, + pgprot_t prot); unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 99f7a68738f0..59ba008504dc 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -25,11 +25,8 @@ nodemask_t numa_nodes_parsed __initdata; struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -static struct numa_meminfo numa_meminfo -#ifndef CONFIG_MEMORY_HOTPLUG -__initdata -#endif -; +static struct numa_meminfo numa_meminfo __initdata_or_meminfo; +static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo; static int numa_distance_cnt; static u8 *numa_distance; @@ -169,6 +166,19 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) } /** + * numa_move_tail_memblk - Move a numa_memblk from one numa_meminfo to another + * @dst: numa_meminfo to append block to + * @idx: Index of memblk to remove + * @src: numa_meminfo to remove memblk from + */ +static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx, + struct numa_meminfo *src) +{ + dst->blk[dst->nr_blks++] = src->blk[idx]; + numa_remove_memblk_from(idx, src); +} + +/** * numa_add_memblk - Add one numa_memblk to numa_meminfo * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk @@ -237,14 +247,19 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) for (i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - /* make sure all blocks are inside the limits */ + /* move / save reserved memory ranges */ + if (!memblock_overlaps_region(&memblock.memory, + bi->start, bi->end - bi->start)) { + numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi); + continue; + } + + /* make sure all non-reserved blocks are inside the limits */ bi->start = max(bi->start, low); bi->end = min(bi->end, high); - /* and there's no empty or non-exist block */ - if (bi->start >= bi->end || - !memblock_overlaps_region(&memblock.memory, - bi->start, bi->end - bi->start)) + /* and there's no empty block */ + if (bi->start >= bi->end) numa_remove_memblk_from(i--, mi); } @@ -881,16 +896,38 @@ EXPORT_SYMBOL(cpumask_of_node); #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -#ifdef CONFIG_MEMORY_HOTPLUG -int memory_add_physaddr_to_nid(u64 start) +#ifdef CONFIG_NUMA_KEEP_MEMINFO +static int meminfo_to_nid(struct numa_meminfo *mi, u64 start) { - struct numa_meminfo *mi = &numa_meminfo; - int nid = mi->blk[0].nid; int i; for (i = 0; i < mi->nr_blks; i++) if (mi->blk[i].start <= start && mi->blk[i].end > start) - nid = mi->blk[i].nid; + return mi->blk[i].nid; + return NUMA_NO_NODE; +} + +int phys_to_target_node(phys_addr_t start) +{ + int nid = meminfo_to_nid(&numa_meminfo, start); + + /* + * Prefer online nodes, but if reserved memory might be + * hot-added continue the search with reserved ranges. + */ + if (nid != NUMA_NO_NODE) + return nid; + + return meminfo_to_nid(&numa_reserved_meminfo, start); +} +EXPORT_SYMBOL_GPL(phys_to_target_node); + +int memory_add_physaddr_to_nid(u64 start) +{ + int nid = meminfo_to_nid(&numa_meminfo, start); + + if (nid == NUMA_NO_NODE) + nid = numa_meminfo.blk[0].nid; return nid; } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 6d5424069e2b..59eca6a94ce7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1795,6 +1795,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * _set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. + */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index c6f84c0b5d7a..8873ed1438a9 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -63,7 +63,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey) return false; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 211bb9358b73..c5e393f8bb3f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -202,7 +202,7 @@ virt_to_phys_or_null_size(void *va, unsigned long size) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text, pf; + unsigned long pfn, text, pf, rodata; struct page *page; unsigned npages; pgd_t *pgd = efi_mm.pgd; @@ -256,7 +256,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; + npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; @@ -266,6 +266,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } + npages = (__end_rodata - __start_rodata) >> PAGE_SHIFT; + rodata = __pa(__start_rodata); + pfn = rodata >> PAGE_SHIFT; + if (kernel_map_pages_in_pgd(pgd, pfn, rodata, npages, pf)) { + pr_err("Failed to map kernel rodata 1:1\n"); + return 1; + } + return 0; } @@ -638,7 +646,7 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, @@ -669,7 +677,7 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - if (!phys_name || !phys_data) + if (!phys_name || (data && !phys_data)) status = EFI_INVALID_PARAMETER; else status = efi_thunk(set_variable, phys_name, phys_vendor, diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 607f58147311..c60255da5a6c 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -352,7 +352,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, if (type == EFI_MEMORY_MAPPED_IO) return ioremap(phys_addr, size); - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size, + PAGE_KERNEL); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; efi_ioremap(top, size - (top - phys_addr), type, attribute); diff --git a/arch/x86/realmode/rm/.gitignore b/arch/x86/realmode/rm/.gitignore index b6ed3a2555cb..6c3464f46166 100644 --- a/arch/x86/realmode/rm/.gitignore +++ b/arch/x86/realmode/rm/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only pasyms.h realmode.lds realmode.relocs diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore index be0ed065249b..d36dc7cf9115 100644 --- a/arch/x86/tools/.gitignore +++ b/arch/x86/tools/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only relocs diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 593d5f3902bd..478710384b34 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __UM_PROCESSOR_H #define __UM_PROCESSOR_H +#include <linux/time-internal.h> /* include faultinfo structure */ #include <sysdep/faultinfo.h> @@ -21,12 +22,19 @@ #include <asm/user.h> /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) +static __always_inline void rep_nop(void) { __asm__ __volatile__("rep;nop": : :"memory"); } -#define cpu_relax() rep_nop() +static __always_inline void cpu_relax(void) +{ + if (time_travel_mode == TT_MODE_INFCPU || + time_travel_mode == TT_MODE_EXTERNAL) + time_travel_ndelay(1); + else + rep_nop(); +} #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h index 7c297e9e2413..df7a3896f5dd 100644 --- a/arch/x86/um/asm/vm-flags.h +++ b/arch/x86/um/asm/vm-flags.h @@ -9,17 +9,11 @@ #ifdef CONFIG_X86_32 -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC) #endif #endif diff --git a/arch/x86/um/vdso/.gitignore b/arch/x86/um/vdso/.gitignore index f8b69d84238e..652e31d82582 100644 --- a/arch/x86/um/vdso/.gitignore +++ b/arch/x86/um/vdso/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 33b0e20df7fc..1a2d8a50dac4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -985,7 +985,7 @@ void xen_enable_syscall(void) #endif /* CONFIG_X86_64 */ } -void __init xen_pvmmu_arch_setup(void) +static void __init xen_pvmmu_arch_setup(void) { HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 802ee5bba66c..8fb8a50a28b4 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -53,6 +53,7 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); +void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { @@ -309,7 +310,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) * pointing just below where pt_regs would be if it were a normal * kernel entry. */ - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eip = (unsigned long)asm_cpu_bringup_and_idle; ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ ctxt->user_regs.ds = __USER_DS; diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1d0cee3163e4..1ba601df3a37 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -35,7 +35,11 @@ SYM_CODE_START(startup_xen) rep __ASM_SIZE(stos) mov %_ASM_SI, xen_start_info - mov $init_thread_union+THREAD_SIZE, %_ASM_SP +#ifdef CONFIG_X86_64 + mov initial_stack(%rip), %rsp +#else + mov initial_stack, %esp +#endif #ifdef CONFIG_X86_64 /* Set up %gs. @@ -51,9 +55,19 @@ SYM_CODE_START(startup_xen) wrmsr #endif - jmp xen_start_kernel + call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT + +#ifdef CONFIG_XEN_PV_SMP +.pushsection .text +SYM_CODE_START(asm_cpu_bringup_and_idle) + UNWIND_HINT_EMPTY + + call cpu_bringup_and_idle +SYM_CODE_END(asm_cpu_bringup_and_idle) +.popsection +#endif #endif .pushsection .text |