diff options
Diffstat (limited to 'arch/x86')
148 files changed, 4764 insertions, 1912 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 20da391b5f32..63bf349b2b24 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,6 +54,8 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_PHYS_TO_DMA + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 @@ -61,6 +63,7 @@ config X86 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_ZONE_DEVICE if X86_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG @@ -68,7 +71,6 @@ config X86 select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS @@ -116,6 +118,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 @@ -154,6 +157,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 @@ -320,7 +324,7 @@ config X86_64_SMP config X86_32_LAZY_GS def_bool y - depends on X86_32 && !CC_STACKPROTECTOR + depends on X86_32 && CC_STACKPROTECTOR_NONE config ARCH_SUPPORTS_UPROBES def_bool y @@ -810,6 +814,15 @@ config PARAVIRT_TIME_ACCOUNTING config PARAVIRT_CLOCK bool +config JAILHOUSE_GUEST + bool "Jailhouse non-root cell support" + depends on X86_64 && PCI + select X86_PM_TIMER + ---help--- + This option allows to run Linux as guest in a Jailhouse non-root + cell. You can leave this option disabled if you only want to start + Jailhouse and run Linux afterwards in the root cell. + endif #HYPERVISOR_GUEST config NO_BOOTMEM @@ -1255,9 +1268,9 @@ config MICROCODE CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. - In addition, you can build-in the microcode into the kernel. For that you - need to enable FIRMWARE_IN_KERNEL and add the vendor-supplied microcode - to the CONFIG_EXTRA_FIRMWARE config option. + In addition, you can build the microcode into the kernel. For that you + need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE + config option. config MICROCODE_INTEL bool "Intel microcode loading support" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 672441c008c7..192e4d2f9efc 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -169,14 +169,6 @@ config IOMMU_DEBUG options. See Documentation/x86/x86_64/boot-options.txt for more details. -config IOMMU_STRESS - bool "Enable IOMMU stress-test mode" - ---help--- - This option disables various optimizations in IOMMU related - code to do real stress testing of the IOMMU code. This option - will cause a performance drop and should only be enabled for - testing. - config IOMMU_LEAK bool "IOMMU leak tracing" depends on IOMMU_DEBUG && DMA_API_DEBUG diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index e56dbc67e837..353e20c3f114 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -999,6 +999,7 @@ struct boot_params *efi_main(struct efi_config *c, /* Ask the firmware to clear memory on unclean shutdown */ efi_enable_reset_attack_mitigation(sys_table); + efi_retrieve_tpm2_eventlog(sys_table); setup_graphics(boot_params); diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3d09e3aca18d..12e8484a8ee7 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -90,30 +90,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff .octa 0x00000000000000000000000000000000 -.section .rodata -.align 16 -.type aad_shift_arr, @object -.size aad_shift_arr, 272 -aad_shift_arr: - .octa 0xffffffffffffffffffffffffffffffff - .octa 0xffffffffffffffffffffffffffffff0C - .octa 0xffffffffffffffffffffffffffff0D0C - .octa 0xffffffffffffffffffffffffff0E0D0C - .octa 0xffffffffffffffffffffffff0F0E0D0C - .octa 0xffffffffffffffffffffff0C0B0A0908 - .octa 0xffffffffffffffffffff0D0C0B0A0908 - .octa 0xffffffffffffffffff0E0D0C0B0A0908 - .octa 0xffffffffffffffff0F0E0D0C0B0A0908 - .octa 0xffffffffffffff0C0B0A090807060504 - .octa 0xffffffffffff0D0C0B0A090807060504 - .octa 0xffffffffff0E0D0C0B0A090807060504 - .octa 0xffffffff0F0E0D0C0B0A090807060504 - .octa 0xffffff0C0B0A09080706050403020100 - .octa 0xffff0D0C0B0A09080706050403020100 - .octa 0xff0E0D0C0B0A09080706050403020100 - .octa 0x0F0E0D0C0B0A09080706050403020100 - - .text @@ -257,6 +233,37 @@ aad_shift_arr: pxor \TMP1, \GH # result is in TMP1 .endm +# Reads DLEN bytes starting at DPTR and stores in XMMDst +# where 0 < DLEN < 16 +# Clobbers %rax, DLEN and XMM1 +.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst + cmp $8, \DLEN + jl _read_lt8_\@ + mov (\DPTR), %rax + MOVQ_R64_XMM %rax, \XMMDst + sub $8, \DLEN + jz _done_read_partial_block_\@ + xor %eax, %eax +_read_next_byte_\@: + shl $8, %rax + mov 7(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_\@ + MOVQ_R64_XMM %rax, \XMM1 + pslldq $8, \XMM1 + por \XMM1, \XMMDst + jmp _done_read_partial_block_\@ +_read_lt8_\@: + xor %eax, %eax +_read_next_byte_lt8_\@: + shl $8, %rax + mov -1(\DPTR, \DLEN, 1), %al + dec \DLEN + jnz _read_next_byte_lt8_\@ + MOVQ_R64_XMM %rax, \XMMDst +_done_read_partial_block_\@: +.endm + /* * if a = number of total plaintext bytes * b = floor(a/16) @@ -273,62 +280,30 @@ aad_shift_arr: XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r11 # %r11 = aadLen pxor %xmm\i, %xmm\i pxor \XMM2, \XMM2 cmp $16, %r11 - jl _get_AAD_rest8\num_initial_blocks\operation + jl _get_AAD_rest\num_initial_blocks\operation _get_AAD_blocks\num_initial_blocks\operation: movdqu (%r10), %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor %xmm\i, \XMM2 GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 add $16, %r10 - sub $16, %r12 sub $16, %r11 cmp $16, %r11 jge _get_AAD_blocks\num_initial_blocks\operation movdqu \XMM2, %xmm\i + + /* read the last <16B of AAD */ +_get_AAD_rest\num_initial_blocks\operation: cmp $0, %r11 je _get_AAD_done\num_initial_blocks\operation - pxor %xmm\i,%xmm\i - - /* read the last <16B of AAD. since we have at least 4B of - data right after the AAD (the ICV, and maybe some CT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\num_initial_blocks\operation: - cmp $4, %r11 - jle _get_AAD_rest4\num_initial_blocks\operation - movq (%r10), \TMP1 - add $8, %r10 - sub $8, %r11 - pslldq $8, \TMP1 - psrldq $8, %xmm\i - pxor \TMP1, %xmm\i - jmp _get_AAD_rest8\num_initial_blocks\operation -_get_AAD_rest4\num_initial_blocks\operation: - cmp $0, %r11 - jle _get_AAD_rest0\num_initial_blocks\operation - mov (%r10), %eax - movq %rax, \TMP1 - add $4, %r10 - sub $4, %r10 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i -_get_AAD_rest0\num_initial_blocks\operation: - /* finalize: shift out the extra bytes we read, and align - left. since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \TMP1 - PSHUFB_XMM \TMP1, %xmm\i -_get_AAD_rest_final\num_initial_blocks\operation: + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor \XMM2, %xmm\i GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 @@ -532,62 +507,30 @@ _initial_blocks_done\num_initial_blocks\operation: XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r11 # %r11 = aadLen pxor %xmm\i, %xmm\i pxor \XMM2, \XMM2 cmp $16, %r11 - jl _get_AAD_rest8\num_initial_blocks\operation + jl _get_AAD_rest\num_initial_blocks\operation _get_AAD_blocks\num_initial_blocks\operation: movdqu (%r10), %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor %xmm\i, \XMM2 GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 add $16, %r10 - sub $16, %r12 sub $16, %r11 cmp $16, %r11 jge _get_AAD_blocks\num_initial_blocks\operation movdqu \XMM2, %xmm\i + + /* read the last <16B of AAD */ +_get_AAD_rest\num_initial_blocks\operation: cmp $0, %r11 je _get_AAD_done\num_initial_blocks\operation - pxor %xmm\i,%xmm\i - - /* read the last <16B of AAD. since we have at least 4B of - data right after the AAD (the ICV, and maybe some PT), we can - read 4B/8B blocks safely, and then get rid of the extra stuff */ -_get_AAD_rest8\num_initial_blocks\operation: - cmp $4, %r11 - jle _get_AAD_rest4\num_initial_blocks\operation - movq (%r10), \TMP1 - add $8, %r10 - sub $8, %r11 - pslldq $8, \TMP1 - psrldq $8, %xmm\i - pxor \TMP1, %xmm\i - jmp _get_AAD_rest8\num_initial_blocks\operation -_get_AAD_rest4\num_initial_blocks\operation: - cmp $0, %r11 - jle _get_AAD_rest0\num_initial_blocks\operation - mov (%r10), %eax - movq %rax, \TMP1 - add $4, %r10 - sub $4, %r10 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i -_get_AAD_rest0\num_initial_blocks\operation: - /* finalize: shift out the extra bytes we read, and align - left. since pslldq can only shift by an immediate, we use - vpshufb and an array of shuffle masks */ - movq %r12, %r11 - salq $4, %r11 - movdqu aad_shift_arr(%r11), \TMP1 - PSHUFB_XMM \TMP1, %xmm\i -_get_AAD_rest_final\num_initial_blocks\operation: + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data pxor \XMM2, %xmm\i GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 @@ -1386,14 +1329,6 @@ _esb_loop_\@: * * AAD Format with 64-bit Extended Sequence Number * -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. -* * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ @@ -1487,19 +1422,16 @@ _zero_cipher_left_decrypt: PSHUFB_XMM %xmm10, %xmm0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 -# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes -# (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes + lea (%arg3,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 movdqa %xmm1, %xmm2 pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu (%r12), %xmm1 # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm2 @@ -1508,9 +1440,6 @@ _zero_cipher_left_decrypt: pxor %xmm2, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - sub %r13, %r11 - add $16, %r11 # output %r13 bytes MOVQ_R64_XMM %xmm0, %rax @@ -1664,14 +1593,6 @@ ENDPROC(aesni_gcm_dec) * * AAD Format with 64-bit Extended Sequence Number * -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. -* * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) @@ -1764,19 +1685,16 @@ _zero_cipher_left_encrypt: movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks - lea SHIFT_MASK+16(%rip), %r12 + + lea (%arg3,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + + lea ALL_F+16(%rip), %r12 sub %r13, %r12 - # adjust the shuffle mask pointer to be able to shift 16-r13 bytes - # (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu (%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm0 pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 movdqa SHUF_MASK(%rip), %xmm10 @@ -1785,9 +1703,6 @@ _zero_cipher_left_encrypt: pxor %xmm0, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # GHASH computation for the last <16 byte block - sub %r13, %r11 - add $16, %r11 - movdqa SHUF_MASK(%rip), %xmm10 PSHUFB_XMM %xmm10, %xmm0 diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3bf3dcf29825..34cf1c1f8c98 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -690,8 +690,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } -static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, - unsigned int key_len) +static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key, + unsigned int key_len) { struct cryptd_aead **ctx = crypto_aead_ctx(parent); struct cryptd_aead *cryptd_tfm = *ctx; @@ -716,8 +716,8 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead, /* This is the Integrity Check Value (aka the authentication tag length and can * be 8, 12 or 16 bytes long. */ -static int rfc4106_set_authsize(struct crypto_aead *parent, - unsigned int authsize) +static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent, + unsigned int authsize) { struct cryptd_aead **ctx = crypto_aead_ctx(parent); struct cryptd_aead *cryptd_tfm = *ctx; @@ -824,7 +824,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || req->src->offset + req->src->length <= PAGE_SIZE) && - sg_is_last(req->dst) && + sg_is_last(req->dst) && req->dst->length && (!PageHighMem(sg_page(req->dst)) || req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; @@ -929,7 +929,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) aes_ctx); } -static int rfc4106_encrypt(struct aead_request *req) +static int gcmaes_wrapper_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_aead **ctx = crypto_aead_ctx(tfm); @@ -945,7 +945,7 @@ static int rfc4106_encrypt(struct aead_request *req) return crypto_aead_encrypt(req); } -static int rfc4106_decrypt(struct aead_request *req) +static int gcmaes_wrapper_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cryptd_aead **ctx = crypto_aead_ctx(tfm); @@ -1117,7 +1117,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req) { __be32 counter = cpu_to_be32(1); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); @@ -1128,6 +1128,30 @@ static int generic_gcmaes_decrypt(struct aead_request *req) aes_ctx); } +static int generic_gcmaes_init(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void generic_gcmaes_exit(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} + static struct aead_alg aesni_aead_algs[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, @@ -1147,10 +1171,10 @@ static struct aead_alg aesni_aead_algs[] = { { }, { .init = rfc4106_init, .exit = rfc4106_exit, - .setkey = rfc4106_set_key, - .setauthsize = rfc4106_set_authsize, - .encrypt = rfc4106_encrypt, - .decrypt = rfc4106_decrypt, + .setkey = gcmaes_wrapper_set_key, + .setauthsize = gcmaes_wrapper_set_authsize, + .encrypt = gcmaes_wrapper_encrypt, + .decrypt = gcmaes_wrapper_decrypt, .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { @@ -1170,13 +1194,31 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, .base = { + .cra_name = "__generic-gcm-aes-aesni", + .cra_driver_name = "__driver-generic-gcm-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), + .cra_alignmask = AESNI_ALIGN - 1, + .cra_module = THIS_MODULE, + }, +}, { + .init = generic_gcmaes_init, + .exit = generic_gcmaes_exit, + .setkey = gcmaes_wrapper_set_key, + .setauthsize = gcmaes_wrapper_set_authsize, + .encrypt = gcmaes_wrapper_encrypt, + .decrypt = gcmaes_wrapper_decrypt, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = 16, + .base = { .cra_name = "gcm(aes)", .cra_driver_name = "generic-gcm-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), - .cra_alignmask = AESNI_ALIGN - 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), .cra_module = THIS_MODULE, }, } }; diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 1e6af1b35f7b..dce7c5d39c2f 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -107,7 +107,6 @@ static struct skcipher_alg alg = { .base.cra_priority = 300, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct chacha20_ctx), - .base.cra_alignmask = sizeof(u32) - 1, .base.cra_module = THIS_MODULE, .min_keysize = CHACHA20_KEY_SIZE, diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 27226df3f7d8..c8d9cdacbf10 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -162,6 +162,7 @@ static struct shash_alg alg = { .cra_name = "crc32", .cra_driver_name = "crc32-pclmul", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index c194d5717ae5..5773e1161072 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -226,6 +226,7 @@ static struct shash_alg alg = { .cra_name = "crc32c", .cra_driver_name = "crc32c-intel", .cra_priority = 200, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, .cra_ctxsize = sizeof(u32), .cra_module = THIS_MODULE, diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e32142bc071d..790377797544 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -164,14 +164,12 @@ static struct shash_alg alg = { .init = poly1305_simd_init, .update = poly1305_simd_update, .final = crypto_poly1305_final, - .setkey = crypto_poly1305_setkey, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { .cra_name = "poly1305", .cra_driver_name = "poly1305-simd", .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_alignmask = sizeof(u32) - 1, .cra_blocksize = POLY1305_BLOCK_SIZE, .cra_module = THIS_MODULE, }, diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S index 329452b8f794..6014b7b9e52a 100644 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -1,6 +1,7 @@ -# salsa20_pm.s version 20051229 -# D. J. Bernstein -# Public domain. +# Derived from: +# salsa20_pm.s version 20051229 +# D. J. Bernstein +# Public domain. #include <linux/linkage.h> @@ -935,180 +936,3 @@ ENTRY(salsa20_encrypt_bytes) # goto bytesatleast1 jmp ._bytesatleast1 ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # k = arg2 - movl 8(%esp,%eax),%ecx - # kbits = arg3 - movl 12(%esp,%eax),%edx - # x = arg1 - movl 4(%esp,%eax),%eax - # in1 = *(uint32 *) (k + 0) - movl 0(%ecx),%ebx - # in2 = *(uint32 *) (k + 4) - movl 4(%ecx),%esi - # in3 = *(uint32 *) (k + 8) - movl 8(%ecx),%edi - # in4 = *(uint32 *) (k + 12) - movl 12(%ecx),%ebp - # *(uint32 *) (x + 4) = in1 - movl %ebx,4(%eax) - # *(uint32 *) (x + 8) = in2 - movl %esi,8(%eax) - # *(uint32 *) (x + 12) = in3 - movl %edi,12(%eax) - # *(uint32 *) (x + 16) = in4 - movl %ebp,16(%eax) - # kbits - 256 - cmp $256,%edx - # goto kbits128 if unsigned< - jb ._kbits128 -._kbits256: - # in11 = *(uint32 *) (k + 16) - movl 16(%ecx),%edx - # in12 = *(uint32 *) (k + 20) - movl 20(%ecx),%ebx - # in13 = *(uint32 *) (k + 24) - movl 24(%ecx),%esi - # in14 = *(uint32 *) (k + 28) - movl 28(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 857760878 - mov $857760878,%edx - # in10 = 2036477234 - mov $2036477234,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) - # goto keysetupdone - jmp ._keysetupdone -._kbits128: - # in11 = *(uint32 *) (k + 0) - movl 0(%ecx),%edx - # in12 = *(uint32 *) (k + 4) - movl 4(%ecx),%ebx - # in13 = *(uint32 *) (k + 8) - movl 8(%ecx),%esi - # in14 = *(uint32 *) (k + 12) - movl 12(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 824206446 - mov $824206446,%edx - # in10 = 2036477238 - mov $2036477238,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) -._keysetupdone: - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_keysetup) - -# enter salsa20_ivsetup -ENTRY(salsa20_ivsetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # iv = arg2 - movl 8(%esp,%eax),%ecx - # x = arg1 - movl 4(%esp,%eax),%eax - # in6 = *(uint32 *) (iv + 0) - movl 0(%ecx),%edx - # in7 = *(uint32 *) (iv + 4) - movl 4(%ecx),%ecx - # in8 = 0 - mov $0,%ebx - # in9 = 0 - mov $0,%esi - # *(uint32 *) (x + 24) = in6 - movl %edx,24(%eax) - # *(uint32 *) (x + 28) = in7 - movl %ecx,28(%eax) - # *(uint32 *) (x + 32) = in8 - movl %ebx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %esi,36(%eax) - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 10db30d58006..03a4918f41ee 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -803,117 +803,3 @@ ENTRY(salsa20_encrypt_bytes) # goto bytesatleast1 jmp ._bytesatleast1 ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # k = arg2 - mov %rsi,%rsi - # kbits = arg3 - mov %rdx,%rdx - # x = arg1 - mov %rdi,%rdi - # in0 = *(uint64 *) (k + 0) - movq 0(%rsi),%r8 - # in2 = *(uint64 *) (k + 8) - movq 8(%rsi),%r9 - # *(uint64 *) (x + 4) = in0 - movq %r8,4(%rdi) - # *(uint64 *) (x + 12) = in2 - movq %r9,12(%rdi) - # unsigned<? kbits - 256 - cmp $256,%rdx - # comment:fp stack unchanged by jump - # goto kbits128 if unsigned< - jb ._kbits128 -# kbits256: -._kbits256: - # in10 = *(uint64 *) (k + 16) - movq 16(%rsi),%rdx - # in12 = *(uint64 *) (k + 24) - movq 24(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 857760878 - mov $857760878,%rdx - # in10 = 2036477234 - mov $2036477234,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) - # comment:fp stack unchanged by jump - # goto keysetupdone - jmp ._keysetupdone -# kbits128: -._kbits128: - # in10 = *(uint64 *) (k + 0) - movq 0(%rsi),%rdx - # in12 = *(uint64 *) (k + 8) - movq 8(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 824206446 - mov $824206446,%rdx - # in10 = 2036477238 - mov $2036477238,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) -# keysetupdone: -._keysetupdone: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -ENDPROC(salsa20_keysetup) - -# enter salsa20_ivsetup -ENTRY(salsa20_ivsetup) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # iv = arg2 - mov %rsi,%rsi - # x = arg1 - mov %rdi,%rdi - # in6 = *(uint64 *) (iv + 0) - movq 0(%rsi),%rsi - # in8 = 0 - mov $0,%r8 - # *(uint64 *) (x + 24) = in6 - movq %rsi,24(%rdi) - # *(uint64 *) (x + 32) = in8 - movq %r8,32(%rdi) - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index cb91a64a99e7..b07d7d959806 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -11,6 +11,9 @@ * - x86-64 version, renamed as salsa20-x86_64-asm_64.S * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s> * + * Also modified to set up the initial state using the generic C code rather + * than in assembly. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -18,93 +21,65 @@ * */ -#include <crypto/algapi.h> +#include <asm/unaligned.h> +#include <crypto/internal/skcipher.h> +#include <crypto/salsa20.h> #include <linux/module.h> -#include <linux/crypto.h> - -#define SALSA20_IV_SIZE 8U -#define SALSA20_MIN_KEY_SIZE 16U -#define SALSA20_MAX_KEY_SIZE 32U - -struct salsa20_ctx -{ - u32 input[16]; -}; -asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, - u32 keysize, u32 ivsize); -asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); -asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, - const u8 *src, u8 *dst, u32 bytes); +asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst, + u32 bytes); -static int setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keysize) +static int salsa20_asm_crypt(struct skcipher_request *req) { - struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); - salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); - return 0; -} - -static int encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + u32 state[16]; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 64); + err = skcipher_walk_virt(&walk, req, true); - salsa20_ivsetup(ctx, walk.iv); + crypto_salsa20_init(state, ctx, walk.iv); - while (walk.nbytes >= 64) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes - (walk.nbytes % 64)); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); - } + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; - if (walk.nbytes) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + salsa20_encrypt_bytes(state, walk.src.virt.addr, + walk.dst.virt.addr, nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; } -static struct crypto_alg alg = { - .cra_name = "salsa20", - .cra_driver_name = "salsa20-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_type = &crypto_blkcipher_type, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct salsa20_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .setkey = setkey, - .encrypt = encrypt, - .decrypt = encrypt, - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize = SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - } - } +static struct skcipher_alg alg = { + .base.cra_name = "salsa20", + .base.cra_driver_name = "salsa20-asm", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct salsa20_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = SALSA20_MIN_KEY_SIZE, + .max_keysize = SALSA20_MAX_KEY_SIZE, + .ivsize = SALSA20_IV_SIZE, + .chunksize = SALSA20_BLOCK_SIZE, + .setkey = crypto_salsa20_setkey, + .encrypt = salsa20_asm_crypt, + .decrypt = salsa20_asm_crypt, }; static int __init init(void) { - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(init); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c index 36870b26067a..d08805032f01 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c @@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) { unsigned int j; - state->lens[0] = 0; - state->lens[1] = 1; - state->lens[2] = 2; - state->lens[3] = 3; + /* initially all lanes are unused */ + state->lens[0] = 0xFFFFFFFF00000000; + state->lens[1] = 0xFFFFFFFF00000001; + state->lens[2] = 0xFFFFFFFF00000002; + state->lens[3] = 0xFFFFFFFF00000003; + state->unused_lanes = 0xFF03020100; for (j = 0; j < 4; j++) state->ldata[j].job_in_lane = NULL; diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 1c3b7ceb36d2..e7273a606a07 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -55,29 +55,31 @@ #define RAB1bl %bl #define RAB2bl %cl +#define CD0 0x0(%rsp) +#define CD1 0x8(%rsp) +#define CD2 0x10(%rsp) + +# used only before/after all rounds #define RCD0 %r8 #define RCD1 %r9 #define RCD2 %r10 -#define RCD0d %r8d -#define RCD1d %r9d -#define RCD2d %r10d - -#define RX0 %rbp -#define RX1 %r11 -#define RX2 %r12 +# used only during rounds +#define RX0 %r8 +#define RX1 %r9 +#define RX2 %r10 -#define RX0d %ebp -#define RX1d %r11d -#define RX2d %r12d +#define RX0d %r8d +#define RX1d %r9d +#define RX2d %r10d -#define RY0 %r13 -#define RY1 %r14 -#define RY2 %r15 +#define RY0 %r11 +#define RY1 %r12 +#define RY2 %r13 -#define RY0d %r13d -#define RY1d %r14d -#define RY2d %r15d +#define RY0d %r11d +#define RY1d %r12d +#define RY2d %r13d #define RT0 %rdx #define RT1 %rsi @@ -85,6 +87,8 @@ #define RT0d %edx #define RT1d %esi +#define RT1bl %sil + #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ movzbl ab ## bl, tmp2 ## d; \ movzbl ab ## bh, tmp1 ## d; \ @@ -92,6 +96,11 @@ op1##l T0(CTX, tmp2, 4), dst ## d; \ op2##l T1(CTX, tmp1, 4), dst ## d; +#define swap_ab_with_cd(ab, cd, tmp) \ + movq cd, tmp; \ + movq ab, cd; \ + movq tmp, ab; + /* * Combined G1 & G2 function. Reordered with help of rotates to have moves * at begining. @@ -110,15 +119,15 @@ /* G1,2 && G2,2 */ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ - xchgq cd ## 0, ab ## 0; \ + swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ - xchgq cd ## 1, ab ## 1; \ + swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \ \ do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ - xchgq cd ## 2, ab ## 2; + swap_ab_with_cd(ab ## 2, cd ## 2, RT0); #define enc_round_end(ab, x, y, n) \ addl y ## d, x ## d; \ @@ -168,6 +177,16 @@ decrypt_round3(ba, dc, (n*2)+1); \ decrypt_round3(ba, dc, (n*2)); +#define push_cd() \ + pushq RCD2; \ + pushq RCD1; \ + pushq RCD0; + +#define pop_cd() \ + popq RCD0; \ + popq RCD1; \ + popq RCD2; + #define inpack3(in, n, xy, m) \ movq 4*(n)(in), xy ## 0; \ xorq w+4*m(CTX), xy ## 0; \ @@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way) * %rdx: src, RIO * %rcx: bool, if true: xor output */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rcx; /* bool xor */ @@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way) inpack_enc3(); - encrypt_cycle3(RAB, RCD, 0); - encrypt_cycle3(RAB, RCD, 1); - encrypt_cycle3(RAB, RCD, 2); - encrypt_cycle3(RAB, RCD, 3); - encrypt_cycle3(RAB, RCD, 4); - encrypt_cycle3(RAB, RCD, 5); - encrypt_cycle3(RAB, RCD, 6); - encrypt_cycle3(RAB, RCD, 7); + push_cd(); + encrypt_cycle3(RAB, CD, 0); + encrypt_cycle3(RAB, CD, 1); + encrypt_cycle3(RAB, CD, 2); + encrypt_cycle3(RAB, CD, 3); + encrypt_cycle3(RAB, CD, 4); + encrypt_cycle3(RAB, CD, 5); + encrypt_cycle3(RAB, CD, 6); + encrypt_cycle3(RAB, CD, 7); + pop_cd(); popq RIO; /* dst */ - popq %rbp; /* bool xor */ + popq RT1; /* bool xor */ - testb %bpl, %bpl; + testb RT1bl, RT1bl; jnz .L__enc_xor3; outunpack_enc3(mov); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; .L__enc_xor3: outunpack_enc3(xor); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(__twofish_enc_blk_3way) @@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way) * %rsi: dst * %rdx: src, RIO */ - pushq %r15; - pushq %r14; pushq %r13; pushq %r12; - pushq %rbp; pushq %rbx; pushq %rsi; /* dst */ inpack_dec3(); - decrypt_cycle3(RAB, RCD, 7); - decrypt_cycle3(RAB, RCD, 6); - decrypt_cycle3(RAB, RCD, 5); - decrypt_cycle3(RAB, RCD, 4); - decrypt_cycle3(RAB, RCD, 3); - decrypt_cycle3(RAB, RCD, 2); - decrypt_cycle3(RAB, RCD, 1); - decrypt_cycle3(RAB, RCD, 0); + push_cd(); + decrypt_cycle3(RAB, CD, 7); + decrypt_cycle3(RAB, CD, 6); + decrypt_cycle3(RAB, CD, 5); + decrypt_cycle3(RAB, CD, 4); + decrypt_cycle3(RAB, CD, 3); + decrypt_cycle3(RAB, CD, 2); + decrypt_cycle3(RAB, CD, 1); + decrypt_cycle3(RAB, CD, 0); + pop_cd(); popq RIO; /* dst */ outunpack_dec3(); popq %rbx; - popq %rbp; popq %r12; popq %r13; - popq %r14; - popq %r15; ret; ENDPROC(twofish_dec_blk_3way) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 21dbdf0e476b..74f6eee15179 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -154,6 +154,9 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (cached_flags & _TIF_PATCH_PENDING) + klp_update_patch_state(current); + /* deal with pending signal delivery */ if (cached_flags & _TIF_SIGPENDING) do_signal(regs); @@ -166,9 +169,6 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); - if (cached_flags & _TIF_PATCH_PENDING) - klp_update_patch_state(current); - /* Disable IRQs and retry */ local_irq_disable(); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2a35b1e0fb90..16c2c022540d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -566,6 +566,11 @@ restore_all: .Lrestore_nocheck: RESTORE_REGS 4 # skip orig_eax/error_code .Lirq_return: + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler and when returning from + * scheduler to user-space. + */ INTERRUPT_RETURN .section .fixup, "ax" @@ -895,6 +900,9 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, hyperv_vector_handler) +BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR, + hyperv_reenlightenment_intr) + #endif /* CONFIG_HYPERV */ ENTRY(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4fd9044e72e7..8971bd64d515 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -658,6 +658,10 @@ GLOBAL(restore_regs_and_return_to_kernel) #endif POP_REGS addq $8, %rsp /* skip regs->orig_ax */ + /* + * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization + * when returning from IPI handler. + */ INTERRUPT_RETURN ENTRY(native_iret) @@ -1099,6 +1103,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ #if IS_ENABLED(CONFIG_HYPERV) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler + +apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ + hyperv_reenlightenment_vector hyperv_reenlightenment_intr #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 14efaa0e8684..18e2628e2d8f 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -10,7 +10,9 @@ enum perf_msr_id { PERF_MSR_SMI = 4, PERF_MSR_PTSC = 5, PERF_MSR_IRPERF = 6, - + PERF_MSR_THERM = 7, + PERF_MSR_THERM_SNAP = 8, + PERF_MSR_THERM_UNIT = 9, PERF_MSR_EVENT_MAX, }; @@ -29,6 +31,11 @@ static bool test_irperf(int idx) return boot_cpu_has(X86_FEATURE_IRPERF); } +static bool test_therm_status(int idx) +{ + return boot_cpu_has(X86_FEATURE_DTHERM); +} + static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || @@ -95,22 +102,28 @@ struct perf_msr { bool (*test)(int idx); }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); -PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05"); -PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06"); +PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, + [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, + [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, + [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, }; static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { @@ -161,9 +174,9 @@ static int msr_event_init(struct perf_event *event) if (!msr[cfg].attr) return -EINVAL; - event->hw.idx = -1; - event->hw.event_base = msr[cfg].msr; - event->hw.config = cfg; + event->hw.idx = -1; + event->hw.event_base = msr[cfg].msr; + event->hw.config = cfg; return 0; } @@ -184,7 +197,7 @@ static void msr_event_update(struct perf_event *event) u64 prev, now; s64 delta; - /* Careful, an NMI might modify the previous event value. */ + /* Careful, an NMI might modify the previous event value: */ again: prev = local64_read(&event->hw.prev_count); now = msr_read_counter(event); @@ -193,17 +206,22 @@ again: goto again; delta = now - prev; - if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) + if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { delta = sign_extend64(delta, 31); - - local64_add(delta, &event->count); + local64_add(delta, &event->count); + } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) { + /* If valid, extract digital readout, otherwise set to -1: */ + now = now & (1ULL << 31) ? (now >> 16) & 0x3f : -1; + local64_set(&event->count, now); + } else { + local64_add(delta, &event->count); + } } static void msr_event_start(struct perf_event *event, int flags) { - u64 now; + u64 now = msr_read_counter(event); - now = msr_read_counter(event); local64_set(&event->hw.prev_count, now); } @@ -250,9 +268,7 @@ static int __init msr_init(void) for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { u64 val; - /* - * Virt sucks arse; you cannot tell if a R/O MSR is present :/ - */ + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) msr[i].attr = NULL; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8e4ea143ed96..78f91ec1056e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -93,7 +93,8 @@ struct amd_nb { PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ - PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ + PERF_SAMPLE_PERIOD) #define PEBS_REGS \ (PERF_REG_X86_AX | \ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 189a398290db..2edc49e7409b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -18,6 +18,8 @@ */ #include <linux/types.h> +#include <asm/apic.h> +#include <asm/desc.h> #include <asm/hypervisor.h> #include <asm/hyperv.h> #include <asm/mshyperv.h> @@ -37,6 +39,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { return tsc_pg; } +EXPORT_SYMBOL_GPL(hv_get_tsc_page); static u64 read_hv_clock_tsc(struct clocksource *arg) { @@ -101,6 +104,115 @@ static int hv_cpu_init(unsigned int cpu) return 0; } +static void (*hv_reenlightenment_cb)(void); + +static void hv_reenlightenment_notify(struct work_struct *dummy) +{ + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + /* Don't issue the callback if TSC accesses are not emulated */ + if (hv_reenlightenment_cb && emu_status.inprogress) + hv_reenlightenment_cb(); +} +static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); + +void hyperv_stop_tsc_emulation(void) +{ + u64 freq; + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + emu_status.inprogress = 0; + wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + tsc_khz = div64_u64(freq, 1000); +} +EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); + +static inline bool hv_reenlightenment_available(void) +{ + /* + * Check for required features and priviliges to make TSC frequency + * change notifications work. + */ + return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && + ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; +} + +__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) +{ + entering_ack_irq(); + + inc_irq_stat(irq_hv_reenlightenment_count); + + schedule_delayed_work(&hv_reenlightenment_work, HZ/10); + + exiting_irq(); +} + +void set_hv_tscchange_cb(void (*cb)(void)) +{ + struct hv_reenlightenment_control re_ctrl = { + .vector = HYPERV_REENLIGHTENMENT_VECTOR, + .enabled = 1, + .target_vp = hv_vp_index[smp_processor_id()] + }; + struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; + + if (!hv_reenlightenment_available()) { + pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + return; + } + + hv_reenlightenment_cb = cb; + + /* Make sure callback is registered before we write to MSRs */ + wmb(); + + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); +} +EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); + +void clear_hv_tscchange_cb(void) +{ + struct hv_reenlightenment_control re_ctrl; + + if (!hv_reenlightenment_available()) + return; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + re_ctrl.enabled = 0; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + + hv_reenlightenment_cb = NULL; +} +EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); + +static int hv_cpu_die(unsigned int cpu) +{ + struct hv_reenlightenment_control re_ctrl; + unsigned int new_cpu; + + if (hv_reenlightenment_cb == NULL) + return 0; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + if (re_ctrl.target_vp == hv_vp_index[cpu]) { + /* Reassign to some other online CPU */ + new_cpu = cpumask_any_but(cpu_online_mask, cpu); + + re_ctrl.target_vp = hv_vp_index[new_cpu]; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + } + + return 0; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -110,12 +222,19 @@ static int hv_cpu_init(unsigned int cpu) */ void hyperv_init(void) { - u64 guest_id; + u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; + /* Absolutely required MSRs */ + required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE | + HV_X64_MSR_VP_INDEX_AVAILABLE; + + if ((ms_hyperv.features & required_msrs) != required_msrs) + return; + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); @@ -123,7 +242,7 @@ void hyperv_init(void) return; if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", - hv_cpu_init, NULL) < 0) + hv_cpu_init, hv_cpu_die) < 0) goto free_vp_index; /* @@ -239,17 +358,24 @@ void hyperv_report_panic(struct pt_regs *regs, long err) } EXPORT_SYMBOL_GPL(hyperv_report_panic); -bool hv_is_hypercall_page_setup(void) +bool hv_is_hyperv_initialized(void) { union hv_x64_msr_hypercall_contents hypercall_msr; - /* Check if the hypercall page is setup */ + /* + * Ensure that we're really on Hyper-V, and not a KVM or Xen + * emulation of Hyper-V + */ + if (x86_hyper_type != X86_HYPER_MS_HYPERV) + return false; + + /* + * Verify that earlier initialization succeeded by checking + * that the hypercall page is setup + */ hypercall_msr.as_uint64 = 0; rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - if (!hypercall_msr.enable) - return false; - - return true; + return hypercall_msr.enable; } -EXPORT_SYMBOL_GPL(hv_is_hypercall_page_setup); +EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 9cc9e1c1e2db..56c9ebac946f 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -137,7 +137,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, } if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. + */ flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; flush->flags = 0; } else { flush->address_space = 0; @@ -219,7 +224,12 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, } if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. + */ flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; flush->flags = 0; } else { flush->address_space = 0; @@ -278,8 +288,6 @@ void hyperv_setup_mmu_ops(void) if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) return; - setup_clear_cpu_cap(X86_FEATURE_PCID); - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) { pr_info("Using hypercall for remote TLB flush\n"); pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 5d6a53fd7521..de690c2d2e33 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -6,7 +6,6 @@ generated-y += unistd_32_ia32.h generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h -generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index f077401869ee..11881726ed37 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -49,7 +49,7 @@ extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; extern u8 acpi_sci_flags; -extern int acpi_sci_override_gsi; +extern u32 acpi_sci_override_gsi; void acpi_pic_sci_set_trigger(unsigned int, u16); struct device; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 2cbd75dd2fd3..e1c8dab86670 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -127,88 +127,6 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; -typedef union compat_sigval { - compat_int_t sival_int; - compat_uptr_t sival_ptr; -} compat_sigval_t; - -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[128/sizeof(int) - 3]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGCHLD (x32 version) */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_s64 _utime; - compat_s64 _stime; - } _sigchld_x32; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - short int _addr_lsb; /* Valid LSB of the reported address. */ - union { - /* used when si_code=SEGV_BNDERR */ - struct { - compat_uptr_t _lower; - compat_uptr_t _upper; - } _addr_bnd; - /* used when si_code=SEGV_PKUERR */ - compat_u32 _pkey; - }; - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - struct { - unsigned int _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define COMPAT_OFF_T_MAX 0x7fffffff struct compat_ipc64_perm { @@ -331,4 +249,8 @@ static inline bool in_compat_syscall(void) } #define in_compat_syscall in_compat_syscall /* override the generic impl */ +struct compat_siginfo; +int __copy_siginfo_to_user32(struct compat_siginfo __user *to, + const siginfo_t *from, bool x32_ABI); + #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 73b5fff159a4..0dfe4d3f74e2 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,9 +206,11 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ +#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h new file mode 100644 index 000000000000..1295bc622ebe --- /dev/null +++ b/arch/x86/include/asm/dma-direct.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_X86_DMA_DIRECT_H +#define ASM_X86_DMA_DIRECT_H 1 + +#include <linux/mem_encrypt.h> + +#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ +bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); +#else +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size - 1 <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(paddr); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return __sme_clr(daddr); +} +#endif /* CONFIG_X86_DMA_REMAP */ +#endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 0350d99bb8fd..6277c83c0eb1 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -12,7 +12,6 @@ #include <asm/io.h> #include <asm/swiotlb.h> #include <linux/dma-contiguous.h> -#include <linux/mem_encrypt.h> #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -31,6 +30,9 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } +int arch_dma_supported(struct device *dev, u64 mask); +#define arch_dma_supported arch_dma_supported + bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs @@ -42,31 +44,6 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ -extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else - -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} - -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ - static inline unsigned long dma_alloc_coherent_mask(struct device *dev, gfp_t gfp) { diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h new file mode 100644 index 000000000000..47b7a1296245 --- /dev/null +++ b/arch/x86/include/asm/error-injection.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ERROR_INJECTION_H +#define _ASM_ERROR_INJECTION_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm-generic/error-injection.h> + +asmlinkage void just_return_func(void); +void override_function_with_return(struct pt_regs *regs); + +#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 4df2754ef380..44bbc39a57b3 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -20,12 +20,6 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif -#ifdef CONFIG_COMPAT -int __copy_siginfo_to_user32(compat_siginfo_t __user *to, - const siginfo_t *from, bool x32_ABI); -#endif - - extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); extern void convert_to_fxsr(struct task_struct *tsk, diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 51cc979dd364..7c341a74ec8c 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -38,6 +38,9 @@ typedef struct { #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) unsigned int irq_hv_callback_count; #endif +#if IS_ENABLED(CONFIG_HYPERV) + unsigned int irq_hv_reenlightenment_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 96aa6b9884dc..8c5aaba6633f 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -28,6 +28,7 @@ enum x86_hypervisor_type { X86_HYPER_XEN_PV, X86_HYPER_XEN_HVM, X86_HYPER_KVM, + X86_HYPER_JAILHOUSE, }; #ifdef CONFIG_HYPERVISOR_GUEST diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index c8376b40e882..5cdcdbd4d892 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -69,6 +69,11 @@ struct legacy_pic { extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; +static inline bool has_legacy_pic(void) +{ + return legacy_pic != &null_legacy_pic; +} + static inline int nr_legacy_irqs(void) { return legacy_pic->nr_legacy_irqs; diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 35a6bc4da8ad..cf090e584202 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -10,6 +10,10 @@ * * Things ending in "2" are usually because we have no better * name for them. There's no processor called "SILVERMONT2". + * + * While adding a new CPUID for a new microarchitecture, add a new + * group to keep logically sorted out in chronological order. Within + * that group keep the CPUID for the variants sorted by model number. */ #define INTEL_FAM6_CORE_YONAH 0x0E @@ -49,6 +53,8 @@ #define INTEL_FAM6_KABYLAKE_MOBILE 0x8E #define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E +#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66 + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_PINEVIEW 0x1C diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 528ed4be4393..9e7adcdbe031 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -38,6 +38,7 @@ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); int intel_pmc_s0ix_counter_read(u64 *data); int intel_pmc_gcr_read(u32 offset, u32 *data); +int intel_pmc_gcr_read64(u32 offset, u64 *data); int intel_pmc_gcr_write(u32 offset, u32 data); int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val); @@ -70,6 +71,11 @@ static inline int intel_pmc_gcr_read(u32 offset, u32 *data) return -EINVAL; } +static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) +{ + return -EINVAL; +} + static inline int intel_pmc_gcr_write(u32 offset, u32 data) { return -EINVAL; diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index 7d87437bd030..3de0489deade 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -147,6 +147,18 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); /** + * iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus + * notifier, unlocked + * + * Like iosf_mbi_unregister_pmic_bus_access_notifier(), but for use when the + * caller has already called iosf_mbi_punit_acquire() itself. + * + * @nb: notifier_block to unregister + */ +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb); + +/** * iosf_mbi_call_pmic_bus_access_notifier_chain - Call PMIC bus notifier chain * * @val: action to pass into listener's notifier_call function @@ -154,6 +166,11 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); */ int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v); +/** + * iosf_mbi_assert_punit_acquired - Assert that the P-Unit has been acquired. + */ +void iosf_mbi_assert_punit_acquired(void); + #else /* CONFIG_IOSF_MBI is not enabled */ static inline bool iosf_mbi_available(void) @@ -197,12 +214,20 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) return 0; } +static inline int +iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(struct notifier_block *nb) +{ + return 0; +} + static inline int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) { return 0; } +static inline void iosf_mbi_assert_punit_acquired(void) {} + #endif /* CONFIG_IOSF_MBI */ #endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 67421f649cfa..e71c1120426b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -103,7 +103,12 @@ #endif #define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef -#define LOCAL_TIMER_VECTOR 0xee + +#if IS_ENABLED(CONFIG_HYPERV) +#define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#endif + +#define LOCAL_TIMER_VECTOR 0xed #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h new file mode 100644 index 000000000000..875b54376689 --- /dev/null +++ b/arch/x86/include/asm/jailhouse_para.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL2.0 */ + +/* + * Jailhouse paravirt_ops implementation + * + * Copyright (c) Siemens AG, 2015-2017 + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + */ + +#ifndef _ASM_X86_JAILHOUSE_PARA_H +#define _ASM_X86_JAILHOUSE_PARA_H + +#include <linux/types.h> + +#ifdef CONFIG_JAILHOUSE_GUEST +bool jailhouse_paravirt(void); +#else +static inline bool jailhouse_paravirt(void) +{ + return false; +} +#endif + +#endif /* _ASM_X86_JAILHOUSE_PARA_H */ diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index b577dd0916aa..13e70da38bed 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -4,6 +4,7 @@ #include <linux/const.h> #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_SCALE_SHIFT 3 /* * Compiler uses shadow offset assuming that addresses start @@ -12,12 +13,15 @@ * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT */ #define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ - ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3)) + ((-1UL << __VIRTUAL_MASK_SHIFT) >> \ + KASAN_SHADOW_SCALE_SHIFT)) /* - * 47 bits for kernel address -> (47 - 3) bits for shadow - * 56 bits for kernel address -> (56 - 3) bits for shadow + * 47 bits for kernel address -> (47 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow + * 56 bits for kernel address -> (56 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow */ -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + \ + (1ULL << (__VIRTUAL_MASK_SHIFT - \ + KASAN_SHADOW_SCALE_SHIFT))) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9f2e3102e0bb..367d99cff426 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,6 +67,8 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); +extern void arch_kprobe_override_function(struct pt_regs *regs); + /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 516798431328..dd6f57a54a26 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -86,7 +86,7 @@ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ - | X86_CR4_SMAP | X86_CR4_PKE)) + | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) @@ -504,6 +504,7 @@ struct kvm_vcpu_arch { int mp_state; u64 ia32_misc_enable_msr; u64 smbase; + u64 smi_count; bool tpr_access_reporting; u64 ia32_xss; @@ -760,6 +761,15 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +struct kvm_sev_info { + bool active; /* SEV enabled guest */ + unsigned int asid; /* ASID used for this guest */ + unsigned int handle; /* SEV firmware handle */ + int fd; /* SEV device fd */ + unsigned long pages_locked; /* Number of pages locked */ + struct list_head regions_list; /* List of registered regions */ +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -847,6 +857,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + struct kvm_sev_info sev_info; }; struct kvm_vm_stat { @@ -883,7 +895,6 @@ struct kvm_vcpu_stat { u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; - u64 efer_reload; u64 fpu_reload; u64 insn_emulation; u64 insn_emulation_fail; @@ -965,7 +976,7 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu); + void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); @@ -1017,6 +1028,7 @@ struct kvm_x86_ops { void (*handle_external_intr)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); + bool (*umip_emulated)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); @@ -1079,6 +1091,10 @@ struct kvm_x86_ops { int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); int (*enable_smi_window)(struct kvm_vcpu *vcpu); + + int (*mem_enc_op)(struct kvm *kvm, void __user *argp); + int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); }; struct kvm_arch_async_pf { diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b1e8d8db921f..96ea4b5ba658 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -376,6 +376,7 @@ struct smca_bank { extern struct smca_bank smca_banks[MAX_NR_BANKS]; extern const char *smca_get_long_name(enum smca_bank_types t); +extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); @@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu); static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; #endif diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index a6bec8028480..6fb923a34309 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -128,9 +128,17 @@ enum mp_irq_source_types { mp_ExtINT = 3 }; -#define MP_IRQDIR_DEFAULT 0 -#define MP_IRQDIR_HIGH 1 -#define MP_IRQDIR_LOW 3 +#define MP_IRQPOL_DEFAULT 0x0 +#define MP_IRQPOL_ACTIVE_HIGH 0x1 +#define MP_IRQPOL_RESERVED 0x2 +#define MP_IRQPOL_ACTIVE_LOW 0x3 +#define MP_IRQPOL_MASK 0x3 + +#define MP_IRQTRIG_DEFAULT 0x0 +#define MP_IRQTRIG_EDGE 0x4 +#define MP_IRQTRIG_RESERVED 0x8 +#define MP_IRQTRIG_LEVEL 0xc +#define MP_IRQTRIG_MASK 0xc #define MP_APIC_ALL 0xFF diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 8bf450b13d9f..25283f7eb299 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -160,6 +160,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_set_synint_state(int_num, val) wrmsrl(int_num, val) void hyperv_callback_vector(void); +void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector #endif @@ -314,20 +315,29 @@ void hyperv_init(void); void hyperv_setup_mmu_ops(void); void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); -bool hv_is_hypercall_page_setup(void); +bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); + +void hyperv_reenlightenment_intr(struct pt_regs *regs); +void set_hv_tscchange_cb(void (*cb)(void)); +void clear_hv_tscchange_cb(void); +void hyperv_stop_tsc_emulation(void); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} -static inline bool hv_is_hypercall_page_setup(void) { return false; } +static inline bool hv_is_hyperv_initialized(void) { return false; } static inline void hyperv_cleanup(void) {} static inline void hyperv_setup_mmu_ops(void) {} +static inline void set_hv_tscchange_cb(void (*cb)(void)) {} +static inline void clear_hv_tscchange_cb(void) {} +static inline void hyperv_stop_tsc_emulation(void) {}; #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc) { - u64 scale, offset, cur_tsc; + u64 scale, offset; u32 sequence; /* @@ -358,7 +368,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) scale = READ_ONCE(tsc_pg->tsc_scale); offset = READ_ONCE(tsc_pg->tsc_offset); - cur_tsc = rdtsc_ordered(); + *cur_tsc = rdtsc_ordered(); /* * Make sure we read sequence after we read all other values @@ -368,7 +378,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - return mul_u64_u64_shr(cur_tsc, scale, 64) + offset; + return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; +} + +static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +{ + u64 cur_tsc; + + return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); } #else @@ -376,5 +393,12 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { return NULL; } + +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc) +{ + BUG(); + return U64_MAX; +} #endif #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e520a1e6fc11..c9084dedfcfa 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -397,6 +397,8 @@ #define MSR_K7_PERFCTR3 0xc0010007 #define MSR_K7_CLK_CTL 0xc001001b #define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_HWCR_SMMLOCK_BIT 0 +#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 8a3ee355b422..92015c65fa2a 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -22,4 +22,6 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end, void io_free_memtype(resource_size_t start, resource_size_t end); +bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index bc4af5453802..f24df59c40b2 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -158,7 +158,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#ifdef CONFIG_SMP union split_pmd { struct { u32 pmd_low; @@ -166,6 +165,8 @@ union split_pmd { }; pmd_t pmd; }; + +#ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) { union split_pmd res, *orig = (union split_pmd *)pmdp; @@ -181,6 +182,40 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) #endif +#ifndef pmdp_establish +#define pmdp_establish pmdp_establish +static inline pmd_t pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) +{ + pmd_t old; + + /* + * If pmd has present bit cleared we can get away without expensive + * cmpxchg64: we can update pmdp half-by-half without racing with + * anybody. + */ + if (!(pmd_val(pmd) & _PAGE_PRESENT)) { + union split_pmd old, new, *ptr; + + ptr = (union split_pmd *)pmdp; + + new.pmd = pmd; + + /* xchg acts as a barrier before setting of the high bits */ + old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low); + old.pmd_high = ptr->pmd_high; + ptr->pmd_high = new.pmd_high; + return old.pmd; + } + + do { + old = *pmdp; + } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd); + + return old; +} +#endif + #ifdef CONFIG_SMP union split_pud { struct { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e42b8943cb1a..63c2552b6b65 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1109,6 +1109,21 @@ static inline int pud_write(pud_t pud) return pud_flags(pud) & _PAGE_RW; } +#ifndef pmdp_establish +#define pmdp_establish pmdp_establish +static inline pmd_t pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) +{ + if (IS_ENABLED(CONFIG_SMP)) { + return xchg(pmdp, pmd); + } else { + pmd_t old = *pmdp; + *pmdp = pmd; + return old; + } +} +#endif + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/arch/x86/include/asm/pmc_core.h b/arch/x86/include/asm/pmc_core.h deleted file mode 100644 index d4855f11136d..000000000000 --- a/arch/x86/include/asm/pmc_core.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Intel Core SoC Power Management Controller Header File - * - * Copyright (c) 2016, Intel Corporation. - * All Rights Reserved. - * - * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com> - * Vishwanath Somayaji <vishwanath.somayaji@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef _ASM_PMC_CORE_H -#define _ASM_PMC_CORE_H - -/* API to read SLP_S0_RESIDENCY counter */ -int intel_pmc_slp_s0_counter_read(u32 *data); - -#endif /* _ASM_PMC_CORE_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 44c2c4ec6d60..1bd9ed87606f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -505,6 +505,14 @@ struct thread_struct { */ }; +/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.state); + *size = fpu_kernel_xstate_size; +} + /* * Thread-synchronous status. * diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14131dd06b29..6de1fd3d0097 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 78dd9df88157..0487ac054870 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -146,6 +146,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL +#define SVM_NESTED_CTL_NP_ENABLE BIT(0) +#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) + struct __attribute__ ((__packed__)) vmcb_seg { u16 selector; u16 attrib; diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index bdf9aed40403..1c6a6cb230ff 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -28,8 +28,6 @@ static inline void pci_swiotlb_late_init(void) } #endif -static inline void dma_mark_clean(void *addr, size_t size) {} - extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs); diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h new file mode 100644 index 000000000000..c67caafd3381 --- /dev/null +++ b/arch/x86/include/asm/sync_core.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SYNC_CORE_H +#define _ASM_X86_SYNC_CORE_H + +#include <linux/preempt.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> + +/* + * Ensure that a core serializing instruction is issued before returning + * to user-mode. x86 implements return to user-space through sysexit, + * sysrel, and sysretq, which are not core serializing. + */ +static inline void sync_core_before_usermode(void) +{ + /* With PTI, we unconditionally serialize before running user code. */ + if (static_cpu_has(X86_FEATURE_PTI)) + return; + /* + * Return from interrupt and NMI is done through iret, which is core + * serializing. + */ + if (in_irq() || in_nmi()) + return; + sync_core(); +} + +#endif /* _ASM_X86_SYNC_CORE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index eda3b6823ca4..a5d9521bb2cb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -63,8 +63,6 @@ struct thread_info { .flags = 0, \ } -#define init_stack (init_thread_union.stack) - #else /* !__ASSEMBLY__ */ #include <asm/asm-offsets.h> diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 74f4c2ff6427..d8bfa98fca98 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -53,6 +53,10 @@ struct arch_uprobe { u8 fixups; u8 ilen; } defparam; + struct { + u8 reg_offset; /* to the start of pt_regs */ + u8 ilen; + } push; }; }; diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 7cac79802ad2..7803114aa140 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -48,7 +48,6 @@ #define UV2_NET_ENDPOINT_INTD 0x28 #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) -#define UV_DESC_PSHIFT 49 #define UV_PAYLOADQ_GNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_BAU_BASENAME "sgi_uv/bau_tunables" diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 036e26d63d9a..44cf6d6deb7a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -241,6 +241,7 @@ static inline int uv_hub_info_check(int version) #define UV2_HUB_REVISION_BASE 3 #define UV3_HUB_REVISION_BASE 5 #define UV4_HUB_REVISION_BASE 7 +#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ #ifdef UV1_HUB_IS_SUPPORTED static inline int is_uv1_hub(void) @@ -280,6 +281,19 @@ static inline int is_uv3_hub(void) } #endif +/* First test "is UV4A", then "is UV4" */ +#ifdef UV4A_HUB_IS_SUPPORTED +static inline int is_uv4a_hub(void) +{ + return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE); +} +#else +static inline int is_uv4a_hub(void) +{ + return 0; +} +#endif + #ifdef UV4_HUB_IS_SUPPORTED static inline int is_uv4_hub(void) { diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 548d684a7960..ecb9ddef128f 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -39,9 +39,11 @@ * #define UV2Hxxx b * #define UV3Hxxx c * #define UV4Hxxx d + * #define UV4AHxxx e * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : * (is_uv2_hub() ? UV2Hxxx : * (is_uv3_hub() ? UV3Hxxx : + * (is_uv4a_hub() ? UV4AHxxx : * UV4Hxxx)) * * If the MMR exists on all hub types > 1 but have different addresses, the @@ -49,8 +51,10 @@ * #define UV2Hxxx b * #define UV3Hxxx c * #define UV4Hxxx d + * #define UV4AHxxx e * #define UVHxxx (is_uv2_hub() ? UV2Hxxx : * (is_uv3_hub() ? UV3Hxxx : + * (is_uv4a_hub() ? UV4AHxxx : * UV4Hxxx)) * * union uvh_xxx { @@ -63,6 +67,7 @@ * } s2; * struct uv3h_xxx_s { # Full UV3 definition (*) * } s3; + * (NOTE: No struct uv4ah_xxx_s members exist) * struct uv4h_xxx_s { # Full UV4 definition (*) * } s4; * }; @@ -99,6 +104,7 @@ #define UV2_HUB_IS_SUPPORTED 1 #define UV3_HUB_IS_SUPPORTED 1 #define UV4_HUB_IS_SUPPORTED 1 +#define UV4A_HUB_IS_SUPPORTED 1 /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); @@ -2779,35 +2785,47 @@ union uvh_lb_bau_sb_activation_status_1_u { /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32) #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL - - -union uvh_lb_bau_sb_descriptor_base_u { - unsigned long v; - struct uvh_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long rsvd_12_48:37; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s; - struct uv4h_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long page_address:34; /* RW */ - unsigned long rsvd_46_48:3; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s4; -}; +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53 +#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL +#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL + +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT) + +#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK) + +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK) /* ========================================================================= */ /* UVH_NODE_ID */ @@ -3031,6 +3049,41 @@ union uvh_node_present_table_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long v; @@ -3042,6 +3095,46 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3064,6 +3157,41 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long v; @@ -3075,6 +3203,46 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3097,6 +3265,41 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long v; @@ -3108,6 +3311,46 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3126,6 +3369,21 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long v; @@ -3134,6 +3392,31 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3152,6 +3435,21 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long v; @@ -3160,6 +3458,31 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3178,6 +3501,21 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long v; @@ -3186,6 +3524,31 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3384,6 +3747,162 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { }; /* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR) + + +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK) + +union uvh_rh_gam_mmioh_overlay_config0_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR) + + +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) + +union uvh_rh_gam_mmioh_overlay_config1_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; + struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; +}; + +/* ========================================================================= */ /* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL @@ -3438,6 +3957,112 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { }; /* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR) + +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH) + + +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK) + +union uvh_rh_gam_mmioh_redirect_config0_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; + struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR) + +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH) + + +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK) + +union uvh_rh_gam_mmioh_redirect_config1_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; + struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; + struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; +}; + +/* ========================================================================= */ /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL @@ -4138,88 +4763,6 @@ union uv3h_gr0_gam_gr_config_u { }; /* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -union uv3h_rh_gam_mmioh_overlay_config0_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ - unsigned long m_io:6; /* RW */ - unsigned long n_io:4; - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -union uv3h_rh_gam_mmioh_overlay_config1_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ - unsigned long m_io:6; /* RW */ - unsigned long n_io:4; - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -union uv3h_rh_gam_mmioh_redirect_config0_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { - unsigned long nasid:15; /* RW */ - unsigned long rsvd_15_63:49; - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -union uv3h_rh_gam_mmioh_redirect_config1_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { - unsigned long nasid:15; /* RW */ - unsigned long rsvd_15_63:49; - } s3; -}; - -/* ========================================================================= */ /* UV4H_LB_PROC_INTD_QUEUE_FIRST */ /* ========================================================================= */ #define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index aa4747569e23..fc2f082ac635 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -212,6 +212,7 @@ enum x86_legacy_i8042_state { struct x86_legacy_features { enum x86_legacy_i8042_state i8042; int rtc; + int warm_reset; int no_vga; int reserve_bios_regions; struct x86_legacy_devices devices; diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index 1e901e421f2d..322681622d1e 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += bpf_perf_event.h generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h +generic-y += poll.h diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index afdd5ae0fcc4..aebf60357758 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -9,6 +9,7 @@ #define SETUP_PCI 3 #define SETUP_EFI 4 #define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -126,6 +127,27 @@ struct boot_e820_entry { __u32 type; } __attribute__((packed)); +/* + * Smallest compatible version of jailhouse_setup_data required by this kernel. + */ +#define JAILHOUSE_SETUP_REQUIRED_VERSION 1 + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + u16 version; + u16 compatible_version; + u16 pm_timer_address; + u16 num_cpus; + u64 pci_mmconfig_base; + u32 tsc_khz; + u32 apic_khz; + u8 standard_ioapic; + u8 cpu_ids[255]; +} __attribute__((packed)); + /* The so-called "zeropage" */ struct boot_params { struct screen_info screen_info; /* 0x000 */ diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 1a5bfead93b4..197c2e6c7376 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -40,6 +40,9 @@ */ #define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11) +/* AccessReenlightenmentControls privilege */ +#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) + /* * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available @@ -234,6 +237,30 @@ #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) +/* TSC emulation after migration */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 + +struct hv_reenlightenment_control { + u64 vector:8; + u64 reserved1:8; + u64 enabled:1; + u64 reserved2:15; + u64 target_vp:32; +}; + +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 + +struct hv_tsc_emulation_control { + u64 enabled:1; + u64 reserved:63; +}; + +struct hv_tsc_emulation_status { + u64 inprogress:1; + u64 reserved:63; +}; + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 09cc06483bed..7a2ade4aa235 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -25,6 +25,7 @@ #define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_PV_EOI 6 #define KVM_FEATURE_PV_UNHALT 7 +#define KVM_FEATURE_PV_TLB_FLUSH 9 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -51,6 +52,9 @@ struct kvm_steal_time { __u32 pad[11]; }; +#define KVM_VCPU_PREEMPTED (1 << 0) +#define KVM_VCPU_FLUSH_TLB (1 << 1) + #define KVM_CLOCK_PAIRING_WALLCLOCK 0 struct kvm_clock_pairing { __s64 sec; diff --git a/arch/x86/include/uapi/asm/poll.h b/arch/x86/include/uapi/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/x86/include/uapi/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7e2baf7304ae..29786c87e864 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -115,6 +115,8 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o +obj-$(CONFIG_JAILHOUSE_GUEST) += jailhouse.o + obj-$(CONFIG_EISA) += eisa.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f4c463df8b08..2aa92094b59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -36,6 +36,7 @@ #include <linux/ioport.h> #include <linux/pci.h> #include <linux/efi-bgrt.h> +#include <linux/serial_core.h> #include <asm/e820/api.h> #include <asm/irqdomain.h> @@ -68,8 +69,9 @@ int acpi_ioapic; int acpi_strict; int acpi_disable_cmcff; +/* ACPI SCI override configuration */ u8 acpi_sci_flags __initdata; -int acpi_sci_override_gsi __initdata; +u32 acpi_sci_override_gsi __initdata = INVALID_ACPI_IRQ; int acpi_skip_timer_override __initdata; int acpi_use_timer_override __initdata; int acpi_fix_pin2_polarity __initdata; @@ -112,8 +114,6 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; -#define ACPI_INVALID_GSI INT_MIN - /* * This is just a simple wrapper around early_memremap(), * with sanity checks for phys == 0 and size == 0. @@ -372,7 +372,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, * and acpi_isa_irq_to_gsi() may give wrong result. */ if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi) - isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI; + isa_irq_to_gsi[gsi] = INVALID_ACPI_IRQ; isa_irq_to_gsi[bus_irq] = gsi; } @@ -620,24 +620,24 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) } rc = acpi_get_override_irq(gsi, &trigger, &polarity); - if (rc == 0) { - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - irq = acpi_register_gsi(NULL, gsi, trigger, polarity); - if (irq >= 0) { - *irqp = irq; - return 0; - } - } + if (rc) + return rc; - return -1; + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); + if (irq < 0) + return irq; + + *irqp = irq; + return 0; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { if (isa_irq < nr_legacy_irqs() && - isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) { + isa_irq_to_gsi[isa_irq] != INVALID_ACPI_IRQ) { *gsi = isa_irq_to_gsi[isa_irq]; return 0; } @@ -676,8 +676,7 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, mutex_lock(&acpi_ioapic_lock); irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); /* Don't set up the ACPI SCI because it's already set up */ - if (irq >= 0 && enable_update_mptable && - acpi_gbl_FADT.sci_interrupt != gsi) + if (irq >= 0 && enable_update_mptable && gsi != acpi_gbl_FADT.sci_interrupt) mp_config_acpi_gsi(dev, gsi, trigger, polarity); mutex_unlock(&acpi_ioapic_lock); #endif @@ -1211,8 +1210,9 @@ static int __init acpi_parse_madt_ioapic_entries(void) /* * If BIOS did not supply an INT_SRC_OVR for the SCI * pretend we got one so we can set the SCI flags. + * But ignore setting up SCI on hardware reduced platforms. */ - if (!acpi_sci_override_gsi) + if (acpi_sci_override_gsi == INVALID_ACPI_IRQ && !acpi_gbl_reduced_hardware) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, acpi_gbl_FADT.sci_interrupt); @@ -1626,6 +1626,8 @@ int __init acpi_boot_init(void) if (!acpi_noirq) x86_init.pci.init = pci_acpi_init; + /* Do not enable ACPI SPCR console by default */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, false); return 0; } diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 7188aea91549..f1915b744052 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -138,6 +138,8 @@ static int __init acpi_sleep_setup(char *str) acpi_nvs_nosave_s3(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "nobl", 4) == 0) + acpi_sleep_no_blacklist(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index cc0e8bc0ea3f..ecd486cb06ab 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/gfp.h> #include <linux/atomic.h> +#include <linux/dma-direct.h> #include <asm/mtrr.h> #include <asm/pgtable.h> #include <asm/proto.h> diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index f5d92bc3b884..2c4d5ece7456 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -30,6 +30,7 @@ #include <asm/dma.h> #include <asm/amd_nb.h> #include <asm/x86_init.h> +#include <linux/crash_dump.h> /* * Using 512M as goal, in case kexec will load kernel_big @@ -56,6 +57,33 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; +#ifdef CONFIG_PROC_VMCORE +/* + * If the first kernel maps the aperture over e820 RAM, the kdump kernel will + * use the same range because it will remain configured in the northbridge. + * Trying to dump this area via /proc/vmcore may crash the machine, so exclude + * it from vmcore. + */ +static unsigned long aperture_pfn_start, aperture_page_count; + +static int gart_oldmem_pfn_is_ram(unsigned long pfn) +{ + return likely((pfn < aperture_pfn_start) || + (pfn >= aperture_pfn_start + aperture_page_count)); +} + +static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +{ + aperture_pfn_start = aper_base >> PAGE_SHIFT; + aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; + WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram)); +} +#else +static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +{ +} +#endif + /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ @@ -435,8 +463,16 @@ int __init gart_iommu_hole_init(void) out: if (!fix && !fallback_aper_force) { - if (last_aper_base) + if (last_aper_base) { + /* + * If this is the kdump kernel, the first kernel + * may have allocated the range over its e820 RAM + * and fixed up the northbridge + */ + exclude_from_vmcore(last_aper_base, last_aper_order); + return 1; + } return 0; } @@ -473,6 +509,14 @@ out: return 0; } + /* + * If this is the kdump kernel _and_ the first kernel did not + * configure the aperture in the northbridge, this range may + * overlap with the first kernel's memory. We can't access the + * range through vmcore even though it should be part of the dump. + */ + exclude_from_vmcore(aper_alloc, aper_order); + /* Fix up the north bridges */ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus, dev_base, dev_limit; diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 25a87028cb3f..e84c9eb4e5b4 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -19,6 +19,7 @@ #include <asm/smp.h> #include <asm/apic.h> #include <asm/ipi.h> +#include <asm/jailhouse_para.h> #include <linux/acpi.h> @@ -84,12 +85,8 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) static void flat_send_IPI_allbutself(int vector) { int cpu = smp_processor_id(); -#ifdef CONFIG_HOTPLUG_CPU - int hotplug = 1; -#else - int hotplug = 0; -#endif - if (hotplug || vector == NMI_VECTOR) { + + if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) { if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { unsigned long mask = cpumask_bits(cpu_online_mask)[0]; @@ -218,6 +215,15 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +static void physflat_init_apic_ldr(void) +{ + /* + * LDR and DFR are not involved in physflat mode, rather: + * "In physical destination mode, the destination processor is + * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1) + */ +} + static void physflat_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -230,7 +236,8 @@ static void physflat_send_IPI_all(int vector) static int physflat_probe(void) { - if (apic == &apic_physflat || num_possible_cpus() > 8) + if (apic == &apic_physflat || num_possible_cpus() > 8 || + jailhouse_paravirt()) return 1; return 0; @@ -251,8 +258,7 @@ static struct apic apic_physflat __ro_after_init = { .dest_logical = 0, .check_apicid_used = NULL, - /* not needed, but shouldn't hurt: */ - .init_apic_ldr = flat_init_apic_ldr, + .init_apic_ldr = physflat_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8a7963421460..8ad2e410974f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -800,18 +800,18 @@ static int irq_polarity(int idx) /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].irqflag & 0x03) { - case 0: + switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) { + case MP_IRQPOL_DEFAULT: /* conforms to spec, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) return default_ISA_polarity(idx); else return default_PCI_polarity(idx); - case 1: + case MP_IRQPOL_ACTIVE_HIGH: return IOAPIC_POL_HIGH; - case 2: + case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); - case 3: + case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_POL_LOW; } @@ -845,8 +845,8 @@ static int irq_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].irqflag >> 2) & 0x03) { - case 0: + switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) { + case MP_IRQTRIG_DEFAULT: /* conforms to spec, ie. bus-type dependent trigger mode */ if (test_bit(bus, mp_bus_not_pci)) trigger = default_ISA_trigger(idx); @@ -854,11 +854,11 @@ static int irq_trigger(int idx) trigger = default_PCI_trigger(idx); /* Take EISA into account */ return eisa_irq_trigger(idx, bus, trigger); - case 1: + case MP_IRQTRIG_EDGE: return IOAPIC_EDGE; - case 2: + case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); - case 3: + case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_LEVEL; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e1b8e8bf6b3c..46b675aaf20b 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -137,6 +137,8 @@ static int __init early_get_pnodeid(void) case UV3_HUB_PART_NUMBER_X: uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; break; + + /* Update: UV4A has only a modified revision to indicate HUB fixes */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1; uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */ @@ -316,6 +318,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } else if (!strcmp(oem_table_id, "UVH")) { /* Only UV1 systems: */ uv_system_type = UV_NON_UNIQUE_APIC; + x86_platform.legacy.warm_reset = 0; __this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift); uv_set_apicid_hibit(); uv_apic = 1; @@ -767,6 +770,7 @@ static __init void map_gru_high(int max_pnode) return; } + /* Only UV3 has distributed GRU mode */ if (is_uv3_hub() && gru.s3.mode) { map_gru_distributed(gru.v); return; @@ -790,63 +794,61 @@ static __init void map_mmr_high(int max_pnode) pr_info("UV: MMR disabled\n"); } -/* - * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY - * and REDIRECT MMR regs are exactly the same on UV3. - */ -struct mmioh_config { - unsigned long overlay; - unsigned long redirect; - char *id; -}; - -static __initdata struct mmioh_config mmiohs[] = { - { - UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR, - UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR, - "MMIOH0" - }, - { - UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR, - UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR, - "MMIOH1" - }, -}; - -/* UV3 & UV4 have identical MMIOH overlay configs */ -static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) +/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */ +static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode) { - union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; + unsigned long overlay; unsigned long mmr; unsigned long base; + unsigned long nasid_mask; + unsigned long m_overlay; int i, n, shift, m_io, max_io; int nasid, lnasid, fi, li; char *id; - id = mmiohs[index].id; - overlay.v = uv_read_local_mmr(mmiohs[index].overlay); - - pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io); - if (!overlay.s3.enable) { + if (index == 0) { + id = "MMIOH0"; + m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR; + overlay = uv_read_local_mmr(m_overlay); + base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK; + mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR; + m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) + >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT; + shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT; + n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK; + } else { + id = "MMIOH1"; + m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR; + overlay = uv_read_local_mmr(m_overlay); + base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK; + mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR; + m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) + >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT; + shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT; + n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK; + } + pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io); + if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) { pr_info("UV: %s disabled\n", id); return; } - shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT; - base = (unsigned long)overlay.s3.base; - m_io = overlay.s3.m_io; - mmr = mmiohs[index].redirect; - n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; /* Convert to NASID: */ min_pnode *= 2; max_pnode *= 2; max_io = lnasid = fi = li = -1; for (i = 0; i < n; i++) { - union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect; + unsigned long m_redirect = mmr + i * 8; + unsigned long redirect = uv_read_local_mmr(m_redirect); + + nasid = redirect & nasid_mask; + if (i == 0) + pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n", + id, redirect, m_redirect, nasid); - redirect.v = uv_read_local_mmr(mmr + i * 8); - nasid = redirect.s3.nasid; /* Invalid NASID: */ if (nasid < min_pnode || max_pnode < nasid) nasid = -1; @@ -894,8 +896,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode) if (is_uv3_hub() || is_uv4_hub()) { /* Map both MMIOH regions: */ - map_mmioh_high_uv3(0, min_pnode, max_pnode); - map_mmioh_high_uv3(1, min_pnode, max_pnode); + map_mmioh_high_uv34(0, min_pnode, max_pnode); + map_mmioh_high_uv34(1, min_pnode, max_pnode); return; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index e4b0d92b3ae0..dfcbe6924eaf 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1506,7 +1506,7 @@ static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t * return 0; } -static unsigned int do_poll(struct file *fp, poll_table *wait) +static __poll_t do_poll(struct file *fp, poll_table *wait) { struct apm_user *as; @@ -1515,7 +1515,7 @@ static unsigned int do_poll(struct file *fp, poll_table *wait) return 0; poll_wait(fp, &apm_waitqueue, wait); if (!queue_empty(as)) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } @@ -2389,6 +2389,7 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { + cpuidle_poll_state_init(&apm_idle_driver); if (!cpuidle_register_driver(&apm_idle_driver)) if (cpuidle_register_device(&apm_cpuidle_device)) cpuidle_unregister_driver(&apm_idle_driver); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e7d5a7883632..f0e6456ca7d3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,6 +556,51 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } } +static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) +{ + u64 msr; + + /* + * BIOS support is required for SME and SEV. + * For SME: If BIOS has enabled SME then adjust x86_phys_bits by + * the SME physical address space reduction value. + * If BIOS has not enabled SME then don't advertise the + * SME feature (set in scattered.c). + * For SEV: If BIOS has not enabled SEV then don't advertise the + * SEV feature (set in scattered.c). + * + * In all cases, since support for SME and SEV requires long mode, + * don't advertise the feature under CONFIG_X86_32. + */ + if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { + /* Check if memory encryption is enabled */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + goto clear_all; + + /* + * Always adjust physical address bits. Even though this + * will be a value above 32-bits this is still done for + * CONFIG_X86_32 so that accurate values are reported. + */ + c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; + + if (IS_ENABLED(CONFIG_X86_32)) + goto clear_all; + + rdmsrl(MSR_K7_HWCR, msr); + if (!(msr & MSR_K7_HWCR_SMMLOCK)) + goto clear_sev; + + return; + +clear_all: + clear_cpu_cap(c, X86_FEATURE_SME); +clear_sev: + clear_cpu_cap(c, X86_FEATURE_SEV); + } +} + static void early_init_amd(struct cpuinfo_x86 *c) { u32 dummy; @@ -627,26 +672,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (cpu_has_amd_erratum(c, amd_erratum_400)) set_cpu_bug(c, X86_BUG_AMD_E400); - /* - * BIOS support is required for SME. If BIOS has enabled SME then - * adjust x86_phys_bits by the SME physical address space reduction - * value. If BIOS has not enabled SME then don't advertise the - * feature (set in scattered.c). Also, since the SME support requires - * long mode, don't advertise the feature under CONFIG_X86_32. - */ - if (cpu_has(c, X86_FEATURE_SME)) { - u64 msr; - - /* Check if SME is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) { - c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f; - if (IS_ENABLED(CONFIG_X86_32)) - clear_cpu_cap(c, X86_FEATURE_SME); - } else { - clear_cpu_cap(c, X86_FEATURE_SME); - } - } + early_detect_mem_encrypt(c); } static void init_amd_k8(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 595be776727d..e5ec0f11c0de 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -106,6 +106,10 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index bea8d3e24f50..479ca4728de0 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -31,6 +31,7 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_pv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; +extern const struct hypervisor_x86 x86_hyper_jailhouse; static const __initconst struct hypervisor_x86 * const hypervisors[] = { @@ -45,6 +46,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #ifdef CONFIG_KVM_GUEST &x86_hyper_kvm, #endif +#ifdef CONFIG_JAILHOUSE_GUEST + &x86_hyper_jailhouse, +#endif }; enum x86_hypervisor_type x86_hyper_type; diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 18dd8f22e353..589b948e6e01 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = { .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L2DATA] = + { + .rid = RDT_RESOURCE_L2DATA, + .name = "L2DATA", + .domains = domain_init(RDT_RESOURCE_L2DATA), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 0, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, + [RDT_RESOURCE_L2CODE] = + { + .rid = RDT_RESOURCE_L2CODE, + .name = "L2CODE", + .domains = domain_init(RDT_RESOURCE_L2CODE), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 1, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, [RDT_RESOURCE_MBA] = { .rid = RDT_RESOURCE_MBA, @@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_l3_config(int type) +static void rdt_get_cdp_config(int level, int type) { - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_l = &rdt_resources_all[level]; struct rdt_resource *r = &rdt_resources_all[type]; - r->num_closid = r_l3->num_closid / 2; - r->cache.cbm_len = r_l3->cache.cbm_len; - r->default_ctrl = r_l3->default_ctrl; - r->cache.shareable_bits = r_l3->cache.shareable_bits; + r->num_closid = r_l->num_closid / 2; + r->cache.cbm_len = r_l->cache.cbm_len; + r->default_ctrl = r_l->default_ctrl; + r->cache.shareable_bits = r_l->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* @@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type) r->alloc_enabled = false; } +static void rdt_get_cdp_l3_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); +} + +static void rdt_get_cdp_l2_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); +} + static int get_cache_id(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); @@ -645,6 +691,7 @@ enum { RDT_FLAG_L3_CAT, RDT_FLAG_L3_CDP, RDT_FLAG_L2_CAT, + RDT_FLAG_L2_CDP, RDT_FLAG_MBA, }; @@ -667,6 +714,7 @@ static struct rdt_options rdt_options[] __initdata = { RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3), RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3), RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), + RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2), RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) @@ -729,15 +777,15 @@ static __init bool get_rdt_alloc_resources(void) if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); - if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { - rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); - rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); - } + if (rdt_cpu_has(X86_FEATURE_CDP_L3)) + rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 0x10.1 */ rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + if (rdt_cpu_has(X86_FEATURE_CDP_L2)) + rdt_get_cdp_l2_config(); ret = true; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 3397244984f5..3fd7a70ee04a 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -7,12 +7,15 @@ #include <linux/jump_label.h> #define IA32_L3_QOS_CFG 0xc81 +#define IA32_L2_QOS_CFG 0xc82 #define IA32_L3_CBM_BASE 0xc90 #define IA32_L2_CBM_BASE 0xd10 #define IA32_MBA_THRTL_BASE 0xd50 #define L3_QOS_CDP_ENABLE 0x01ULL +#define L2_QOS_CDP_ENABLE 0x01ULL + /* * Event IDs are used to program IA32_QM_EVTSEL before reading event * counter from IA32_QM_CTR @@ -357,6 +360,8 @@ enum { RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, + RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 64c5ff97ee0d..bdab7d2f51af 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -990,6 +990,7 @@ out_destroy: kernfs_remove(kn); return ret; } + static void l3_qos_cfg_update(void *arg) { bool *enable = arg; @@ -997,8 +998,17 @@ static void l3_qos_cfg_update(void *arg) wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } -static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +static void l2_qos_cfg_update(void *arg) { + bool *enable = arg; + + wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); +} + +static int set_cache_qos_cfg(int level, bool enable) +{ + void (*update)(void *arg); + struct rdt_resource *r_l; cpumask_var_t cpu_mask; struct rdt_domain *d; int cpu; @@ -1006,16 +1016,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - list_for_each_entry(d, &r->domains, list) { + if (level == RDT_RESOURCE_L3) + update = l3_qos_cfg_update; + else if (level == RDT_RESOURCE_L2) + update = l2_qos_cfg_update; + else + return -EINVAL; + + r_l = &rdt_resources_all[level]; + list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ if (cpumask_test_cpu(cpu, cpu_mask)) - l3_qos_cfg_update(&enable); + update(&enable); /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + smp_call_function_many(cpu_mask, update, &enable, 1); put_cpu(); free_cpumask_var(cpu_mask); @@ -1023,52 +1041,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) return 0; } -static int cdp_enable(void) +static int cdp_enable(int level, int data_type, int code_type) { - struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; + struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; + struct rdt_resource *r_l = &rdt_resources_all[level]; int ret; - if (!r_l3->alloc_capable || !r_l3data->alloc_capable || - !r_l3code->alloc_capable) + if (!r_l->alloc_capable || !r_ldata->alloc_capable || + !r_lcode->alloc_capable) return -EINVAL; - ret = set_l3_qos_cfg(r_l3, true); + ret = set_cache_qos_cfg(level, true); if (!ret) { - r_l3->alloc_enabled = false; - r_l3data->alloc_enabled = true; - r_l3code->alloc_enabled = true; + r_l->alloc_enabled = false; + r_ldata->alloc_enabled = true; + r_lcode->alloc_enabled = true; } return ret; } -static void cdp_disable(void) +static int cdpl3_enable(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE); +} + +static int cdpl2_enable(void) +{ + return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE); +} + +static void cdp_disable(int level, int data_type, int code_type) +{ + struct rdt_resource *r = &rdt_resources_all[level]; r->alloc_enabled = r->alloc_capable; - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) { - rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false; - rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false; - set_l3_qos_cfg(r, false); + if (rdt_resources_all[data_type].alloc_enabled) { + rdt_resources_all[data_type].alloc_enabled = false; + rdt_resources_all[code_type].alloc_enabled = false; + set_cache_qos_cfg(level, false); } } +static void cdpl3_disable(void) +{ + cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); +} + +static void cdpl2_disable(void) +{ + cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); +} + +static void cdp_disable_all(void) +{ + if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + cdpl3_disable(); + if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + cdpl2_disable(); +} + static int parse_rdtgroupfs_options(char *data) { char *token, *o = data; int ret = 0; while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; + if (!*token) { + ret = -EINVAL; + goto out; + } - if (!strcmp(token, "cdp")) - ret = cdp_enable(); + if (!strcmp(token, "cdp")) { + ret = cdpl3_enable(); + if (ret) + goto out; + } else if (!strcmp(token, "cdpl2")) { + ret = cdpl2_enable(); + if (ret) + goto out; + } else { + ret = -EINVAL; + goto out; + } } + return 0; + +out: + pr_err("Invalid mount option \"%s\"\n", token); + return ret; } @@ -1223,7 +1288,7 @@ out_mongrp: out_info: kernfs_remove(kn_info); out_cdp: - cdp_disable(); + cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); @@ -1383,7 +1448,7 @@ static void rdt_kill_sb(struct super_block *sb) /*Put everything back to default values. */ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); - cdp_disable(); + cdp_disable_all(); rmdir_all_sub(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c index 7f85b76f43bc..97685a0c3175 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c @@ -243,13 +243,13 @@ out: return err ? err : buf - ubuf; } -static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) +static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) { poll_wait(file, &mce_chrdev_wait, wait); if (READ_ONCE(mcelog.next)) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; if (!mce_apei_read_done && apei_check_mce()) - return POLLIN | POLLRDNORM; + return EPOLLIN | EPOLLRDNORM; return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 4ca632a06e0b..5bbd06f38ff6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -59,6 +59,7 @@ static struct severity { #define MCGMASK(x, y) .mcgmask = x, .mcgres = y #define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) +#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) @@ -101,6 +102,22 @@ static struct severity { NOSER, BITCLR(MCI_STATUS_UC) ), + /* + * known AO MCACODs reported via MCE or CMC: + * + * SRAO could be signaled either via a machine check exception or + * CMCI with the corresponding bit S 1 or 0. So we don't need to + * check bit S for SRAO. + */ + MCESEV( + AO, "Action optional: memory scrubbing error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) + ), + MCESEV( + AO, "Action optional: last level cache writeback error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) + ), + /* ignore OVER for UCNA */ MCESEV( UCNA, "Uncorrected no action required", @@ -149,15 +166,6 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) ), - /* known AO MCACODs: */ - MCESEV( - AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) - ), - MCESEV( - AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) - ), MCESEV( SOME, "Action optional: unknown MCACOD", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 868e412b4f0c..3a8e88a611eb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -14,7 +14,6 @@ #include <linux/capability.h> #include <linux/miscdevice.h> #include <linux/ratelimit.h> -#include <linux/kallsyms.h> #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/uaccess.h> @@ -235,7 +234,7 @@ static void __print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) - print_symbol("{%s}", m->ip); + pr_cont("{%pS}", (void *)m->ip); pr_cont("\n"); } @@ -503,10 +502,8 @@ static int mce_usable_address(struct mce *m) bool mce_is_memory_error(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD) { - /* ErrCodeExt[20:16] */ - u8 xec = (m->status >> 16) & 0x1f; + return amd_mce_is_memory_error(m); - return (xec == 0x0 || xec == 0x8); } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes @@ -530,6 +527,17 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool mce_is_correctable(struct mce *m) +{ + if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) + return false; + + if (m->status & MCI_STATUS_UC) + return false; + + return true; +} + static bool cec_add_mce(struct mce *m) { if (!m) @@ -537,7 +545,7 @@ static bool cec_add_mce(struct mce *m) /* We eat only correctable DRAM errors with usable addresses. */ if (mce_is_memory_error(m) && - !(m->status & MCI_STATUS_UC) && + mce_is_correctable(m) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) return true; @@ -582,7 +590,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; - memory_failure(pfn, MCE_VECTOR, 0); + memory_failure(pfn, 0); } return NOTIFY_OK; @@ -1046,7 +1054,7 @@ static int do_memory_failure(struct mce *m) pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); if (!(m->mcgstatus & MCG_STATUS_RIPV)) flags |= MF_MUST_KILL; - ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags); + ret = memory_failure(m->addr >> PAGE_SHIFT, flags); if (ret) pr_err("Memory error not recovered"); return ret; @@ -1325,7 +1333,7 @@ out_ist: EXPORT_SYMBOL_GPL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE -int memory_failure(unsigned long pfn, int vector, int flags) +int memory_failure(unsigned long pfn, int flags) { /* mce_severity() should not hand us an ACTION_REQUIRED error */ BUG_ON(flags & MF_ACTION_REQUIRED); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 486f640b02ef..0f32ad242324 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); +static enum smca_bank_types smca_get_bank_type(struct mce *m) +{ + struct smca_bank *b; + + if (m->bank >= N_SMCA_BANK_TYPES) + return N_SMCA_BANK_TYPES; + + b = &smca_banks[m->bank]; + if (!b->hwid) + return N_SMCA_BANK_TYPES; + + return b->hwid->bank_type; +} + static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -407,7 +421,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) (deferred_error_int_vector != amd_deferred_error_interrupt)) deferred_error_int_vector = amd_deferred_error_interrupt; - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + if (!mce_flags.smca) + low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + wrmsr(MSR_CU_DEF_ERR, low, high); } @@ -738,6 +754,17 @@ out_err: } EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); +bool amd_mce_is_memory_error(struct mce *m) +{ + /* ErrCodeExt[20:16] */ + u8 xec = (m->status >> 16) & 0x1f; + + if (mce_flags.smca) + return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0; + + return m->bank == 4 && xec == 0x8; +} + static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) { struct mce m; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index e4fc595cd6ea..319dd65f98a2 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -560,7 +560,7 @@ static ssize_t pf_show(struct device *dev, return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); } -static DEVICE_ATTR(reload, 0200, NULL, reload_store); +static DEVICE_ATTR_WO(reload); static DEVICE_ATTR(version, 0400, version_show, NULL); static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 85eb5fc180c8..9340f41ce8d3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void) hyperv_setup_mmu_ops(); /* Setup the IDT for hypervisor callback */ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + + /* Setup the IDT for reenlightenment notifications */ + if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) + alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, + hyperv_reenlightenment_vector); + #endif } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index df11f5d604be..772c219b6889 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -24,11 +24,13 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, + { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, + { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 76e07698e6d1..25de5f6ca997 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -2,7 +2,6 @@ /* * Architecture specific OF callbacks. */ -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/io.h> #include <linux/interrupt.h> @@ -39,11 +38,6 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) BUG(); } -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); -} - void __init add_dtb(u64 data) { initial_dtb = data + offsetof(struct setup_data, data); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index afbecff161d1..a2d8a3908670 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -109,7 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; - bool partial; + bool partial = false; printk("%sCall Trace:\n", log_lvl); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1e82f787c160..bae0d32e327b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -243,7 +243,7 @@ static void __init intel_remapping_check(int num, int slot, int func) #define KB(x) ((x) * 1024UL) #define MB(x) (KB (KB (x))) -static size_t __init i830_tseg_size(void) +static resource_size_t __init i830_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); @@ -256,7 +256,7 @@ static size_t __init i830_tseg_size(void) return KB(512); } -static size_t __init i845_tseg_size(void) +static resource_size_t __init i845_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK; @@ -273,7 +273,7 @@ static size_t __init i845_tseg_size(void) return 0; } -static size_t __init i85x_tseg_size(void) +static resource_size_t __init i85x_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); @@ -283,12 +283,12 @@ static size_t __init i85x_tseg_size(void) return MB(1); } -static size_t __init i830_mem_size(void) +static resource_size_t __init i830_mem_size(void) { return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); } -static size_t __init i85x_mem_size(void) +static resource_size_t __init i85x_mem_size(void) { return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); } @@ -297,36 +297,36 @@ static size_t __init i85x_mem_size(void) * On 830/845/85x the stolen memory base isn't available in any * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. */ -static phys_addr_t __init i830_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i830_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size; + return i830_mem_size() - i830_tseg_size() - stolen_size; } -static phys_addr_t __init i845_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i845_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size; + return i830_mem_size() - i845_tseg_size() - stolen_size; } -static phys_addr_t __init i85x_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i85x_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size; + return i85x_mem_size() - i85x_tseg_size() - stolen_size; } -static phys_addr_t __init i865_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i865_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u16 toud = 0; toud = read_pci_config_16(0, 0, 0, I865_TOUD); - return (phys_addr_t)(toud << 16) + i845_tseg_size(); + return toud * KB(64) + i845_tseg_size(); } -static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init gen3_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u32 bsm; @@ -337,10 +337,10 @@ static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, */ bsm = read_pci_config(num, slot, func, INTEL_BSM); - return (phys_addr_t)bsm & INTEL_BSM_MASK; + return bsm & INTEL_BSM_MASK; } -static size_t __init i830_stolen_size(int num, int slot, int func) +static resource_size_t __init i830_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -361,7 +361,7 @@ static size_t __init i830_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen3_stolen_size(int num, int slot, int func) +static resource_size_t __init gen3_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -390,7 +390,7 @@ static size_t __init gen3_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen6_stolen_size(int num, int slot, int func) +static resource_size_t __init gen6_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -398,10 +398,10 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init gen8_stolen_size(int num, int slot, int func) +static resource_size_t __init gen8_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -409,10 +409,10 @@ static size_t __init gen8_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init chv_stolen_size(int num, int slot, int func) +static resource_size_t __init chv_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -426,14 +426,14 @@ static size_t __init chv_stolen_size(int num, int slot, int func) * 0x17 to 0x1d: 4MB increments start at 36MB */ if (gms < 0x11) - return (size_t)gms * MB(32); + return gms * MB(32); else if (gms < 0x17) - return (size_t)(gms - 0x11 + 2) * MB(4); + return (gms - 0x11) * MB(4) + MB(8); else - return (size_t)(gms - 0x17 + 9) * MB(4); + return (gms - 0x17) * MB(4) + MB(36); } -static size_t __init gen9_stolen_size(int num, int slot, int func) +static resource_size_t __init gen9_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -444,14 +444,15 @@ static size_t __init gen9_stolen_size(int num, int slot, int func) /* 0x0 to 0xef: 32MB increments starting at 0MB */ /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ if (gms < 0xf0) - return (size_t)gms * MB(32); + return gms * MB(32); else - return (size_t)(gms - 0xf0 + 1) * MB(4); + return (gms - 0xf0) * MB(4) + MB(4); } struct intel_early_ops { - size_t (*stolen_size)(int num, int slot, int func); - phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size); + resource_size_t (*stolen_size)(int num, int slot, int func); + resource_size_t (*stolen_base)(int num, int slot, int func, + resource_size_t size); }; static const struct intel_early_ops i830_early_ops __initconst = { @@ -527,16 +528,20 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_CFL_IDS(&gen9_early_ops), INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), }; +struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); +EXPORT_SYMBOL(intel_graphics_stolen_res); + static void __init intel_graphics_stolen(int num, int slot, int func, const struct intel_early_ops *early_ops) { - phys_addr_t base, end; - size_t size; + resource_size_t base, size; + resource_size_t end; size = early_ops->stolen_size(num, slot, func); base = early_ops->stolen_base(num, slot, func, size); @@ -545,8 +550,12 @@ intel_graphics_stolen(int num, int slot, int func, return; end = base + size - 1; - printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n", - &base, &end); + + intel_graphics_stolen_res.start = base; + intel_graphics_stolen_res.end = end; + + printk(KERN_INFO "Reserving Intel graphics memory at %pR\n", + &intel_graphics_stolen_res); /* Mark this space as reserved */ e820__range_add(base, size, E820_TYPE_RESERVED); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 68e1867cca80..45fb4d2565f8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -142,6 +142,15 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Hypervisor callback interrupts\n"); } #endif +#if IS_ENABLED(CONFIG_HYPERV) + if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HRE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->irq_hv_reenlightenment_count); + seq_puts(p, " Hyper-V reenlightenment interrupts\n"); + } +#endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index f73f475d0573..d177940aa090 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -24,7 +24,6 @@ #include <linux/cpumask.h> #include <linux/cpuset.h> #include <linux/mutex.h> -#include <linux/sched.h> #include <linux/sysctl.h> #include <linux/nodemask.h> diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c new file mode 100644 index 000000000000..b68fd895235a --- /dev/null +++ b/arch/x86/kernel/jailhouse.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL2.0 +/* + * Jailhouse paravirt_ops implementation + * + * Copyright (c) Siemens AG, 2015-2017 + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + */ + +#include <linux/acpi_pmtmr.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <asm/apic.h> +#include <asm/cpu.h> +#include <asm/hypervisor.h> +#include <asm/i8259.h> +#include <asm/irqdomain.h> +#include <asm/pci_x86.h> +#include <asm/reboot.h> +#include <asm/setup.h> + +static __initdata struct jailhouse_setup_data setup_data; +static unsigned int precalibrated_tsc_khz; + +static uint32_t jailhouse_cpuid_base(void) +{ + if (boot_cpu_data.cpuid_level < 0 || + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return 0; + + return hypervisor_cpuid_base("Jailhouse\0\0\0", 0); +} + +static uint32_t __init jailhouse_detect(void) +{ + return jailhouse_cpuid_base(); +} + +static void jailhouse_get_wallclock(struct timespec *now) +{ + memset(now, 0, sizeof(*now)); +} + +static void __init jailhouse_timer_init(void) +{ + lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); +} + +static unsigned long jailhouse_get_tsc(void) +{ + return precalibrated_tsc_khz; +} + +static void __init jailhouse_x2apic_init(void) +{ +#ifdef CONFIG_X86_X2APIC + if (!x2apic_enabled()) + return; + /* + * We do not have access to IR inside Jailhouse non-root cells. So + * we have to run in physical mode. + */ + x2apic_phys = 1; + /* + * This will trigger the switch to apic_x2apic_phys. Empty OEM IDs + * ensure that only this APIC driver picks up the call. + */ + default_acpi_madt_oem_check("", ""); +#endif +} + +static void __init jailhouse_get_smp_config(unsigned int early) +{ + struct ioapic_domain_cfg ioapic_cfg = { + .type = IOAPIC_DOMAIN_STRICT, + .ops = &mp_ioapic_irqdomain_ops, + }; + struct mpc_intsrc mp_irq = { + .type = MP_INTSRC, + .irqtype = mp_INT, + .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE, + }; + unsigned int cpu; + + jailhouse_x2apic_init(); + + register_lapic_address(0xfee00000); + + for (cpu = 0; cpu < setup_data.num_cpus; cpu++) { + generic_processor_info(setup_data.cpu_ids[cpu], + boot_cpu_apic_version); + } + + smp_found_config = 1; + + if (setup_data.standard_ioapic) { + mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg); + + /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */ + mp_irq.srcbusirq = mp_irq.dstirq = 3; + mp_save_irq(&mp_irq); + + mp_irq.srcbusirq = mp_irq.dstirq = 4; + mp_save_irq(&mp_irq); + } +} + +static void jailhouse_no_restart(void) +{ + pr_notice("Jailhouse: Restart not supported, halting\n"); + machine_halt(); +} + +static int __init jailhouse_pci_arch_init(void) +{ + pci_direct_init(1); + + /* + * There are no bridges on the virtual PCI root bus under Jailhouse, + * thus no other way to discover all devices than a full scan. + * Respect any overrides via the command line, though. + */ + if (pcibios_last_bus < 0) + pcibios_last_bus = 0xff; + + return 0; +} + +static void __init jailhouse_init_platform(void) +{ + u64 pa_data = boot_params.hdr.setup_data; + struct setup_data header; + void *mapping; + + x86_init.irqs.pre_vector_init = x86_init_noop; + x86_init.timers.timer_init = jailhouse_timer_init; + x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + x86_init.pci.arch_init = jailhouse_pci_arch_init; + + x86_platform.calibrate_cpu = jailhouse_get_tsc; + x86_platform.calibrate_tsc = jailhouse_get_tsc; + x86_platform.get_wallclock = jailhouse_get_wallclock; + x86_platform.legacy.rtc = 0; + x86_platform.legacy.warm_reset = 0; + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; + + legacy_pic = &null_legacy_pic; + + machine_ops.emergency_restart = jailhouse_no_restart; + + while (pa_data) { + mapping = early_memremap(pa_data, sizeof(header)); + memcpy(&header, mapping, sizeof(header)); + early_memunmap(mapping, sizeof(header)); + + if (header.type == SETUP_JAILHOUSE && + header.len >= sizeof(setup_data)) { + pa_data += offsetof(struct setup_data, data); + + mapping = early_memremap(pa_data, sizeof(setup_data)); + memcpy(&setup_data, mapping, sizeof(setup_data)); + early_memunmap(mapping, sizeof(setup_data)); + + break; + } + + pa_data = header.next; + } + + if (!pa_data) + panic("Jailhouse: No valid setup data found"); + + if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION) + panic("Jailhouse: Unsupported setup data structure"); + + pmtmr_ioport = setup_data.pm_timer_address; + pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); + + precalibrated_tsc_khz = setup_data.tsc_khz; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + + pci_probe = 0; + + /* + * Avoid that the kernel complains about missing ACPI tables - there + * are none in a non-root cell. + */ + disable_acpi(); +} + +bool jailhouse_paravirt(void) +{ + return jailhouse_cpuid_base() != 0; +} + +static bool jailhouse_x2apic_available(void) +{ + /* + * The x2APIC is only available if the root cell enabled it. Jailhouse + * does not support switching between xAPIC and x2APIC. + */ + return x2apic_enabled(); +} + +const struct hypervisor_x86 x86_hyper_jailhouse __refconst = { + .name = "Jailhouse", + .detect = jailhouse_detect, + .init.init_platform = jailhouse_init_platform, + .init.x2apic_available = jailhouse_x2apic_available, +}; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b40ffbf156c1..4e37d1a851a6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -498,6 +498,34 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } +static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask); + +static void kvm_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + u8 state; + int cpu; + struct kvm_steal_time *src; + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask); + + cpumask_copy(flushmask, cpumask); + /* + * We have to call flush only on online vCPUs. And + * queue flush_on_enter for pre-empted vCPUs + */ + for_each_cpu(cpu, flushmask) { + src = &per_cpu(steal_time, cpu); + state = READ_ONCE(src->preempted); + if ((state & KVM_VCPU_PREEMPTED)) { + if (try_cmpxchg(&src->preempted, &state, + state | KVM_VCPU_FLUSH_TLB)) + __cpumask_clear_cpu(cpu, flushmask); + } + } + + native_flush_tlb_others(flushmask, info); +} + static void __init kvm_guest_init(void) { int i; @@ -517,6 +545,9 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); @@ -598,6 +629,22 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); +static __init int kvm_setup_pv_tlb_flush(void) +{ + int cpu; + + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + pr_info("KVM setup pv remote TLB flush\n"); + } + + return 0; +} +arch_initcall(kvm_setup_pv_tlb_flush); + #ifdef CONFIG_PARAVIRT_SPINLOCKS /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ @@ -643,7 +690,7 @@ __visible bool __kvm_vcpu_is_preempted(long cpu) { struct kvm_steal_time *src = &per_cpu(steal_time, cpu); - return !!src->preempted; + return !!(src->preempted & KVM_VCPU_PREEMPTED); } PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index bc6bc6689e68..f1c5eb99d445 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -281,7 +281,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) int ELCR_fallback = 0; intsrc.type = MP_INTSRC; - intsrc.irqflag = 0; /* conforming */ + intsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT; intsrc.srcbus = 0; intsrc.dstapic = mpc_ioapic_id(0); @@ -324,10 +324,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * copy that information over to the MP table in the * irqflag field (level sensitive, active high polarity). */ - if (ELCR_trigger(i)) - intsrc.irqflag = 13; - else - intsrc.irqflag = 0; + if (ELCR_trigger(i)) { + intsrc.irqflag = MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_HIGH; + } else { + intsrc.irqflag = MP_IRQTRIG_DEFAULT | + MP_IRQPOL_DEFAULT; + } } intsrc.srcbusirq = i; @@ -419,7 +422,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) construct_ioapic_table(mpc_default_type); lintsrc.type = MP_LINTSRC; - lintsrc.irqflag = 0; /* conforming */ + lintsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT; lintsrc.srcbusid = 0; lintsrc.srcbusirq = 0; lintsrc.destapic = MP_APIC_ALL; @@ -664,7 +667,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) if (m->irqtype != mp_INT) return 0; - if (m->irqflag != 0x0f) + if (m->irqflag != (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW)) return 0; /* not legacy */ @@ -673,7 +676,8 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].irqflag != 0x0f) + if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_LOW)) continue; if (mp_irqs[i].srcbus != m->srcbus) @@ -784,7 +788,8 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].irqflag != 0x0f) + if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_LOW)) continue; if (nr_m_spare > 0) { diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 599d7462eccc..df7ab02f959f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/dma-debug.h> #include <linux/dmar.h> #include <linux/export.h> @@ -87,7 +87,6 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_mask = dma_alloc_coherent_mask(dev, flag); - flag &= ~__GFP_ZERO; again: page = NULL; /* CMA can be used only in the context which permits sleeping */ @@ -139,7 +138,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) if (!*dev) *dev = &x86_dma_fallback_dev; - *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!is_device_dma_capable(*dev)) @@ -217,7 +215,7 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -int x86_dma_supported(struct device *dev, u64 mask) +int arch_dma_supported(struct device *dev, u64 mask) { #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { @@ -226,12 +224,6 @@ int x86_dma_supported(struct device *dev, u64 mask) } #endif - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_BIT_MASK(24)) - return 0; - /* Tell the device to use SAC when IOMMU force is on. This allows the driver to use cheaper accesses in some cases. @@ -251,6 +243,17 @@ int x86_dma_supported(struct device *dev, u64 mask) return 1; } +EXPORT_SYMBOL(arch_dma_supported); + +int x86_dma_supported(struct device *dev, u64 mask) +{ + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_BIT_MASK(24)) + return 0; + return 1; +} static int __init pci_iommu_init(void) { diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b0caae27e1b7..618285e475c6 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Fallback functions when the main IOMMU code is not compiled in. This code is roughly equivalent to i386. */ -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/scatterlist.h> #include <linux/string.h> #include <linux/gfp.h> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 53bd05ea90d8..0ee0f8f34251 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -6,7 +6,7 @@ #include <linux/init.h> #include <linux/swiotlb.h> #include <linux/bootmem.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/mem_encrypt.h> #include <asm/iommu.h> @@ -48,7 +48,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size, dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } -static const struct dma_map_ops swiotlb_dma_ops = { +static const struct dma_map_ops x86_swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc = x86_swiotlb_alloc_coherent, .free = x86_swiotlb_free_coherent, @@ -112,7 +112,7 @@ void __init pci_swiotlb_init(void) { if (swiotlb) { swiotlb_init(0); - dma_ops = &swiotlb_dma_ops; + dma_ops = &x86_swiotlb_dma_ops; } } @@ -120,7 +120,7 @@ void __init pci_swiotlb_late_init(void) { /* An IOMMU turned us off. */ if (!swiotlb) - swiotlb_free(); + swiotlb_exit(); else { printk(KERN_INFO "PCI-DMA: " "Using software bounce buffering for IO (SWIOTLB)\n"); diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 39a59299bfa0..235fe6008ac8 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -9,6 +9,7 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT; x86_platform.legacy.rtc = 1; + x86_platform.legacy.warm_reset = 1; x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cb368c2a22ab..03408b942adb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -21,7 +21,6 @@ #include <linux/dmi.h> #include <linux/utsname.h> #include <linux/stackprotector.h> -#include <linux/tick.h> #include <linux/cpuidle.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 307d3bac5f04..11eda21eb697 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -68,6 +68,9 @@ relocate_kernel: movq %cr4, %rax movq %rax, CR4(%r11) + /* Save CR4. Required to enable the right paging mode later. */ + movq %rax, %r13 + /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -126,8 +129,13 @@ identity_mapped: /* * Set cr4 to a known state: * - physical address extension enabled + * - 5-level paging, if it was enabled before */ movl $X86_CR4_PAE, %eax + testq $X86_CR4_LA57, %r13 + jz 1f + orl $X86_CR4_LA57, %eax +1: movq %rax, %cr4 jmp 1f diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68d7ab81c62f..1ae67e982af7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,7 +114,6 @@ #include <asm/alternative.h> #include <asm/prom.h> #include <asm/microcode.h> -#include <asm/mmu_context.h> #include <asm/kaslr.h> #include <asm/unwind.h> diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 8c6da1a643da..ac057f9b0763 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -25,8 +25,8 @@ static inline void signal_compat_build_tests(void) * limits also have to look at this code. Make sure any * new fields are handled in copy_siginfo_to_user32()! */ - BUILD_BUG_ON(NSIGILL != 8); - BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGILL != 11); + BUILD_BUG_ON(NSIGFPE != 13); BUILD_BUG_ON(NSIGSEGV != 4); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); @@ -64,7 +64,7 @@ static inline void signal_compat_build_tests(void) CHECK_SI_SIZE (_kill, 2*sizeof(int)); CHECK_CSI_OFFSET(_timer); - CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_CSI_SIZE (_timer, 3*sizeof(int)); CHECK_SI_SIZE (_timer, 6*sizeof(int)); CHECK_CSI_OFFSET(_rt); @@ -75,9 +75,11 @@ static inline void signal_compat_build_tests(void) CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); +#ifdef CONFIG_X86_X32_ABI CHECK_CSI_OFFSET(_sigchld_x32); CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); /* no _sigchld_x32 in the generic siginfo_t */ +#endif CHECK_CSI_OFFSET(_sigfault); CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); @@ -96,6 +98,8 @@ static inline void signal_compat_build_tests(void) void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) { + signal_compat_build_tests(); + /* Don't leak in-kernel non-uapi flags to user-space */ if (oact) oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); @@ -111,116 +115,3 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) if (in_x32_syscall()) act->sa.sa_flags |= SA_X32_ABI; } - -int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, - bool x32_ABI) -{ - int err = 0; - - signal_compat_build_tests(); - - if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) - return -EFAULT; - - put_user_try { - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - put_user_ex(from->si_signo, &to->si_signo); - put_user_ex(from->si_errno, &to->si_errno); - put_user_ex(from->si_code, &to->si_code); - - if (from->si_code < 0) { - put_user_ex(from->si_pid, &to->si_pid); - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - put_user_ex(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (siginfo_layout(from->si_signo, from->si_code)) { - case SIL_FAULT: - if (from->si_signo == SIGBUS && - (from->si_code == BUS_MCEERR_AR || - from->si_code == BUS_MCEERR_AO)) - put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); - - if (from->si_signo == SIGSEGV) { - if (from->si_code == SEGV_BNDERR) { - compat_uptr_t lower = (unsigned long)from->si_lower; - compat_uptr_t upper = (unsigned long)from->si_upper; - put_user_ex(lower, &to->si_lower); - put_user_ex(upper, &to->si_upper); - } - if (from->si_code == SEGV_PKUERR) - put_user_ex(from->si_pkey, &to->si_pkey); - } - break; - case SIL_SYS: - put_user_ex(from->si_syscall, &to->si_syscall); - put_user_ex(from->si_arch, &to->si_arch); - break; - case SIL_CHLD: - if (!x32_ABI) { - put_user_ex(from->si_utime, &to->si_utime); - put_user_ex(from->si_stime, &to->si_stime); - } else { - put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); - put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); - } - put_user_ex(from->si_status, &to->si_status); - /* FALL THROUGH */ - case SIL_KILL: - put_user_ex(from->si_uid, &to->si_uid); - break; - case SIL_POLL: - put_user_ex(from->si_fd, &to->si_fd); - break; - case SIL_TIMER: - put_user_ex(from->si_overrun, &to->si_overrun); - put_user_ex(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - case SIL_RT: - put_user_ex(from->si_uid, &to->si_uid); - put_user_ex(from->si_int, &to->si_int); - break; - } - } - } put_user_catch(err); - - return err; -} - -/* from syscall's path, where we know the ABI */ -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) -{ - return __copy_siginfo_to_user32(to, from, in_x32_syscall()); -} - -int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) -{ - int err = 0; - u32 ptr32; - - if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) - return -EFAULT; - - get_user_try { - get_user_ex(to->si_signo, &from->si_signo); - get_user_ex(to->si_errno, &from->si_errno); - get_user_ex(to->si_code, &from->si_code); - - get_user_ex(to->si_pid, &from->si_pid); - get_user_ex(to->si_uid, &from->si_uid); - get_user_ex(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); - } get_user_catch(err); - - return err; -} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed556d50d7ed..6f27facbaa9b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -75,7 +75,6 @@ #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> #include <asm/i8259.h> -#include <asm/realmode.h> #include <asm/misc.h> #include <asm/qspinlock.h> @@ -934,7 +933,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * the targeted processor. */ - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); @@ -1006,7 +1005,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, /* mark "stuck" area as not stuck */ *trampoline_status = 0; - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + if (x86_platform.legacy.warm_reset) { /* * Cleanup possible dangling ends... */ diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 749d189f8cd4..774ebafa97c4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -69,9 +69,12 @@ static struct irqaction irq0 = { static void __init setup_default_timer_irq(void) { - if (!nr_legacy_irqs()) - return; - setup_irq(0, &irq0); + /* + * Unconditionally register the legacy timer; even without legacy + * PIC/PIT we need this for the HPET0 in legacy replacement mode. + */ + if (setup_irq(0, &irq0)) + pr_info("Failed to register legacy timer interrupt\n"); } /* Default timer init function */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e169e85db434..fb4302738410 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -25,6 +25,7 @@ #include <asm/geode.h> #include <asm/apic.h> #include <asm/intel-family.h> +#include <asm/i8259.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -363,6 +364,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) unsigned long tscmin, tscmax; int pitcnt; + if (!has_legacy_pic()) { + /* + * Relies on tsc_early_delay_calibrate() to have given us semi + * usable udelay(), wait for the same 50ms we would have with + * the PIT loop below. + */ + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + return ULONG_MAX; + } + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -487,6 +502,9 @@ static unsigned long quick_pit_calibrate(void) u64 tsc, delta; unsigned long d1, d2; + if (!has_legacy_pic()) + return 0; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -988,8 +1006,6 @@ static void __init detect_art(void) /* clocksource code */ -static struct clocksource clocksource_tsc; - static void tsc_resume(struct clocksource *cs) { tsc_verify_tsc_adjust(true); @@ -1040,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs) /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ +static struct clocksource clocksource_tsc_early = { + .name = "tsc-early", + .rating = 299, + .read = read_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_MUST_VERIFY, + .archdata = { .vclock_mode = VCLOCK_TSC }, + .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, + .tick_stable = tsc_cs_tick_stable, +}; + +/* + * Must mark VALID_FOR_HRES early such that when we unregister tsc_early + * this one will immediately take over. We will only register if TSC has + * been found good. + */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, @@ -1169,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work) int cpu; /* Don't bother refining TSC on unstable systems */ - if (check_tsc_unstable()) - goto out; + if (tsc_unstable) + return; /* * Since the work is started early in boot, we may be @@ -1222,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work) set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); out: + if (tsc_unstable) + return; + if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); + clocksource_unregister(&clocksource_tsc_early); } @@ -1233,13 +1272,11 @@ static int __init init_tsc_clocksource(void) if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) return 0; + if (check_tsc_unstable()) + return 0; + if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1252,6 +1289,7 @@ static int __init init_tsc_clocksource(void) if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); + clocksource_unregister(&clocksource_tsc_early); return 0; } @@ -1356,9 +1394,12 @@ void __init tsc_init(void) check_system_tsc_reliable(); - if (unsynchronized_tsc()) + if (unsynchronized_tsc()) { mark_tsc_unstable("TSCs unsynchronized"); + return; + } + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index a3755d293a48..85c7ef23d99f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -528,11 +528,11 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return 0; } -static int push_ret_address(struct pt_regs *regs, unsigned long ip) +static int emulate_push_stack(struct pt_regs *regs, unsigned long val) { unsigned long new_sp = regs->sp - sizeof_long(); - if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) + if (copy_to_user((void __user *)new_sp, &val, sizeof_long())) return -EFAULT; regs->sp = new_sp; @@ -566,7 +566,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs regs->ip += correction; } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { regs->sp += sizeof_long(); /* Pop incorrect return address */ - if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) + if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; } /* popf; tell the caller to not touch TF */ @@ -655,7 +655,7 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). */ - if (push_ret_address(regs, new_ip)) + if (emulate_push_stack(regs, new_ip)) return false; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; @@ -665,6 +665,16 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return true; } +static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + unsigned long *src_ptr = (void *)regs + auprobe->push.reg_offset; + + if (emulate_push_stack(regs, *src_ptr)) + return false; + regs->ip += auprobe->push.ilen; + return true; +} + static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { BUG_ON(!branch_is_call(auprobe)); @@ -703,6 +713,10 @@ static const struct uprobe_xol_ops branch_xol_ops = { .post_xol = branch_post_xol_op, }; +static const struct uprobe_xol_ops push_xol_ops = { + .emulate = push_emulate_op, +}; + /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { @@ -750,6 +764,87 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return 0; } +/* Returns -ENOSYS if push_xol_ops doesn't handle this insn */ +static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) +{ + u8 opc1 = OPCODE1(insn), reg_offset = 0; + + if (opc1 < 0x50 || opc1 > 0x57) + return -ENOSYS; + + if (insn->length > 2) + return -ENOSYS; + if (insn->length == 2) { + /* only support rex_prefix 0x41 (x64 only) */ +#ifdef CONFIG_X86_64 + if (insn->rex_prefix.nbytes != 1 || + insn->rex_prefix.bytes[0] != 0x41) + return -ENOSYS; + + switch (opc1) { + case 0x50: + reg_offset = offsetof(struct pt_regs, r8); + break; + case 0x51: + reg_offset = offsetof(struct pt_regs, r9); + break; + case 0x52: + reg_offset = offsetof(struct pt_regs, r10); + break; + case 0x53: + reg_offset = offsetof(struct pt_regs, r11); + break; + case 0x54: + reg_offset = offsetof(struct pt_regs, r12); + break; + case 0x55: + reg_offset = offsetof(struct pt_regs, r13); + break; + case 0x56: + reg_offset = offsetof(struct pt_regs, r14); + break; + case 0x57: + reg_offset = offsetof(struct pt_regs, r15); + break; + } +#else + return -ENOSYS; +#endif + } else { + switch (opc1) { + case 0x50: + reg_offset = offsetof(struct pt_regs, ax); + break; + case 0x51: + reg_offset = offsetof(struct pt_regs, cx); + break; + case 0x52: + reg_offset = offsetof(struct pt_regs, dx); + break; + case 0x53: + reg_offset = offsetof(struct pt_regs, bx); + break; + case 0x54: + reg_offset = offsetof(struct pt_regs, sp); + break; + case 0x55: + reg_offset = offsetof(struct pt_regs, bp); + break; + case 0x56: + reg_offset = offsetof(struct pt_regs, si); + break; + case 0x57: + reg_offset = offsetof(struct pt_regs, di); + break; + } + } + + auprobe->push.reg_offset = reg_offset; + auprobe->push.ilen = insn->length; + auprobe->ops = &push_xol_ops; + return 0; +} + /** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @mm: the probed address space. @@ -771,6 +866,10 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, if (ret != -ENOSYS) return ret; + ret = push_setup_xol_ops(auprobe, &insn); + if (ret != -ENOSYS) + return ret; + /* * Figure out which fixups default_post_xol_op() will need to perform, * and annotate defparam->fixups accordingly. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3df51c287844..92fd433c50b9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -81,6 +81,14 @@ config KVM_AMD To compile this as a module, choose M here: the module will be called kvm-amd. +config KVM_AMD_SEV + def_bool y + bool "AMD Secure Encrypted Virtualization (SEV) support" + depends on KVM_AMD && X86_64 + depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP + ---help--- + Provides support for launching Encrypted VMs on AMD processors. + config KVM_MMU_AUDIT bool "Audit KVM MMU" depends on KVM && TRACEPOINTS diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 13f5d4217e4f..a0c5a69bc7c4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, { switch (func) { case 0: - entry->eax = 1; /* only one leaf currently */ + entry->eax = 7; ++*nent; break; case 1: entry->ecx = F(MOVBE); ++*nent; break; + case 7: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (index == 0) + entry->ecx = F(RDPID); + ++*nent; default: break; } @@ -325,6 +330,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; + unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; /* cpuid 1.edx */ const u32 kvm_cpuid_1_edx_x86_features = @@ -363,7 +369,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | + F(TOPOEXT); /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = @@ -389,8 +396,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | - 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | + F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = @@ -476,6 +484,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(TSC_ADJUST); entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); + entry->ecx |= f_umip; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); @@ -597,7 +606,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | - (1 << KVM_FEATURE_PV_UNHALT); + (1 << KVM_FEATURE_PV_UNHALT) | + (1 << KVM_FEATURE_PV_TLB_FLUSH); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); @@ -607,7 +617,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->edx = 0; break; case 0x80000000: - entry->eax = min(entry->eax, 0x8000001a); + entry->eax = min(entry->eax, 0x8000001f); break; case 0x80000001: entry->edx &= kvm_cpuid_8000_0001_edx_x86_features; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 290ecf711aec..d91eaeb01034 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3533,6 +3533,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_rdpid(struct x86_emulate_ctxt *ctxt) +{ + u64 tsc_aux = 0; + + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + return emulate_gp(ctxt, 0); + ctxt->dst.val = tsc_aux; + return X86EMUL_CONTINUE; +} + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { u64 tsc = 0; @@ -3652,17 +3662,27 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) +static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment) { - if (ctxt->modrm_reg > VCPU_SREG_GS) - return emulate_ud(ctxt); + if (segment > VCPU_SREG_GS && + (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) && + ctxt->ops->cpl(ctxt) > 0) + return emulate_gp(ctxt, 0); - ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg); + ctxt->dst.val = get_segment_selector(ctxt, segment); if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM) ctxt->dst.bytes = 2; return X86EMUL_CONTINUE; } +static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt) +{ + if (ctxt->modrm_reg > VCPU_SREG_GS) + return emulate_ud(ctxt); + + return em_store_sreg(ctxt, ctxt->modrm_reg); +} + static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) { u16 sel = ctxt->src.val; @@ -3678,6 +3698,11 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); } +static int em_sldt(struct x86_emulate_ctxt *ctxt) +{ + return em_store_sreg(ctxt, VCPU_SREG_LDTR); +} + static int em_lldt(struct x86_emulate_ctxt *ctxt) { u16 sel = ctxt->src.val; @@ -3687,6 +3712,11 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt) return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR); } +static int em_str(struct x86_emulate_ctxt *ctxt) +{ + return em_store_sreg(ctxt, VCPU_SREG_TR); +} + static int em_ltr(struct x86_emulate_ctxt *ctxt) { u16 sel = ctxt->src.val; @@ -3739,6 +3769,10 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt, { struct desc_ptr desc_ptr; + if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) && + ctxt->ops->cpl(ctxt) > 0) + return emulate_gp(ctxt, 0); + if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->op_bytes = 8; get(ctxt, &desc_ptr); @@ -3798,6 +3832,10 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) static int em_smsw(struct x86_emulate_ctxt *ctxt) { + if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) && + ctxt->ops->cpl(ctxt) > 0) + return emulate_gp(ctxt, 0); + if (ctxt->dst.type == OP_MEM) ctxt->dst.bytes = 2; ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); @@ -4383,8 +4421,8 @@ static const struct opcode group5[] = { }; static const struct opcode group6[] = { - DI(Prot | DstMem, sldt), - DI(Prot | DstMem, str), + II(Prot | DstMem, em_sldt, sldt), + II(Prot | DstMem, em_str, str), II(Prot | Priv | SrcMem16, em_lldt, lldt), II(Prot | Priv | SrcMem16, em_ltr, ltr), N, N, N, N, @@ -4415,10 +4453,20 @@ static const struct opcode group8[] = { F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; +/* + * The "memory" destination is actually always a register, since we come + * from the register case of group9. + */ +static const struct gprefix pfx_0f_c7_7 = { + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), +}; + + static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { - N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, + GP(0, &pfx_0f_c7_7), } }; static const struct opcode group11[] = { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 5c24811e8b0b..f171051eecf3 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -79,7 +79,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (kvm_cpu_has_extint(v)) return 1; - if (kvm_vcpu_apicv_active(v)) + if (!is_guest_mode(v) && kvm_vcpu_apicv_active(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2c1fb8d35ce..924ac8ce9d50 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -364,32 +364,41 @@ static u8 count_vectors(void *bitmap) return count; } -int __kvm_apic_update_irr(u32 *pir, void *regs) +bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr) { u32 i, vec; - u32 pir_val, irr_val; - int max_irr = -1; + u32 pir_val, irr_val, prev_irr_val; + int max_updated_irr; + + max_updated_irr = -1; + *max_irr = -1; for (i = vec = 0; i <= 7; i++, vec += 32) { pir_val = READ_ONCE(pir[i]); irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); if (pir_val) { + prev_irr_val = irr_val; irr_val |= xchg(&pir[i], 0); *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; + if (prev_irr_val != irr_val) { + max_updated_irr = + __fls(irr_val ^ prev_irr_val) + vec; + } } if (irr_val) - max_irr = __fls(irr_val) + vec; + *max_irr = __fls(irr_val) + vec; } - return max_irr; + return ((max_updated_irr != -1) && + (max_updated_irr == *max_irr)); } EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); -int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr) { struct kvm_lapic *apic = vcpu->arch.apic; - return __kvm_apic_update_irr(pir, apic->regs); + return __kvm_apic_update_irr(pir, apic->regs, max_irr); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); @@ -581,7 +590,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) { int highest_irr; - if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) + if (apic->vcpu->arch.apicv_active) highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu); else highest_irr = apic_find_highest_irr(apic); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4b9935a38347..56c36014f7b7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -75,8 +75,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); -int __kvm_apic_update_irr(u32 *pir, void *regs); -int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); +bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr); +bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cc83bdcb65d1..46ff304140c7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -42,6 +42,7 @@ #include <linux/kern_levels.h> #include <asm/page.h> +#include <asm/pat.h> #include <asm/cmpxchg.h> #include <asm/io.h> #include <asm/vmx.h> @@ -381,7 +382,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); -void kvm_mmu_clear_all_pte_masks(void) +static void kvm_mmu_clear_all_pte_masks(void) { shadow_user_mask = 0; shadow_accessed_mask = 0; @@ -2708,7 +2709,18 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) - return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) && + /* + * Some reserved pages, such as those from NVDIMM + * DAX devices, are not for MMIO, and can be mapped + * with cached memory type for better performance. + * However, the above check misconceives those pages + * as MMIO, and results in KVM mapping them with UC + * memory type, which would hurt the performance. + * Therefore, we check the host memory type in addition + * and only treat UC/UC-/WC pages as MMIO. + */ + (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); return true; } @@ -4951,6 +4963,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, if (mmio_info_in_cache(vcpu, cr2, direct)) emulation_type = 0; emulate: + /* + * On AMD platforms, under certain conditions insn_len may be zero on #NPF. + * This can happen if a guest gets a page-fault on data access but the HW + * table walker is not able to read the instruction page (e.g instruction + * page is not present in memory). In those cases we simply restart the + * guest. + */ + if (unlikely(insn && !insn_len)) + return 1; + er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); switch (er) { diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index d22ddbdf5e6e..1272861e77b9 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -19,7 +19,7 @@ #include <linux/ratelimit.h> -char const *audit_point_name[] = { +static char const *audit_point_name[] = { "pre page fault", "post page fault", "pre pte write", diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4e3c79530526..b3e488a74828 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -37,6 +37,10 @@ #include <linux/amd-iommu.h> #include <linux/hashtable.h> #include <linux/frame.h> +#include <linux/psp-sev.h> +#include <linux/file.h> +#include <linux/pagemap.h> +#include <linux/swap.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -214,6 +218,9 @@ struct vcpu_svm { */ struct list_head ir_list; spinlock_t ir_list_lock; + + /* which host CPU was used for running this vcpu */ + unsigned int last_cpu; }; /* @@ -289,8 +296,12 @@ module_param(vls, int, 0444); static int vgif = true; module_param(vgif, int, 0444); +/* enable/disable SEV support */ +static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); +module_param(sev, int, 0444); + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -static void svm_flush_tlb(struct kvm_vcpu *vcpu); +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); static int nested_svm_exit_handled(struct vcpu_svm *svm); @@ -324,6 +335,38 @@ enum { #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL +static unsigned int max_sev_asid; +static unsigned int min_sev_asid; +static unsigned long *sev_asid_bitmap; +#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) + +struct enc_region { + struct list_head list; + unsigned long npages; + struct page **pages; + unsigned long uaddr; + unsigned long size; +}; + +static inline bool svm_sev_enabled(void) +{ + return max_sev_asid; +} + +static inline bool sev_guest(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + + return sev->active; +} + +static inline int sev_get_asid(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + + return sev->asid; +} + static inline void mark_all_dirty(struct vmcb *vmcb) { vmcb->control.clean = 0; @@ -530,10 +573,14 @@ struct svm_cpu_data { u64 asid_generation; u32 max_asid; u32 next_asid; + u32 min_asid; struct kvm_ldttss_desc *tss_desc; struct page *save_area; struct vmcb *current_vmcb; + + /* index = sev_asid, value = vmcb pointer */ + struct vmcb **sev_vmcbs; }; static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); @@ -788,6 +835,7 @@ static int svm_hardware_enable(void) sd->asid_generation = 1; sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; + sd->min_asid = max_sev_asid + 1; gdt = get_current_gdt_rw(); sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); @@ -846,6 +894,7 @@ static void svm_cpu_uninit(int cpu) return; per_cpu(svm_data, raw_smp_processor_id()) = NULL; + kfree(sd->sev_vmcbs); __free_page(sd->save_area); kfree(sd); } @@ -859,11 +908,18 @@ static int svm_cpu_init(int cpu) if (!sd) return -ENOMEM; sd->cpu = cpu; - sd->save_area = alloc_page(GFP_KERNEL); r = -ENOMEM; + sd->save_area = alloc_page(GFP_KERNEL); if (!sd->save_area) goto err_1; + if (svm_sev_enabled()) { + r = -ENOMEM; + sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL); + if (!sd->sev_vmcbs) + goto err_1; + } + per_cpu(svm_data, cpu) = sd; return 0; @@ -1070,6 +1126,48 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +static __init int sev_hardware_setup(void) +{ + struct sev_user_data_status *status; + int rc; + + /* Maximum number of encrypted guests supported simultaneously */ + max_sev_asid = cpuid_ecx(0x8000001F); + + if (!max_sev_asid) + return 1; + + /* Minimum ASID value that should be used for SEV guest */ + min_sev_asid = cpuid_edx(0x8000001F); + + /* Initialize SEV ASID bitmap */ + sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid), + sizeof(unsigned long), GFP_KERNEL); + if (!sev_asid_bitmap) + return 1; + + status = kmalloc(sizeof(*status), GFP_KERNEL); + if (!status) + return 1; + + /* + * Check SEV platform status. + * + * PLATFORM_STATUS can be called in any state, if we failed to query + * the PLATFORM status then either PSP firmware does not support SEV + * feature or SEV firmware is dead. + */ + rc = sev_platform_status(status, NULL); + if (rc) + goto err; + + pr_info("SEV supported\n"); + +err: + kfree(status); + return rc; +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1105,6 +1203,17 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } + if (sev) { + if (boot_cpu_has(X86_FEATURE_SEV) && + IS_ENABLED(CONFIG_KVM_AMD_SEV)) { + r = sev_hardware_setup(); + if (r) + sev = false; + } else { + sev = false; + } + } + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -1166,6 +1275,9 @@ static __exit void svm_hardware_unsetup(void) { int cpu; + if (svm_sev_enabled()) + kfree(sev_asid_bitmap); + for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -1318,7 +1430,7 @@ static void init_vmcb(struct vcpu_svm *svm) if (npt_enabled) { /* Setup VMCB for Nested Paging */ - control->nested_ctl = 1; + control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); clr_cr_intercept(svm, INTERCEPT_CR3_READ); @@ -1356,6 +1468,11 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; } + if (sev_guest(svm->vcpu.kvm)) { + svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; + clr_exception_intercept(svm, UD_VECTOR); + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1438,6 +1555,179 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return 0; } +static void __sev_asid_free(int asid) +{ + struct svm_cpu_data *sd; + int cpu, pos; + + pos = asid - 1; + clear_bit(pos, sev_asid_bitmap); + + for_each_possible_cpu(cpu) { + sd = per_cpu(svm_data, cpu); + sd->sev_vmcbs[pos] = NULL; + } +} + +static void sev_asid_free(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + + __sev_asid_free(sev->asid); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ + struct sev_data_decommission *decommission; + struct sev_data_deactivate *data; + + if (!handle) + return; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return; + + /* deactivate handle */ + data->handle = handle; + sev_guest_deactivate(data, NULL); + + wbinvd_on_all_cpus(); + sev_guest_df_flush(NULL); + kfree(data); + + decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); + if (!decommission) + return; + + /* decommission handle */ + decommission->handle = handle; + sev_guest_decommission(decommission, NULL); + + kfree(decommission); +} + +static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, + unsigned long ulen, unsigned long *n, + int write) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + unsigned long npages, npinned, size; + unsigned long locked, lock_limit; + struct page **pages; + int first, last; + + /* Calculate number of pages. */ + first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; + last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; + npages = (last - first + 1); + + locked = sev->pages_locked + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { + pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit); + return NULL; + } + + /* Avoid using vmalloc for smaller buffers. */ + size = npages * sizeof(struct page *); + if (size > PAGE_SIZE) + pages = vmalloc(size); + else + pages = kmalloc(size, GFP_KERNEL); + + if (!pages) + return NULL; + + /* Pin the user virtual address. */ + npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); + if (npinned != npages) { + pr_err("SEV: Failure locking %lu pages.\n", npages); + goto err; + } + + *n = npages; + sev->pages_locked = locked; + + return pages; + +err: + if (npinned > 0) + release_pages(pages, npinned); + + kvfree(pages); + return NULL; +} + +static void sev_unpin_memory(struct kvm *kvm, struct page **pages, + unsigned long npages) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + + release_pages(pages, npages); + kvfree(pages); + sev->pages_locked -= npages; +} + +static void sev_clflush_pages(struct page *pages[], unsigned long npages) +{ + uint8_t *page_virtual; + unsigned long i; + + if (npages == 0 || pages == NULL) + return; + + for (i = 0; i < npages; i++) { + page_virtual = kmap_atomic(pages[i]); + clflush_cache_range(page_virtual, PAGE_SIZE); + kunmap_atomic(page_virtual); + } +} + +static void __unregister_enc_region_locked(struct kvm *kvm, + struct enc_region *region) +{ + /* + * The guest may change the memory encryption attribute from C=0 -> C=1 + * or vice versa for this memory range. Lets make sure caches are + * flushed to ensure that guest data gets written into memory with + * correct C-bit. + */ + sev_clflush_pages(region->pages, region->npages); + + sev_unpin_memory(kvm, region->pages, region->npages); + list_del(®ion->list); + kfree(region); +} + +static void sev_vm_destroy(struct kvm *kvm) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct list_head *head = &sev->regions_list; + struct list_head *pos, *q; + + if (!sev_guest(kvm)) + return; + + mutex_lock(&kvm->lock); + + /* + * if userspace was terminated before unregistering the memory regions + * then lets unpin all the registered memory. + */ + if (!list_empty(head)) { + list_for_each_safe(pos, q, head) { + __unregister_enc_region_locked(kvm, + list_entry(pos, struct enc_region, list)); + } + } + + mutex_unlock(&kvm->lock); + + sev_unbind_asid(kvm, sev->handle); + sev_asid_free(kvm); +} + static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; @@ -1456,6 +1746,12 @@ static void avic_vm_destroy(struct kvm *kvm) spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); } +static void svm_vm_destroy(struct kvm *kvm) +{ + avic_vm_destroy(kvm); + sev_vm_destroy(kvm); +} + static int avic_vm_init(struct kvm *kvm) { unsigned long flags; @@ -2066,7 +2362,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -2125,7 +2421,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { if (sd->next_asid > sd->max_asid) { ++sd->asid_generation; - sd->next_asid = 1; + sd->next_asid = sd->min_asid; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } @@ -2173,22 +2469,24 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) static int pf_interception(struct vcpu_svm *svm) { - u64 fault_address = svm->vmcb->control.exit_info_2; + u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); u64 error_code = svm->vmcb->control.exit_info_1; return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address, - svm->vmcb->control.insn_bytes, + static_cpu_has(X86_FEATURE_DECODEASSISTS) ? + svm->vmcb->control.insn_bytes : NULL, svm->vmcb->control.insn_len); } static int npf_interception(struct vcpu_svm *svm) { - u64 fault_address = svm->vmcb->control.exit_info_2; + u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2); u64 error_code = svm->vmcb->control.exit_info_1; trace_kvm_page_fault(fault_address, error_code); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, - svm->vmcb->control.insn_bytes, + static_cpu_has(X86_FEATURE_DECODEASSISTS) ? + svm->vmcb->control.insn_bytes : NULL, svm->vmcb->control.insn_len); } @@ -2415,7 +2713,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -2957,7 +3255,8 @@ static bool nested_vmcb_checks(struct vmcb *vmcb) if (vmcb->control.asid == 0) return false; - if (vmcb->control.nested_ctl && !npt_enabled) + if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && + !npt_enabled) return false; return true; @@ -2971,7 +3270,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, else svm->vcpu.arch.hflags &= ~HF_HIF_MASK; - if (nested_vmcb->control.nested_ctl) { + if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { kvm_mmu_unload(&svm->vcpu); svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; nested_svm_init_mmu_context(&svm->vcpu); @@ -3019,7 +3318,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; svm->nested.intercept = nested_vmcb->control.intercept; - svm_flush_tlb(&svm->vcpu); + svm_flush_tlb(&svm->vcpu, true); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; @@ -4442,12 +4741,39 @@ static void reload_tss(struct kvm_vcpu *vcpu) load_TR_desc(); } +static void pre_sev_run(struct vcpu_svm *svm, int cpu) +{ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + int asid = sev_get_asid(svm->vcpu.kvm); + + /* Assign the asid allocated with this SEV guest */ + svm->vmcb->control.asid = asid; + + /* + * Flush guest TLB: + * + * 1) when different VMCB for the same ASID is to be run on the same host CPU. + * 2) or this VMCB was executed on different host CPU in previous VMRUNs. + */ + if (sd->sev_vmcbs[asid] == svm->vmcb && + svm->last_cpu == cpu) + return; + + svm->last_cpu = cpu; + sd->sev_vmcbs[asid] = svm->vmcb; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; + mark_dirty(svm->vmcb, VMCB_ASID); +} + static void pre_svm_run(struct vcpu_svm *svm) { int cpu = raw_smp_processor_id(); struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + if (sev_guest(svm->vcpu.kvm)) + return pre_sev_run(svm, cpu); + /* FIXME: handle wraparound of asid_generation */ if (svm->asid_generation != sd->asid_generation) new_asid(svm, sd); @@ -4865,7 +5191,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void svm_flush_tlb(struct kvm_vcpu *vcpu) +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5208,7 +5534,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -5222,7 +5548,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static int is_disabled(void) @@ -5308,6 +5634,12 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) entry->edx |= SVM_FEATURE_NPT; break; + case 0x8000001F: + /* Support memory encryption cpuid if host supports it */ + if (boot_cpu_has(X86_FEATURE_SEV)) + cpuid(0x8000001f, &entry->eax, &entry->ebx, + &entry->ecx, &entry->edx); + } } @@ -5336,6 +5668,11 @@ static bool svm_xsaves_supported(void) return false; } +static bool svm_umip_emulated(void) +{ + return false; +} + static bool svm_has_wbinvd_exit(void) { return true; @@ -5637,6 +5974,828 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static int sev_asid_new(void) +{ + int pos; + + /* + * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid. + */ + pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1); + if (pos >= max_sev_asid) + return -EBUSY; + + set_bit(pos, sev_asid_bitmap); + return pos + 1; +} + +static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + int asid, ret; + + ret = -EBUSY; + asid = sev_asid_new(); + if (asid < 0) + return ret; + + ret = sev_platform_init(&argp->error); + if (ret) + goto e_free; + + sev->active = true; + sev->asid = asid; + INIT_LIST_HEAD(&sev->regions_list); + + return 0; + +e_free: + __sev_asid_free(asid); + return ret; +} + +static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) +{ + struct sev_data_activate *data; + int asid = sev_get_asid(kvm); + int ret; + + wbinvd_on_all_cpus(); + + ret = sev_guest_df_flush(error); + if (ret) + return ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* activate ASID on the given handle */ + data->handle = handle; + data->asid = asid; + ret = sev_guest_activate(data, error); + kfree(data); + + return ret; +} + +static int __sev_issue_cmd(int fd, int id, void *data, int *error) +{ + struct fd f; + int ret; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + ret = sev_issue_cmd_external_user(f.file, id, data, error); + + fdput(f); + return ret; +} + +static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + + return __sev_issue_cmd(sev->fd, id, data, error); +} + +static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct sev_data_launch_start *start; + struct kvm_sev_launch_start params; + void *dh_blob, *session_blob; + int *error = &argp->error; + int ret; + + if (!sev_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + return -EFAULT; + + start = kzalloc(sizeof(*start), GFP_KERNEL); + if (!start) + return -ENOMEM; + + dh_blob = NULL; + if (params.dh_uaddr) { + dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len); + if (IS_ERR(dh_blob)) { + ret = PTR_ERR(dh_blob); + goto e_free; + } + + start->dh_cert_address = __sme_set(__pa(dh_blob)); + start->dh_cert_len = params.dh_len; + } + + session_blob = NULL; + if (params.session_uaddr) { + session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len); + if (IS_ERR(session_blob)) { + ret = PTR_ERR(session_blob); + goto e_free_dh; + } + + start->session_address = __sme_set(__pa(session_blob)); + start->session_len = params.session_len; + } + + start->handle = params.handle; + start->policy = params.policy; + + /* create memory encryption context */ + ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error); + if (ret) + goto e_free_session; + + /* Bind ASID to this guest */ + ret = sev_bind_asid(kvm, start->handle, error); + if (ret) + goto e_free_session; + + /* return handle to userspace */ + params.handle = start->handle; + if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) { + sev_unbind_asid(kvm, start->handle); + ret = -EFAULT; + goto e_free_session; + } + + sev->handle = start->handle; + sev->fd = argp->sev_fd; + +e_free_session: + kfree(session_blob); +e_free_dh: + kfree(dh_blob); +e_free: + kfree(start); + return ret; +} + +static int get_num_contig_pages(int idx, struct page **inpages, + unsigned long npages) +{ + unsigned long paddr, next_paddr; + int i = idx + 1, pages = 1; + + /* find the number of contiguous pages starting from idx */ + paddr = __sme_page_pa(inpages[idx]); + while (i < npages) { + next_paddr = __sme_page_pa(inpages[i++]); + if ((paddr + PAGE_SIZE) == next_paddr) { + pages++; + paddr = next_paddr; + continue; + } + break; + } + + return pages; +} + +static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + unsigned long vaddr, vaddr_end, next_vaddr, npages, size; + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_launch_update_data params; + struct sev_data_launch_update_data *data; + struct page **inpages; + int i, ret, pages; + + if (!sev_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + return -EFAULT; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + vaddr = params.uaddr; + size = params.len; + vaddr_end = vaddr + size; + + /* Lock the user memory. */ + inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1); + if (!inpages) { + ret = -ENOMEM; + goto e_free; + } + + /* + * The LAUNCH_UPDATE command will perform in-place encryption of the + * memory content (i.e it will write the same memory region with C=1). + * It's possible that the cache may contain the data with C=0, i.e., + * unencrypted so invalidate it first. + */ + sev_clflush_pages(inpages, npages); + + for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { + int offset, len; + + /* + * If the user buffer is not page-aligned, calculate the offset + * within the page. + */ + offset = vaddr & (PAGE_SIZE - 1); + + /* Calculate the number of pages that can be encrypted in one go. */ + pages = get_num_contig_pages(i, inpages, npages); + + len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size); + + data->handle = sev->handle; + data->len = len; + data->address = __sme_page_pa(inpages[i]) + offset; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error); + if (ret) + goto e_unpin; + + size -= len; + next_vaddr = vaddr + len; + } + +e_unpin: + /* content of memory is updated, mark pages dirty */ + for (i = 0; i < npages; i++) { + set_page_dirty_lock(inpages[i]); + mark_page_accessed(inpages[i]); + } + /* unlock the user pages */ + sev_unpin_memory(kvm, inpages, npages); +e_free: + kfree(data); + return ret; +} + +static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct sev_data_launch_measure *data; + struct kvm_sev_launch_measure params; + void *blob = NULL; + int ret; + + if (!sev_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + return -EFAULT; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* User wants to query the blob length */ + if (!params.len) + goto cmd; + + if (params.uaddr) { + if (params.len > SEV_FW_BLOB_MAX_SIZE) { + ret = -EINVAL; + goto e_free; + } + + if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { + ret = -EFAULT; + goto e_free; + } + + ret = -ENOMEM; + blob = kmalloc(params.len, GFP_KERNEL); + if (!blob) + goto e_free; + + data->address = __psp_pa(blob); + data->len = params.len; + } + +cmd: + data->handle = sev->handle; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error); + + /* + * If we query the session length, FW responded with expected data. + */ + if (!params.len) + goto done; + + if (ret) + goto e_free_blob; + + if (blob) { + if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) + ret = -EFAULT; + } + +done: + params.len = data->len; + if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + ret = -EFAULT; +e_free_blob: + kfree(blob); +e_free: + kfree(data); + return ret; +} + +static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct sev_data_launch_finish *data; + int ret; + + if (!sev_guest(kvm)) + return -ENOTTY; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->handle = sev->handle; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error); + + kfree(data); + return ret; +} + +static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_guest_status params; + struct sev_data_guest_status *data; + int ret; + + if (!sev_guest(kvm)) + return -ENOTTY; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->handle = sev->handle; + ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error); + if (ret) + goto e_free; + + params.policy = data->policy; + params.state = data->state; + params.handle = data->handle; + + if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + ret = -EFAULT; +e_free: + kfree(data); + return ret; +} + +static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, + unsigned long dst, int size, + int *error, bool enc) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct sev_data_dbg *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->handle = sev->handle; + data->dst_addr = dst; + data->src_addr = src; + data->len = size; + + ret = sev_issue_cmd(kvm, + enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT, + data, error); + kfree(data); + return ret; +} + +static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, + unsigned long dst_paddr, int sz, int *err) +{ + int offset; + + /* + * Its safe to read more than we are asked, caller should ensure that + * destination has enough space. + */ + src_paddr = round_down(src_paddr, 16); + offset = src_paddr & 15; + sz = round_up(sz + offset, 16); + + return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); +} + +static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, + unsigned long __user dst_uaddr, + unsigned long dst_paddr, + int size, int *err) +{ + struct page *tpage = NULL; + int ret, offset; + + /* if inputs are not 16-byte then use intermediate buffer */ + if (!IS_ALIGNED(dst_paddr, 16) || + !IS_ALIGNED(paddr, 16) || + !IS_ALIGNED(size, 16)) { + tpage = (void *)alloc_page(GFP_KERNEL); + if (!tpage) + return -ENOMEM; + + dst_paddr = __sme_page_pa(tpage); + } + + ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err); + if (ret) + goto e_free; + + if (tpage) { + offset = paddr & 15; + if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, + page_address(tpage) + offset, size)) + ret = -EFAULT; + } + +e_free: + if (tpage) + __free_page(tpage); + + return ret; +} + +static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, + unsigned long __user vaddr, + unsigned long dst_paddr, + unsigned long __user dst_vaddr, + int size, int *error) +{ + struct page *src_tpage = NULL; + struct page *dst_tpage = NULL; + int ret, len = size; + + /* If source buffer is not aligned then use an intermediate buffer */ + if (!IS_ALIGNED(vaddr, 16)) { + src_tpage = alloc_page(GFP_KERNEL); + if (!src_tpage) + return -ENOMEM; + + if (copy_from_user(page_address(src_tpage), + (void __user *)(uintptr_t)vaddr, size)) { + __free_page(src_tpage); + return -EFAULT; + } + + paddr = __sme_page_pa(src_tpage); + } + + /* + * If destination buffer or length is not aligned then do read-modify-write: + * - decrypt destination in an intermediate buffer + * - copy the source buffer in an intermediate buffer + * - use the intermediate buffer as source buffer + */ + if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { + int dst_offset; + + dst_tpage = alloc_page(GFP_KERNEL); + if (!dst_tpage) { + ret = -ENOMEM; + goto e_free; + } + + ret = __sev_dbg_decrypt(kvm, dst_paddr, + __sme_page_pa(dst_tpage), size, error); + if (ret) + goto e_free; + + /* + * If source is kernel buffer then use memcpy() otherwise + * copy_from_user(). + */ + dst_offset = dst_paddr & 15; + + if (src_tpage) + memcpy(page_address(dst_tpage) + dst_offset, + page_address(src_tpage), size); + else { + if (copy_from_user(page_address(dst_tpage) + dst_offset, + (void __user *)(uintptr_t)vaddr, size)) { + ret = -EFAULT; + goto e_free; + } + } + + paddr = __sme_page_pa(dst_tpage); + dst_paddr = round_down(dst_paddr, 16); + len = round_up(size, 16); + } + + ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true); + +e_free: + if (src_tpage) + __free_page(src_tpage); + if (dst_tpage) + __free_page(dst_tpage); + return ret; +} + +static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) +{ + unsigned long vaddr, vaddr_end, next_vaddr; + unsigned long dst_vaddr, dst_vaddr_end; + struct page **src_p, **dst_p; + struct kvm_sev_dbg debug; + unsigned long n; + int ret, size; + + if (!sev_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug))) + return -EFAULT; + + vaddr = debug.src_uaddr; + size = debug.len; + vaddr_end = vaddr + size; + dst_vaddr = debug.dst_uaddr; + dst_vaddr_end = dst_vaddr + size; + + for (; vaddr < vaddr_end; vaddr = next_vaddr) { + int len, s_off, d_off; + + /* lock userspace source and destination page */ + src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0); + if (!src_p) + return -EFAULT; + + dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1); + if (!dst_p) { + sev_unpin_memory(kvm, src_p, n); + return -EFAULT; + } + + /* + * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the + * memory content (i.e it will write the same memory region with C=1). + * It's possible that the cache may contain the data with C=0, i.e., + * unencrypted so invalidate it first. + */ + sev_clflush_pages(src_p, 1); + sev_clflush_pages(dst_p, 1); + + /* + * Since user buffer may not be page aligned, calculate the + * offset within the page. + */ + s_off = vaddr & ~PAGE_MASK; + d_off = dst_vaddr & ~PAGE_MASK; + len = min_t(size_t, (PAGE_SIZE - s_off), size); + + if (dec) + ret = __sev_dbg_decrypt_user(kvm, + __sme_page_pa(src_p[0]) + s_off, + dst_vaddr, + __sme_page_pa(dst_p[0]) + d_off, + len, &argp->error); + else + ret = __sev_dbg_encrypt_user(kvm, + __sme_page_pa(src_p[0]) + s_off, + vaddr, + __sme_page_pa(dst_p[0]) + d_off, + dst_vaddr, + len, &argp->error); + + sev_unpin_memory(kvm, src_p, 1); + sev_unpin_memory(kvm, dst_p, 1); + + if (ret) + goto err; + + next_vaddr = vaddr + len; + dst_vaddr = dst_vaddr + len; + size -= len; + } +err: + return ret; +} + +static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct sev_data_launch_secret *data; + struct kvm_sev_launch_secret params; + struct page **pages; + void *blob, *hdr; + unsigned long n; + int ret; + + if (!sev_guest(kvm)) + return -ENOTTY; + + if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + return -EFAULT; + + pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); + if (!pages) + return -ENOMEM; + + /* + * The secret must be copied into contiguous memory region, lets verify + * that userspace memory pages are contiguous before we issue command. + */ + if (get_num_contig_pages(0, pages, n) != n) { + ret = -EINVAL; + goto e_unpin_memory; + } + + ret = -ENOMEM; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto e_unpin_memory; + + blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); + if (IS_ERR(blob)) { + ret = PTR_ERR(blob); + goto e_free; + } + + data->trans_address = __psp_pa(blob); + data->trans_len = params.trans_len; + + hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); + if (IS_ERR(hdr)) { + ret = PTR_ERR(hdr); + goto e_free_blob; + } + data->trans_address = __psp_pa(blob); + data->trans_len = params.trans_len; + + data->handle = sev->handle; + ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); + + kfree(hdr); + +e_free_blob: + kfree(blob); +e_free: + kfree(data); +e_unpin_memory: + sev_unpin_memory(kvm, pages, n); + return ret; +} + +static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) +{ + struct kvm_sev_cmd sev_cmd; + int r; + + if (!svm_sev_enabled()) + return -ENOTTY; + + if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) + return -EFAULT; + + mutex_lock(&kvm->lock); + + switch (sev_cmd.id) { + case KVM_SEV_INIT: + r = sev_guest_init(kvm, &sev_cmd); + break; + case KVM_SEV_LAUNCH_START: + r = sev_launch_start(kvm, &sev_cmd); + break; + case KVM_SEV_LAUNCH_UPDATE_DATA: + r = sev_launch_update_data(kvm, &sev_cmd); + break; + case KVM_SEV_LAUNCH_MEASURE: + r = sev_launch_measure(kvm, &sev_cmd); + break; + case KVM_SEV_LAUNCH_FINISH: + r = sev_launch_finish(kvm, &sev_cmd); + break; + case KVM_SEV_GUEST_STATUS: + r = sev_guest_status(kvm, &sev_cmd); + break; + case KVM_SEV_DBG_DECRYPT: + r = sev_dbg_crypt(kvm, &sev_cmd, true); + break; + case KVM_SEV_DBG_ENCRYPT: + r = sev_dbg_crypt(kvm, &sev_cmd, false); + break; + case KVM_SEV_LAUNCH_SECRET: + r = sev_launch_secret(kvm, &sev_cmd); + break; + default: + r = -EINVAL; + goto out; + } + + if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd))) + r = -EFAULT; + +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int svm_register_enc_region(struct kvm *kvm, + struct kvm_enc_region *range) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct enc_region *region; + int ret = 0; + + if (!sev_guest(kvm)) + return -ENOTTY; + + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return -ENOMEM; + + region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1); + if (!region->pages) { + ret = -ENOMEM; + goto e_free; + } + + /* + * The guest may change the memory encryption attribute from C=0 -> C=1 + * or vice versa for this memory range. Lets make sure caches are + * flushed to ensure that guest data gets written into memory with + * correct C-bit. + */ + sev_clflush_pages(region->pages, region->npages); + + region->uaddr = range->addr; + region->size = range->size; + + mutex_lock(&kvm->lock); + list_add_tail(®ion->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + + return ret; + +e_free: + kfree(region); + return ret; +} + +static struct enc_region * +find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) +{ + struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct list_head *head = &sev->regions_list; + struct enc_region *i; + + list_for_each_entry(i, head, list) { + if (i->uaddr == range->addr && + i->size == range->size) + return i; + } + + return NULL; +} + + +static int svm_unregister_enc_region(struct kvm *kvm, + struct kvm_enc_region *range) +{ + struct enc_region *region; + int ret; + + mutex_lock(&kvm->lock); + + if (!sev_guest(kvm)) { + ret = -ENOTTY; + goto failed; + } + + region = find_enc_region(kvm, range); + if (!region) { + ret = -EINVAL; + goto failed; + } + + __unregister_enc_region_locked(kvm, region); + + mutex_unlock(&kvm->lock); + return 0; + +failed: + mutex_unlock(&kvm->lock); + return ret; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -5653,7 +6812,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_reset = svm_vcpu_reset, .vm_init = avic_vm_init, - .vm_destroy = avic_vm_destroy, + .vm_destroy = svm_vm_destroy, .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, @@ -5713,6 +6872,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, + .sync_pir_to_irr = kvm_lapic_find_highest_irr, .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, @@ -5729,6 +6889,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .invpcid_supported = svm_invpcid_supported, .mpx_supported = svm_mpx_supported, .xsaves_supported = svm_xsaves_supported, + .umip_emulated = svm_umip_emulated, .set_supported_cpuid = svm_set_supported_cpuid, @@ -5752,6 +6913,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .pre_enter_smm = svm_pre_enter_smm, .pre_leave_smm = svm_pre_leave_smm, .enable_smi_window = enable_smi_window, + + .mem_enc_op = svm_mem_enc_op, + .mem_enc_reg_region = svm_register_enc_region, + .mem_enc_unreg_region = svm_unregister_enc_region, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 91e3539cba02..3dec126aa302 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -419,6 +419,12 @@ struct __packed vmcs12 { #define VMCS12_SIZE 0x1000 /* + * VMCS12_MAX_FIELD_INDEX is the highest index value used in any + * supported VMCS12 field encoding. + */ +#define VMCS12_MAX_FIELD_INDEX 0x17 + +/* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. */ @@ -441,6 +447,7 @@ struct nested_vmx { * data hold by vmcs12 */ bool sync_shadow_vmcs; + bool dirty_vmcs12; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ @@ -664,6 +671,8 @@ struct vcpu_vmx { u32 host_pkru; + unsigned long host_debugctlmsr; + /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included @@ -692,67 +701,24 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) return &(to_vmx(vcpu)->pi_desc); } +#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) -#define FIELD(number, name) [number] = VMCS12_OFFSET(name) -#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ - [number##_HIGH] = VMCS12_OFFSET(name)+4 +#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name) +#define FIELD64(number, name) \ + FIELD(number, name), \ + [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) -static unsigned long shadow_read_only_fields[] = { - /* - * We do NOT shadow fields that are modified when L0 - * traps and emulates any vmx instruction (e.g. VMPTRLD, - * VMXON...) executed by L1. - * For example, VM_INSTRUCTION_ERROR is read - * by L1 if a vmx instruction fails (part of the error path). - * Note the code assumes this logic. If for some reason - * we start shadowing these fields then we need to - * force a shadow sync when L0 emulates vmx instructions - * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified - * by nested_vmx_failValid) - */ - VM_EXIT_REASON, - VM_EXIT_INTR_INFO, - VM_EXIT_INSTRUCTION_LEN, - IDT_VECTORING_INFO_FIELD, - IDT_VECTORING_ERROR_CODE, - VM_EXIT_INTR_ERROR_CODE, - EXIT_QUALIFICATION, - GUEST_LINEAR_ADDRESS, - GUEST_PHYSICAL_ADDRESS +static u16 shadow_read_only_fields[] = { +#define SHADOW_FIELD_RO(x) x, +#include "vmx_shadow_fields.h" }; static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); -static unsigned long shadow_read_write_fields[] = { - TPR_THRESHOLD, - GUEST_RIP, - GUEST_RSP, - GUEST_CR0, - GUEST_CR3, - GUEST_CR4, - GUEST_INTERRUPTIBILITY_INFO, - GUEST_RFLAGS, - GUEST_CS_SELECTOR, - GUEST_CS_AR_BYTES, - GUEST_CS_LIMIT, - GUEST_CS_BASE, - GUEST_ES_BASE, - GUEST_BNDCFGS, - CR0_GUEST_HOST_MASK, - CR0_READ_SHADOW, - CR4_READ_SHADOW, - TSC_OFFSET, - EXCEPTION_BITMAP, - CPU_BASED_VM_EXEC_CONTROL, - VM_ENTRY_EXCEPTION_ERROR_CODE, - VM_ENTRY_INTR_INFO_FIELD, - VM_ENTRY_INSTRUCTION_LEN, - VM_ENTRY_EXCEPTION_ERROR_CODE, - HOST_FS_BASE, - HOST_GS_BASE, - HOST_FS_SELECTOR, - HOST_GS_SELECTOR +static u16 shadow_read_write_fields[] = { +#define SHADOW_FIELD_RW(x) x, +#include "vmx_shadow_fields.h" }; static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields); @@ -905,13 +871,17 @@ static inline short vmcs_field_to_offset(unsigned long field) { const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); unsigned short offset; + unsigned index; + + if (field >> 15) + return -ENOENT; - BUILD_BUG_ON(size > SHRT_MAX); - if (field >= size) + index = ROL16(field, 6); + if (index >= size) return -ENOENT; - field = array_index_nospec(field, size); - offset = vmcs_field_to_offset_table[field]; + index = array_index_nospec(index, size); + offset = vmcs_field_to_offset_table[index]; if (offset == 0) return -ENOENT; return offset; @@ -957,8 +927,6 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); enum { - VMX_IO_BITMAP_A, - VMX_IO_BITMAP_B, VMX_VMREAD_BITMAP, VMX_VMWRITE_BITMAP, VMX_BITMAP_NR @@ -966,8 +934,6 @@ enum { static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; -#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) -#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) @@ -2373,6 +2339,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx_vcpu_pi_load(vcpu, cpu); vmx->host_pkru = read_pkru(); + vmx->host_debugctlmsr = get_debugctlmsr(); } static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) @@ -2930,7 +2897,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1); /* highest index: VMX_PREEMPTION_TIMER_VALUE */ - vmx->nested.nested_vmx_vmcs_enum = 0x2e; + vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; } /* @@ -3266,6 +3233,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, */ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { + struct vcpu_vmx *vmx = to_vmx(vcpu); struct shared_msr_entry *msr; switch (msr_info->index) { @@ -3277,8 +3245,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_GS_BASE); break; case MSR_KERNEL_GS_BASE: - vmx_load_host_state(to_vmx(vcpu)); - msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base; + vmx_load_host_state(vmx); + msr_info->data = vmx->msr_guest_kernel_gs_base; break; #endif case MSR_EFER: @@ -3318,13 +3286,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_MCG_EXT_CTL: if (!msr_info->host_initiated && - !(to_vmx(vcpu)->msr_ia32_feature_control & + !(vmx->msr_ia32_feature_control & FEATURE_CONTROL_LMCE)) return 1; msr_info->data = vcpu->arch.mcg_ext_ctl; break; case MSR_IA32_FEATURE_CONTROL: - msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control; + msr_info->data = vmx->msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) @@ -3341,7 +3309,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; /* Otherwise falls through */ default: - msr = find_msr_entry(to_vmx(vcpu), msr_info->index); + msr = find_msr_entry(vmx, msr_info->index); if (msr) { msr_info->data = msr->data; break; @@ -3727,7 +3695,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #endif CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING | - CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | CPU_BASED_INVLPG_EXITING | @@ -3757,6 +3725,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_PAUSE_LOOP_EXITING | + SECONDARY_EXEC_DESC | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_APIC_REGISTER_VIRT | @@ -3982,17 +3951,17 @@ static void free_kvm_area(void) } } -enum vmcs_field_type { - VMCS_FIELD_TYPE_U16 = 0, - VMCS_FIELD_TYPE_U64 = 1, - VMCS_FIELD_TYPE_U32 = 2, - VMCS_FIELD_TYPE_NATURAL_WIDTH = 3 +enum vmcs_field_width { + VMCS_FIELD_WIDTH_U16 = 0, + VMCS_FIELD_WIDTH_U64 = 1, + VMCS_FIELD_WIDTH_U32 = 2, + VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3 }; -static inline int vmcs_field_type(unsigned long field) +static inline int vmcs_field_width(unsigned long field) { if (0x1 & field) /* the *_HIGH fields are all 32 bit */ - return VMCS_FIELD_TYPE_U32; + return VMCS_FIELD_WIDTH_U32; return (field >> 13) & 0x3 ; } @@ -4005,43 +3974,66 @@ static void init_vmcs_shadow_fields(void) { int i, j; - /* No checks for read only fields yet */ + for (i = j = 0; i < max_shadow_read_only_fields; i++) { + u16 field = shadow_read_only_fields[i]; + if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && + (i + 1 == max_shadow_read_only_fields || + shadow_read_only_fields[i + 1] != field + 1)) + pr_err("Missing field from shadow_read_only_field %x\n", + field + 1); + + clear_bit(field, vmx_vmread_bitmap); +#ifdef CONFIG_X86_64 + if (field & 1) + continue; +#endif + if (j < i) + shadow_read_only_fields[j] = field; + j++; + } + max_shadow_read_only_fields = j; for (i = j = 0; i < max_shadow_read_write_fields; i++) { - switch (shadow_read_write_fields[i]) { - case GUEST_BNDCFGS: - if (!kvm_mpx_supported()) + u16 field = shadow_read_write_fields[i]; + if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && + (i + 1 == max_shadow_read_write_fields || + shadow_read_write_fields[i + 1] != field + 1)) + pr_err("Missing field from shadow_read_write_field %x\n", + field + 1); + + /* + * PML and the preemption timer can be emulated, but the + * processor cannot vmwrite to fields that don't exist + * on bare metal. + */ + switch (field) { + case GUEST_PML_INDEX: + if (!cpu_has_vmx_pml()) + continue; + break; + case VMX_PREEMPTION_TIMER_VALUE: + if (!cpu_has_vmx_preemption_timer()) + continue; + break; + case GUEST_INTR_STATUS: + if (!cpu_has_vmx_apicv()) continue; break; default: break; } + clear_bit(field, vmx_vmwrite_bitmap); + clear_bit(field, vmx_vmread_bitmap); +#ifdef CONFIG_X86_64 + if (field & 1) + continue; +#endif if (j < i) - shadow_read_write_fields[j] = - shadow_read_write_fields[i]; + shadow_read_write_fields[j] = field; j++; } max_shadow_read_write_fields = j; - - /* shadowed fields guest access without vmexit */ - for (i = 0; i < max_shadow_read_write_fields; i++) { - unsigned long field = shadow_read_write_fields[i]; - - clear_bit(field, vmx_vmwrite_bitmap); - clear_bit(field, vmx_vmread_bitmap); - if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) { - clear_bit(field + 1, vmx_vmwrite_bitmap); - clear_bit(field + 1, vmx_vmread_bitmap); - } - } - for (i = 0; i < max_shadow_read_only_fields; i++) { - unsigned long field = shadow_read_only_fields[i]; - - clear_bit(field, vmx_vmread_bitmap); - if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) - clear_bit(field + 1, vmx_vmread_bitmap); - } } static __init int alloc_kvm_area(void) @@ -4254,9 +4246,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) +static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, + bool invalidate_gpa) { - if (enable_ept) { + if (enable_ept && (invalidate_gpa || !enable_vpid)) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); @@ -4265,15 +4258,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) } } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) { if (enable_ept) - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -4471,7 +4464,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ept_load_pdptrs(vcpu); } - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); vmcs_writel(GUEST_CR3, guest_cr3); } @@ -4488,6 +4481,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) (to_vmx(vcpu)->rmode.vm86_active ? KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_DESC); + hw_cr4 &= ~X86_CR4_UMIP; + } else + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_DESC); + if (cr4 & X86_CR4_VMXE) { /* * To use VMXON (and later other VMX instructions), a guest @@ -5119,11 +5120,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, { int f = sizeof(unsigned long); - if (!cpu_has_vmx_msr_bitmap()) { - WARN_ON(1); - return; - } - /* * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals * have the write-low and read-high bitmap offsets the wrong way round. @@ -5263,7 +5259,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr != 256) { vapic_page = kmap(vmx->nested.virtual_apic_page); - __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); + __kvm_apic_update_irr(vmx->nested.pi_desc->pir, + vapic_page, &max_irr); kunmap(vmx->nested.virtual_apic_page); status = vmcs_read16(GUEST_INTR_STATUS); @@ -5323,14 +5320,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { - /* the PIR and ON have been set by L1. */ - kvm_vcpu_trigger_posted_interrupt(vcpu, true); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. */ vmx->nested.pi_pending = true; kvm_make_request(KVM_REQ_EVENT, vcpu); + /* the PIR and ON have been set by L1. */ + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) + kvm_vcpu_kick(vcpu); return 0; } return -1; @@ -5509,6 +5507,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) struct kvm_vcpu *vcpu = &vmx->vcpu; u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; + if (!cpu_need_virtualize_apic_accesses(vcpu)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) @@ -5527,6 +5526,11 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + + /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP, + * in vmx_set_cr4. */ + exec_control &= ~SECONDARY_EXEC_DESC; + /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD (handle_vmptrld). We can NOT enable shadow_vmcs here because we don't have yet @@ -5646,10 +5650,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) #endif int i; - /* I/O */ - vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); - vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); - if (enable_shadow_vmcs) { vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); @@ -6304,6 +6304,12 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) return kvm_set_cr4(vcpu, val); } +static int handle_desc(struct kvm_vcpu *vcpu) +{ + WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); + return emulate_instruction(vcpu, 0) == EMULATE_DONE; +} + static int handle_cr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification, val; @@ -6760,7 +6766,21 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - return kvm_skip_emulated_instruction(vcpu); + /* + * Doing kvm_skip_emulated_instruction() depends on undefined + * behavior: Intel's manual doesn't mandate + * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG + * occurs and while on real hardware it was observed to be set, + * other hypervisors (namely Hyper-V) don't set it, we end up + * advancing IP with some random value. Disable fast mmio when + * running nested and keep it for real hardware in hope that + * VM_EXIT_INSTRUCTION_LEN will always be set correctly. + */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + return kvm_skip_emulated_instruction(vcpu); + else + return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, + NULL, 0) == EMULATE_DONE; } ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -6957,10 +6977,6 @@ static __init int hardware_setup(void) memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; goto out; @@ -6973,11 +6989,6 @@ static __init int hardware_setup(void) !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; - if (!cpu_has_vmx_shadow_vmcs()) - enable_shadow_vmcs = 0; - if (enable_shadow_vmcs) - init_vmcs_shadow_fields(); - if (!cpu_has_vmx_ept() || !cpu_has_vmx_ept_4levels() || !cpu_has_vmx_ept_mt_wb() || @@ -7063,6 +7074,11 @@ static __init int hardware_setup(void) kvm_x86_ops->cancel_hv_timer = NULL; } + if (!cpu_has_vmx_shadow_vmcs()) + enable_shadow_vmcs = 0; + if (enable_shadow_vmcs) + init_vmcs_shadow_fields(); + kvm_set_posted_intr_wakeup_handler(wakeup_handler); kvm_mce_cap_supported |= MCG_LMCE_P; @@ -7593,17 +7609,17 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu, p = ((char *)(get_vmcs12(vcpu))) + offset; - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_NATURAL_WIDTH: + switch (vmcs_field_width(field)) { + case VMCS_FIELD_WIDTH_NATURAL_WIDTH: *ret = *((natural_width *)p); return 0; - case VMCS_FIELD_TYPE_U16: + case VMCS_FIELD_WIDTH_U16: *ret = *((u16 *)p); return 0; - case VMCS_FIELD_TYPE_U32: + case VMCS_FIELD_WIDTH_U32: *ret = *((u32 *)p); return 0; - case VMCS_FIELD_TYPE_U64: + case VMCS_FIELD_WIDTH_U64: *ret = *((u64 *)p); return 0; default: @@ -7620,17 +7636,17 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu, if (offset < 0) return offset; - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: + switch (vmcs_field_width(field)) { + case VMCS_FIELD_WIDTH_U16: *(u16 *)p = field_value; return 0; - case VMCS_FIELD_TYPE_U32: + case VMCS_FIELD_WIDTH_U32: *(u32 *)p = field_value; return 0; - case VMCS_FIELD_TYPE_U64: + case VMCS_FIELD_WIDTH_U64: *(u64 *)p = field_value; return 0; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: + case VMCS_FIELD_WIDTH_NATURAL_WIDTH: *(natural_width *)p = field_value; return 0; default: @@ -7646,7 +7662,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) unsigned long field; u64 field_value; struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; - const unsigned long *fields = shadow_read_write_fields; + const u16 *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; preempt_disable(); @@ -7655,23 +7671,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) for (i = 0; i < num_fields; i++) { field = fields[i]; - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - field_value = vmcs_read16(field); - break; - case VMCS_FIELD_TYPE_U32: - field_value = vmcs_read32(field); - break; - case VMCS_FIELD_TYPE_U64: - field_value = vmcs_read64(field); - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - field_value = vmcs_readl(field); - break; - default: - WARN_ON(1); - continue; - } + field_value = __vmcs_readl(field); vmcs12_write_any(&vmx->vcpu, field, field_value); } @@ -7683,7 +7683,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) { - const unsigned long *fields[] = { + const u16 *fields[] = { shadow_read_write_fields, shadow_read_only_fields }; @@ -7702,24 +7702,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; vmcs12_read_any(&vmx->vcpu, field, &field_value); - - switch (vmcs_field_type(field)) { - case VMCS_FIELD_TYPE_U16: - vmcs_write16(field, (u16)field_value); - break; - case VMCS_FIELD_TYPE_U32: - vmcs_write32(field, (u32)field_value); - break; - case VMCS_FIELD_TYPE_U64: - vmcs_write64(field, (u64)field_value); - break; - case VMCS_FIELD_TYPE_NATURAL_WIDTH: - vmcs_writel(field, (long)field_value); - break; - default: - WARN_ON(1); - break; - } + __vmcs_writel(field, field_value); } } @@ -7788,8 +7771,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) { unsigned long field; gva_t gva; + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + /* The value to write might be 32 or 64 bits, depending on L1's long * mode, and eventually we need to write that into a field of several * possible lengths. The code below first zero-extends the value to 64 @@ -7832,6 +7817,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } + switch (field) { +#define SHADOW_FIELD_RW(x) case x: +#include "vmx_shadow_fields.h" + /* + * The fields that can be updated by L1 without a vmexit are + * always updated in the vmcs02, the others go down the slow + * path of prepare_vmcs02. + */ + break; + default: + vmx->nested.dirty_vmcs12 = true; + break; + } + nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); } @@ -7846,6 +7845,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) __pa(vmx->vmcs01.shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; } + vmx->nested.dirty_vmcs12 = true; } /* Emulate the VMPTRLD instruction */ @@ -8066,7 +8066,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - __vmx_flush_tlb(vcpu, vmx->nested.vpid02); + __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); @@ -8260,6 +8260,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, + [EXIT_REASON_GDTR_IDTR] = handle_desc, + [EXIT_REASON_LDTR_TR] = handle_desc, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, @@ -9069,36 +9071,23 @@ static void vmx_set_rvi(int vector) static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) { - if (!is_guest_mode(vcpu)) { - vmx_set_rvi(max_irr); - return; - } - - if (max_irr == -1) - return; - - /* - * In guest mode. If a vmexit is needed, vmx_check_nested_events - * handles it. - */ - if (nested_exit_on_intr(vcpu)) - return; - /* - * Else, fall back to pre-APICv interrupt injection since L2 - * is run without virtual interrupt delivery. + * When running L2, updating RVI is only relevant when + * vmcs12 virtual-interrupt-delivery enabled. + * However, it can be enabled only when L1 also + * intercepts external-interrupts and in that case + * we should not update vmcs02 RVI but instead intercept + * interrupt. Therefore, do nothing when running L2. */ - if (!kvm_event_needs_reinjection(vcpu) && - vmx_interrupt_allowed(vcpu)) { - kvm_queue_interrupt(vcpu, max_irr, false); - vmx_inject_irq(vcpu); - } + if (!is_guest_mode(vcpu)) + vmx_set_rvi(max_irr); } static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; + bool max_irr_updated; WARN_ON(!vcpu->arch.apicv_active); if (pi_test_on(&vmx->pi_desc)) { @@ -9108,7 +9097,23 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) * But on x86 this is just a compiler barrier anyway. */ smp_mb__after_atomic(); - max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); + max_irr_updated = + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr); + + /* + * If we are running L2 and L1 has a new pending interrupt + * which can be injected, we should re-evaluate + * what should be done with this new L1 interrupt. + * If L1 intercepts external-interrupts, we should + * exit from L2 to L1. Otherwise, interrupt should be + * delivered directly to L2. + */ + if (is_guest_mode(vcpu) && max_irr_updated) { + if (nested_exit_on_intr(vcpu)) + kvm_vcpu_exiting_guest_mode(vcpu); + else + kvm_make_request(KVM_REQ_EVENT, vcpu); + } } else { max_irr = kvm_lapic_find_highest_irr(vcpu); } @@ -9223,6 +9228,12 @@ static bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static bool vmx_umip_emulated(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_DESC; +} + static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -9378,7 +9389,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr, cr3, cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -9431,7 +9442,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); - debugctlmsr = get_debugctlmsr(); vmx_arm_hv_timer(vcpu); @@ -9587,8 +9597,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmexit_fill_RSB(); /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (debugctlmsr) - update_debugctlmsr(debugctlmsr); + if (vmx->host_debugctlmsr) + update_debugctlmsr(vmx->host_debugctlmsr); #ifndef CONFIG_X86_64 /* @@ -9668,10 +9678,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int r; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); vmx_switch_vmcs(vcpu, &vmx->vmcs01); free_nested(vmx); vcpu_put(vcpu); @@ -9871,7 +9879,8 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl) u32 mask = SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_DESC; u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); @@ -10037,8 +10046,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, } } -static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12); +static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12); static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) @@ -10127,9 +10136,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, (unsigned long)(vmcs12->posted_intr_desc_addr & (PAGE_SIZE - 1))); } - if (cpu_has_vmx_msr_bitmap() && - nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && - nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_USE_MSR_BITMAPS); else @@ -10200,8 +10207,8 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. */ -static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) { int msr; struct page *page; @@ -10223,6 +10230,11 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + /* Nothing to do if the MSR bitmap is not in use. */ + if (!cpu_has_vmx_msr_bitmap() || + !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) + return false; + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && !pred_cmd && !spec_ctrl) return false; @@ -10230,32 +10242,41 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); if (is_error_page(page)) return false; - msr_bitmap_l1 = (unsigned long *)kmap(page); - memset(msr_bitmap_l0, 0xff, PAGE_SIZE); + msr_bitmap_l1 = (unsigned long *)kmap(page); + if (nested_cpu_has_apic_reg_virt(vmcs12)) { + /* + * L0 need not intercept reads for MSRs between 0x800 and 0x8ff, it + * just lets the processor take the value from the virtual-APIC page; + * take those 256 bits directly from the L1 bitmap. + */ + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap_l0[word] = msr_bitmap_l1[word]; + msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; + } + } else { + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { + unsigned word = msr / BITS_PER_LONG; + msr_bitmap_l0[word] = ~0; + msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0; + } + } - if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { - if (nested_cpu_has_apic_reg_virt(vmcs12)) - for (msr = 0x800; msr <= 0x8ff; msr++) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - msr, MSR_TYPE_R); + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_TASKPRI), + MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_R | MSR_TYPE_W); - - if (nested_cpu_has_vid(vmcs12)) { - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); - } + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_EOI), + MSR_TYPE_W); + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, + X2APIC_MSR(APIC_SELF_IPI), + MSR_TYPE_W); } if (spec_ctrl) @@ -10535,25 +10556,12 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne return 0; } -/* - * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested - * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it - * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 - * guest in a way that will both be appropriate to L1's requests, and our - * needs. In addition to modifying the active vmcs (which is vmcs02), this - * function also has additional necessary side-effects, like setting various - * vcpu->arch fields. - * Returns 0 on success, 1 on failure. Invalid state exit qualification code - * is assigned to entry_failure_code on failure. - */ -static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - bool from_vmentry, u32 *entry_failure_code) +static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exec_control, vmcs12_exec_ctrl; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); @@ -10561,7 +10569,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); @@ -10571,15 +10578,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); - vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); @@ -10589,6 +10593,125 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs12->guest_pending_dbg_exceptions); + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + + if (nested_cpu_has_xsaves(vmcs12)) + vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); + vmcs_write64(VMCS_LINK_POINTER, -1ull); + + if (cpu_has_vmx_posted_intr()) + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); + + /* + * Whether page-faults are trapped is determined by a combination of + * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. + * If enable_ept, L0 doesn't care about page faults and we should + * set all of these to L1's desires. However, if !enable_ept, L0 does + * care about (at least some) page faults, and because it is not easy + * (if at all possible?) to merge L0 and L1's desires, we simply ask + * to exit on each and every L2 page fault. This is done by setting + * MASK=MATCH=0 and (see below) EB.PF=1. + * Note that below we don't need special code to set EB.PF beyond the + * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, + * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when + * !enable_ept, EB.PF is 1, so the "or" will always be 1. + */ + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, + enable_ept ? vmcs12->page_fault_error_code_mask : 0); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, + enable_ept ? vmcs12->page_fault_error_code_match : 0); + + /* All VMFUNCs are currently emulated through L0 vmexits. */ + if (cpu_has_vmx_vmfunc()) + vmcs_write64(VM_FUNCTION_CONTROL, 0); + + if (cpu_has_vmx_apicv()) { + vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); + vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); + vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); + vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); + } + + /* + * Set host-state according to L0's settings (vmcs12 is irrelevant here) + * Some constant fields are set here by vmx_set_constant_host_state(). + * Other fields are different per CPU, and will be set later when + * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. + */ + vmx_set_constant_host_state(vmx); + + /* + * Set the MSR load/store lists to match L0's settings. + */ + vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); + vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); + + set_cr4_guest_host_mask(vmx); + + if (vmx_mpx_supported()) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + + if (enable_vpid) { + if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); + else + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + } + + /* + * L1 may access the L2's PDPTR, so save them to construct vmcs12 + */ + if (enable_ept) { + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + } + + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); +} + +/* + * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested + * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 + * guest in a way that will both be appropriate to L1's requests, and our + * needs. In addition to modifying the active vmcs (which is vmcs02), this + * function also has additional necessary side-effects, like setting various + * vcpu->arch fields. + * Returns 0 on success, 1 on failure. Invalid state exit qualification code + * is assigned to entry_failure_code on failure. + */ +static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + bool from_vmentry, u32 *entry_failure_code) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exec_control, vmcs12_exec_ctrl; + + /* + * First, the fields that are shadowed. This must be kept in sync + * with vmx_shadow_fields.h. + */ + + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); + vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); + + /* + * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR, + * HOST_FS_BASE, HOST_GS_BASE. + */ + if (from_vmentry && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); @@ -10611,16 +10734,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } else { vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); } - vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); vmx_set_rflags(vcpu, vmcs12->guest_rflags); - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, - vmcs12->guest_pending_dbg_exceptions); - vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); - vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); - - if (nested_cpu_has_xsaves(vmcs12)) - vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); - vmcs_write64(VMCS_LINK_POINTER, -1ull); exec_control = vmcs12->pin_based_vm_exec_control; @@ -10634,7 +10748,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_posted_intr(vmcs12)) { vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); } else { exec_control &= ~PIN_BASED_POSTED_INTR; } @@ -10645,25 +10758,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_preemption_timer(vmcs12)) vmx_start_preemption_timer(vcpu); - /* - * Whether page-faults are trapped is determined by a combination of - * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. - * If enable_ept, L0 doesn't care about page faults and we should - * set all of these to L1's desires. However, if !enable_ept, L0 does - * care about (at least some) page faults, and because it is not easy - * (if at all possible?) to merge L0 and L1's desires, we simply ask - * to exit on each and every L2 page fault. This is done by setting - * MASK=MATCH=0 and (see below) EB.PF=1. - * Note that below we don't need special code to set EB.PF beyond the - * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, - * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when - * !enable_ept, EB.PF is 1, so the "or" will always be 1. - */ - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, - enable_ept ? vmcs12->page_fault_error_code_mask : 0); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, - enable_ept ? vmcs12->page_fault_error_code_match : 0); - if (cpu_has_secondary_exec_ctrls()) { exec_control = vmx->secondary_exec_control; @@ -10682,22 +10776,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control |= vmcs12_exec_ctrl; } - /* All VMFUNCs are currently emulated through L0 vmexits. */ - if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC) - vmcs_write64(VM_FUNCTION_CONTROL, 0); - - if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { - vmcs_write64(EOI_EXIT_BITMAP0, - vmcs12->eoi_exit_bitmap0); - vmcs_write64(EOI_EXIT_BITMAP1, - vmcs12->eoi_exit_bitmap1); - vmcs_write64(EOI_EXIT_BITMAP2, - vmcs12->eoi_exit_bitmap2); - vmcs_write64(EOI_EXIT_BITMAP3, - vmcs12->eoi_exit_bitmap3); + if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) vmcs_write16(GUEST_INTR_STATUS, vmcs12->guest_intr_status); - } /* * Write an illegal value to APIC_ACCESS_ADDR. Later, @@ -10710,24 +10791,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } - - /* - * Set host-state according to L0's settings (vmcs12 is irrelevant here) - * Some constant fields are set here by vmx_set_constant_host_state(). - * Other fields are different per CPU, and will be set later when - * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. - */ - vmx_set_constant_host_state(vmx); - - /* - * Set the MSR load/store lists to match L0's settings. - */ - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); - /* * HOST_RSP is normally set correctly in vmx_vcpu_run() just before * entry, but only if the current (host) sp changed from the value @@ -10759,8 +10822,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } /* - * Merging of IO bitmap not currently supported. - * Rather, exit every time. + * A vmexit (to either L1 hypervisor or L0 userspace) is always needed + * for I/O port accesses. */ exec_control &= ~CPU_BASED_USE_IO_BITMAPS; exec_control |= CPU_BASED_UNCOND_IO_EXITING; @@ -10797,12 +10860,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); } - set_cr4_guest_host_mask(vmx); - - if (from_vmentry && - vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); - if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset + vmcs12->tsc_offset); @@ -10811,9 +10868,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); - if (enable_vpid) { /* * There is no direct mapping between vpid02 and vpid12, the @@ -10824,16 +10878,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * even if spawn a lot of nested vCPUs. */ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true); } } else { - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } - } if (enable_pml) { @@ -10882,6 +10933,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ vmx_set_efer(vcpu, vcpu->arch.efer); + if (vmx->nested.dirty_vmcs12) { + prepare_vmcs02_full(vcpu, vmcs12, from_vmentry); + vmx->nested.dirty_vmcs12 = false; + } + /* Shadow page tables on either EPT or shadow page tables. */ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) @@ -10890,16 +10946,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; - /* - * L1 may access the L2's PDPTR, so save them to construct vmcs12 - */ - if (enable_ept) { - vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); - vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); - vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); - vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); - } - kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); return 0; @@ -11255,7 +11301,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) if (block_nested_events) return -EBUSY; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); - vcpu->arch.exception.pending = false; return 0; } @@ -11536,11 +11581,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * L1's vpid. TODO: move to a more elaborate solution, giving * each L2 its own vpid and exposing the vpid feature to L1. */ - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } - /* Restore posted intr vector. */ - if (nested_cpu_has_posted_intr(vmcs12)) - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); @@ -11801,6 +11843,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + if (info->intercept == x86_intercept_rdtscp && + !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + + /* TODO: check more intercepts... */ return X86EMUL_CONTINUE; } @@ -12314,6 +12371,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .handle_external_intr = vmx_handle_external_intr, .mpx_supported = vmx_mpx_supported, .xsaves_supported = vmx_xsaves_supported, + .umip_emulated = vmx_umip_emulated, .check_nested_events = vmx_check_nested_events, diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h new file mode 100644 index 000000000000..cd0c75f6d037 --- /dev/null +++ b/arch/x86/kvm/vmx_shadow_fields.h @@ -0,0 +1,77 @@ +#ifndef SHADOW_FIELD_RO +#define SHADOW_FIELD_RO(x) +#endif +#ifndef SHADOW_FIELD_RW +#define SHADOW_FIELD_RW(x) +#endif + +/* + * We do NOT shadow fields that are modified when L0 + * traps and emulates any vmx instruction (e.g. VMPTRLD, + * VMXON...) executed by L1. + * For example, VM_INSTRUCTION_ERROR is read + * by L1 if a vmx instruction fails (part of the error path). + * Note the code assumes this logic. If for some reason + * we start shadowing these fields then we need to + * force a shadow sync when L0 emulates vmx instructions + * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified + * by nested_vmx_failValid) + * + * When adding or removing fields here, note that shadowed + * fields must always be synced by prepare_vmcs02, not just + * prepare_vmcs02_full. + */ + +/* + * Keeping the fields ordered by size is an attempt at improving + * branch prediction in vmcs_read_any and vmcs_write_any. + */ + +/* 16-bits */ +SHADOW_FIELD_RW(GUEST_CS_SELECTOR) +SHADOW_FIELD_RW(GUEST_INTR_STATUS) +SHADOW_FIELD_RW(GUEST_PML_INDEX) +SHADOW_FIELD_RW(HOST_FS_SELECTOR) +SHADOW_FIELD_RW(HOST_GS_SELECTOR) + +/* 32-bits */ +SHADOW_FIELD_RO(VM_EXIT_REASON) +SHADOW_FIELD_RO(VM_EXIT_INTR_INFO) +SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN) +SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD) +SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE) +SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE) +SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL) +SHADOW_FIELD_RW(EXCEPTION_BITMAP) +SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE) +SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD) +SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN) +SHADOW_FIELD_RW(TPR_THRESHOLD) +SHADOW_FIELD_RW(GUEST_CS_LIMIT) +SHADOW_FIELD_RW(GUEST_CS_AR_BYTES) +SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO) +SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE) + +/* Natural width */ +SHADOW_FIELD_RO(EXIT_QUALIFICATION) +SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS) +SHADOW_FIELD_RW(GUEST_RIP) +SHADOW_FIELD_RW(GUEST_RSP) +SHADOW_FIELD_RW(GUEST_CR0) +SHADOW_FIELD_RW(GUEST_CR3) +SHADOW_FIELD_RW(GUEST_CR4) +SHADOW_FIELD_RW(GUEST_RFLAGS) +SHADOW_FIELD_RW(GUEST_CS_BASE) +SHADOW_FIELD_RW(GUEST_ES_BASE) +SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK) +SHADOW_FIELD_RW(CR0_READ_SHADOW) +SHADOW_FIELD_RW(CR4_READ_SHADOW) +SHADOW_FIELD_RW(HOST_FS_BASE) +SHADOW_FIELD_RW(HOST_GS_BASE) + +/* 64-bit */ +SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS) +SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH) + +#undef SHADOW_FIELD_RO +#undef SHADOW_FIELD_RW diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac381437c291..c8a0b545ac20 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,8 @@ #include <asm/pvclock.h> #include <asm/div64.h> #include <asm/irq_remapping.h> +#include <asm/mshyperv.h> +#include <asm/hypervisor.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -177,7 +179,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "request_irq", VCPU_STAT(request_irq_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, - { "efer_reload", VCPU_STAT(efer_reload) }, { "fpu_reload", VCPU_STAT(fpu_reload) }, { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, @@ -702,7 +703,8 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu) if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) && !vcpu->guest_xcr0_loaded) { /* kvm_set_xcr() also depends on this */ - xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + if (vcpu->arch.xcr0 != host_xcr0) + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); vcpu->guest_xcr0_loaded = 1; } } @@ -794,6 +796,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) return 1; + if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP)) + return 1; + if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; @@ -1037,6 +1042,7 @@ static u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, + MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, }; @@ -1378,6 +1384,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +static inline int gtod_is_based_on_tsc(int mode) +{ + return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK; +} + static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 @@ -1397,7 +1408,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) * perform request to enable masterclock. */ if (ka->use_master_clock || - (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) + (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1460,6 +1471,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) vcpu->arch.tsc_offset = offset; } +static inline bool kvm_check_tsc_unstable(void) +{ +#ifdef CONFIG_X86_64 + /* + * TSC is marked unstable when we're running on Hyper-V, + * 'TSC page' clocksource is good. + */ + if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK) + return false; +#endif + return check_tsc_unstable(); +} + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1505,7 +1529,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) */ if (synchronizing && vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { - if (!check_tsc_unstable()) { + if (!kvm_check_tsc_unstable()) { offset = kvm->arch.cur_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { @@ -1605,18 +1629,43 @@ static u64 read_tsc(void) return last; } -static inline u64 vgettsc(u64 *cycle_now) +static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) { long v; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + u64 tsc_pg_val; + + switch (gtod->clock.vclock_mode) { + case VCLOCK_HVCLOCK: + tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp); + if (tsc_pg_val != U64_MAX) { + /* TSC page valid */ + *mode = VCLOCK_HVCLOCK; + v = (tsc_pg_val - gtod->clock.cycle_last) & + gtod->clock.mask; + } else { + /* TSC page invalid */ + *mode = VCLOCK_NONE; + } + break; + case VCLOCK_TSC: + *mode = VCLOCK_TSC; + *tsc_timestamp = read_tsc(); + v = (*tsc_timestamp - gtod->clock.cycle_last) & + gtod->clock.mask; + break; + default: + *mode = VCLOCK_NONE; + } - *cycle_now = read_tsc(); + if (*mode == VCLOCK_NONE) + *tsc_timestamp = v = 0; - v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask; return v * gtod->clock.mult; } -static int do_monotonic_boot(s64 *t, u64 *cycle_now) +static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1625,9 +1674,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) do { seq = read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; ns += gtod->boot_ns; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -1636,7 +1684,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) return mode; } -static int do_realtime(struct timespec *ts, u64 *cycle_now) +static int do_realtime(struct timespec *ts, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1645,10 +1693,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) do { seq = read_seqcount_begin(>od->seq); - mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -1658,25 +1705,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) return mode; } -/* returns true if host is using tsc clocksource */ -static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) +/* returns true if host is using TSC based clocksource */ +static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, + tsc_timestamp)); } -/* returns true if host is using tsc clocksource */ +/* returns true if host is using TSC based clocksource */ static bool kvm_get_walltime_and_clockread(struct timespec *ts, - u64 *cycle_now) + u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_realtime(ts, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp)); } #endif @@ -2119,6 +2167,12 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) vcpu->arch.pv_time_enabled = false; } +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa); +} + static void record_steal_time(struct kvm_vcpu *vcpu) { if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) @@ -2128,7 +2182,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu) &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; - vcpu->arch.st.steal.preempted = 0; + /* + * Doing a TLB flush here, on the guest's behalf, can avoid + * expensive IPIs. + */ + if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + kvm_vcpu_flush_tlb(vcpu, false); if (vcpu->arch.st.steal.version & 1) vcpu->arch.st.steal.version += 1; /* first time write, random junk */ @@ -2229,6 +2288,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.smbase = data; break; + case MSR_SMI_COUNT: + if (!msr_info->host_initiated) + return 1; + vcpu->arch.smi_count = data; + break; case MSR_KVM_WALL_CLOCK_NEW: case MSR_KVM_WALL_CLOCK: vcpu->kvm->arch.wall_clock = data; @@ -2503,6 +2567,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vcpu->arch.smbase; break; + case MSR_SMI_COUNT: + msr_info->data = vcpu->arch.smi_count; + break; case MSR_IA32_PERF_STATUS: /* TSC increment by tick */ msr_info->data = 1000ULL; @@ -2870,13 +2937,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } - if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { + if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (check_tsc_unstable()) { + if (kvm_check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_vcpu_write_tsc_offset(vcpu, offset); @@ -2905,7 +2972,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = 1; + vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal.preempted, @@ -2939,12 +3006,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); vcpu->arch.last_host_tsc = rdtsc(); + /* + * If userspace has set any breakpoints or watchpoints, dr6 is restored + * on every vmexit, but if not, we might have a stale dr6 from the + * guest. do_debug expects dr6 to be cleared after it runs, do the same. + */ + set_debugreg(0, 6); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) + if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); return kvm_apic_get_state(vcpu, s); @@ -3473,6 +3546,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void *buffer; } u; + vcpu_load(vcpu); + u.buffer = NULL; switch (ioctl) { case KVM_GET_LAPIC: { @@ -3498,8 +3573,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!lapic_in_kernel(vcpu)) goto out; u.lapic = memdup_user(argp, sizeof(*u.lapic)); - if (IS_ERR(u.lapic)) - return PTR_ERR(u.lapic); + if (IS_ERR(u.lapic)) { + r = PTR_ERR(u.lapic); + goto out_nofree; + } r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic); break; @@ -3673,8 +3750,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_XSAVE: { u.xsave = memdup_user(argp, sizeof(*u.xsave)); - if (IS_ERR(u.xsave)) - return PTR_ERR(u.xsave); + if (IS_ERR(u.xsave)) { + r = PTR_ERR(u.xsave); + goto out_nofree; + } r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave); break; @@ -3696,8 +3775,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_SET_XCRS: { u.xcrs = memdup_user(argp, sizeof(*u.xcrs)); - if (IS_ERR(u.xcrs)) - return PTR_ERR(u.xcrs); + if (IS_ERR(u.xcrs)) { + r = PTR_ERR(u.xcrs); + goto out_nofree; + } r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs); break; @@ -3741,6 +3822,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } out: kfree(u.buffer); +out_nofree: + vcpu_put(vcpu); return r; } @@ -4238,13 +4321,14 @@ set_identity_unlock: mutex_unlock(&kvm->lock); break; case KVM_XEN_HVM_CONFIG: { + struct kvm_xen_hvm_config xhc; r = -EFAULT; - if (copy_from_user(&kvm->arch.xen_hvm_config, argp, - sizeof(struct kvm_xen_hvm_config))) + if (copy_from_user(&xhc, argp, sizeof(xhc))) goto out; r = -EINVAL; - if (kvm->arch.xen_hvm_config.flags) + if (xhc.flags) goto out; + memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); r = 0; break; } @@ -4296,6 +4380,36 @@ set_identity_unlock: r = kvm_vm_ioctl_enable_cap(kvm, &cap); break; } + case KVM_MEMORY_ENCRYPT_OP: { + r = -ENOTTY; + if (kvm_x86_ops->mem_enc_op) + r = kvm_x86_ops->mem_enc_op(kvm, argp); + break; + } + case KVM_MEMORY_ENCRYPT_REG_REGION: { + struct kvm_enc_region region; + + r = -EFAULT; + if (copy_from_user(®ion, argp, sizeof(region))) + goto out; + + r = -ENOTTY; + if (kvm_x86_ops->mem_enc_reg_region) + r = kvm_x86_ops->mem_enc_reg_region(kvm, ®ion); + break; + } + case KVM_MEMORY_ENCRYPT_UNREG_REGION: { + struct kvm_enc_region region; + + r = -EFAULT; + if (copy_from_user(®ion, argp, sizeof(region))) + goto out; + + r = -ENOTTY; + if (kvm_x86_ops->mem_enc_unreg_region) + r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); + break; + } default: r = -ENOTTY; } @@ -5704,7 +5818,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, * handle watchpoints yet, those would be handled in * the emulate_ops. */ - if (kvm_vcpu_check_breakpoint(vcpu, &r)) + if (!(emulation_type & EMULTYPE_SKIP) && + kvm_vcpu_check_breakpoint(vcpu, &r)) return r; ctxt->interruptibility = 0; @@ -5890,6 +6005,43 @@ static void tsc_khz_changed(void *data) __this_cpu_write(cpu_tsc_khz, khz); } +#ifdef CONFIG_X86_64 +static void kvm_hyperv_tsc_notifier(void) +{ + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int cpu; + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_make_mclock_inprogress_request(kvm); + + hyperv_stop_tsc_emulation(); + + /* TSC frequency always matches when on Hyper-V */ + for_each_present_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + kvm_max_guest_tsc_khz = tsc_khz; + + list_for_each_entry(kvm, &vm_list, vm_list) { + struct kvm_arch *ka = &kvm->arch; + + spin_lock(&ka->pvclock_gtod_sync_lock); + + pvclock_update_vm_gtod_copy(kvm); + + kvm_for_each_vcpu(cpu, vcpu, kvm) + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + + kvm_for_each_vcpu(cpu, vcpu, kvm) + kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); + + spin_unlock(&ka->pvclock_gtod_sync_lock); + } + spin_unlock(&kvm_lock); +} +#endif + static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -6111,9 +6263,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); /* disable master clock if host does not trust, or does not - * use, TSC clocksource + * use, TSC based clocksource. */ - if (gtod->clock.vclock_mode != VCLOCK_TSC && + if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) queue_work(system_long_wq, &pvclock_gtod_work); @@ -6175,6 +6327,9 @@ int kvm_arch_init(void *opaque) kvm_lapic_init(); #ifdef CONFIG_X86_64 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); + + if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) + set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); #endif return 0; @@ -6187,6 +6342,10 @@ out: void kvm_arch_exit(void) { +#ifdef CONFIG_X86_64 + if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) + clear_hv_tscchange_cb(); +#endif kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); @@ -6449,6 +6608,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->queue_exception(vcpu); } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) { vcpu->arch.smi_pending = false; + ++vcpu->arch.smi_count; enter_smm(vcpu); } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { --vcpu->arch.nmi_pending; @@ -6750,7 +6910,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) + if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } @@ -6759,12 +6919,6 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) -{ - ++vcpu->stat.tlb_flush; - kvm_x86_ops->tlb_flush(vcpu); -} - void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, unsigned long start, unsigned long end) { @@ -6833,7 +6987,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_vcpu_flush_tlb(vcpu); + kvm_vcpu_flush_tlb(vcpu, true); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -6982,10 +7136,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * This handles the case where a posted interrupt was * notified with kvm_vcpu_kick. */ - if (kvm_lapic_enabled(vcpu)) { - if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) - kvm_x86_ops->sync_pir_to_irr(vcpu); - } + if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || need_resched() || signal_pending(current)) { @@ -7006,7 +7158,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - wait_lapic_expire(vcpu); + if (lapic_timer_advance_ns) + wait_lapic_expire(vcpu); guest_enter_irqoff(); if (unlikely(vcpu->arch.switch_db_regs)) { @@ -7267,8 +7420,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + vcpu_load(vcpu); kvm_sigset_activate(vcpu); - kvm_load_guest_fpu(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { @@ -7315,11 +7468,14 @@ out: post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); + vcpu_put(vcpu); return r; } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); + if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { /* * We are here if userspace calls get_regs() in the middle of @@ -7353,11 +7509,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->rip = kvm_rip_read(vcpu); regs->rflags = kvm_get_rflags(vcpu); + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { + vcpu_load(vcpu); + vcpu->arch.emulate_regs_need_sync_from_vcpu = true; vcpu->arch.emulate_regs_need_sync_to_vcpu = false; @@ -7387,6 +7546,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvm_make_request(KVM_REQ_EVENT, vcpu); + vcpu_put(vcpu); return 0; } @@ -7405,6 +7565,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, { struct desc_ptr dt; + vcpu_load(vcpu); + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -7436,12 +7598,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, set_bit(vcpu->arch.interrupt.nr, (unsigned long *)sregs->interrupt_bitmap); + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + vcpu_load(vcpu); + kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && vcpu->arch.pv.pv_unhalted) @@ -7449,21 +7614,26 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret = -EINVAL; + + vcpu_load(vcpu); + if (!lapic_in_kernel(vcpu) && mp_state->mp_state != KVM_MP_STATE_RUNNABLE) - return -EINVAL; + goto out; /* INITs are latched while in SMM */ if ((is_smm(vcpu) || vcpu->arch.smi_pending) && (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) - return -EINVAL; + goto out; if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; @@ -7471,7 +7641,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, } else vcpu->arch.mp_state = mp_state->mp_state; kvm_make_request(KVM_REQ_EVENT, vcpu); - return 0; + + ret = 0; +out: + vcpu_put(vcpu); + return ret; } int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, @@ -7525,18 +7699,21 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int mmu_reset_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; + int ret = -EINVAL; + + vcpu_load(vcpu); if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (sregs->cr4 & X86_CR4_OSXSAVE)) - return -EINVAL; + goto out; if (kvm_valid_sregs(vcpu, sregs)) - return -EINVAL; + goto out; apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) - return -EINVAL; + goto out; dt.size = sregs->idt.limit; dt.address = sregs->idt.base; @@ -7602,7 +7779,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_make_request(KVM_REQ_EVENT, vcpu); - return 0; + ret = 0; +out: + vcpu_put(vcpu); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -7611,6 +7791,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, unsigned long rflags; int i, r; + vcpu_load(vcpu); + if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { r = -EBUSY; if (vcpu->arch.exception.pending) @@ -7656,7 +7838,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, r = 0; out: - + vcpu_put(vcpu); return r; } @@ -7670,6 +7852,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; int idx; + vcpu_load(vcpu); + idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -7678,14 +7862,17 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, tr->writeable = 1; tr->usermode = 0; + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxregs_state *fxsave = - &vcpu->arch.guest_fpu.state.fxsave; + struct fxregs_state *fxsave; + + vcpu_load(vcpu); + fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; fpu->fsw = fxsave->swd; @@ -7695,13 +7882,17 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fpu->last_dp = fxsave->rdp; memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space); + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxregs_state *fxsave = - &vcpu->arch.guest_fpu.state.fxsave; + struct fxregs_state *fxsave; + + vcpu_load(vcpu); + + fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -7712,6 +7903,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) fxsave->rdp = fpu->last_dp; memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space); + vcpu_put(vcpu); return 0; } @@ -7768,7 +7960,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); @@ -7780,16 +7972,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - int r; - kvm_vcpu_mtrr_init(vcpu); - r = vcpu_load(vcpu); - if (r) - return r; + vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); kvm_mmu_setup(vcpu); vcpu_put(vcpu); - return r; + return 0; } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -7799,13 +7987,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) kvm_hv_vcpu_postcreate(vcpu); - if (vcpu_load(vcpu)) + if (mutex_lock_killable(&vcpu->mutex)) return; + vcpu_load(vcpu); msr.data = 0x0; msr.index = MSR_IA32_TSC; msr.host_initiated = true; kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + mutex_unlock(&vcpu->mutex); if (!kvmclock_periodic_sync) return; @@ -7816,11 +8006,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - int r; vcpu->arch.apf.msr_val = 0; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); @@ -7832,6 +8020,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; + vcpu->arch.smi_count = 0; atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; @@ -7925,7 +8114,7 @@ int kvm_arch_hardware_enable(void) return ret; local_tsc = rdtsc(); - stable = !check_tsc_unstable(); + stable = !kvm_check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!stable && vcpu->cpu == smp_processor_id()) @@ -8191,9 +8380,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - int r; - r = vcpu_load(vcpu); - BUG_ON(r); + vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d0b95b7a90b4..b91215d1fd80 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -12,6 +12,7 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) { + vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = false; } @@ -265,36 +266,8 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) static inline bool kvm_mwait_in_guest(void) { - unsigned int eax, ebx, ecx, edx; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT)) - return false; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - /* All AMD CPUs have a working MWAIT implementation */ - return true; - case X86_VENDOR_INTEL: - /* Handle Intel below */ - break; - default: - return false; - } - - /* - * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as - * they would allow guest to stop the CPU completely by disabling - * interrupts then invoking MWAIT. - */ - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return false; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) - return false; - - return true; + return boot_cpu_has(X86_FEATURE_MWAIT) && + !boot_cpu_has_bug(X86_BUG_MONITOR); } #endif diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 69a473919260..91e9700cc6dc 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o OBJECT_FILES_NON_STANDARD_retpoline.o :=y diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 4846eff7e4c8..f5b7f1b3b6d7 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -162,7 +162,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -inline void __const_udelay(unsigned long xloops) +void __const_udelay(unsigned long xloops) { unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c new file mode 100644 index 000000000000..7b881d03d0dd --- /dev/null +++ b/arch/x86/lib/error-inject.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/error-injection.h> +#include <linux/kprobes.h> + +asmlinkage void just_return_func(void); + +asm( + ".type just_return_func, @function\n" + "just_return_func:\n" + " ret\n" + ".size just_return_func, .-just_return_func\n" +); + +void override_function_with_return(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&just_return_func; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 9fe656c42aa5..45f5d6cf65ae 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -21,16 +21,16 @@ ex_fixup_handler(const struct exception_table_entry *x) return (ex_handler_t)((unsigned long)&x->handler + x->handler); } -bool ex_handler_default(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); return true; } EXPORT_SYMBOL(ex_handler_default); -bool ex_handler_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; @@ -42,8 +42,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault); * Handler for UD0 exception following a failed test against the * result of a refcount inc/dec/add/sub. */ -bool ex_handler_refcount(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { /* First unconditionally saturate the refcount. */ *(int *)regs->cx = INT_MIN / 2; @@ -95,8 +95,8 @@ EXPORT_SYMBOL(ex_handler_refcount); * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ -bool ex_handler_fprestore(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); @@ -108,8 +108,8 @@ bool ex_handler_fprestore(const struct exception_table_entry *fixup, } EXPORT_SYMBOL_GPL(ex_handler_fprestore); -bool ex_handler_ext(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_ext(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ current->thread.uaccess_err = 1; @@ -118,8 +118,8 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_ext); -bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) @@ -133,8 +133,8 @@ bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); -bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, @@ -147,8 +147,8 @@ bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); -bool ex_handler_clear_fs(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); @@ -157,7 +157,7 @@ bool ex_handler_clear_fs(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_clear_fs); -bool ex_has_fault_handler(unsigned long ip) +__visible bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; ex_handler_t handler; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,23 +829,24 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 60ae1fe3609f..fecb0c0a6077 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } -EXPORT_SYMBOL_GPL(arch_add_memory); #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. */ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); @@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; @@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index e1d61e8500f9..1a53071e2e17 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -15,7 +15,7 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/mm.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/swiotlb.h> #include <linux/mem_encrypt.h> diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 4d434ddb75db..2c1ecf4763c4 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -29,7 +29,6 @@ #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/io.h> -#include <linux/kallsyms.h> #include <asm/pgtable.h> #include <linux/mmiotrace.h> #include <asm/e820/api.h> /* for ISA_START_ADDRESS */ @@ -123,8 +122,8 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", addr, my_reason->addr); print_pte(addr); - print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); - print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); + pr_emerg("faulting IP is at %pS\n", (void *)regs->ip); + pr_emerg("last faulting IP was at %pS\n", (void *)my_reason->ip); #ifdef __i386__ pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fe7d57a8fb60..1555bd7d3449 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -678,6 +678,25 @@ static enum page_cache_mode lookup_memtype(u64 paddr) } /** + * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type + * of @pfn cannot be overridden by UC MTRR memory type. + * + * Only to be called when PAT is enabled. + * + * Returns true, if the PAT memory type of @pfn is UC, UC-, or WC. + * Returns false in other cases. + */ +bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn) +{ + enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn)); + + return cm == _PAGE_CACHE_MODE_UC || + cm == _PAGE_CACHE_MODE_UC_MINUS || + cm == _PAGE_CACHE_MODE_WC; +} +EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr); + +/** * io_reserve_memtype - Request a memory type mapping for a region of memory * @start: start (physical address) of the region * @end: end (physical address) of the region diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0c936435ea93..7f1a51399674 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -229,6 +229,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #endif this_cpu_write(cpu_tlbstate.is_lazy, false); + /* + * The membarrier system call requires a full memory barrier and + * core serialization before returning to user-space, after + * storing to rq->curr. Writing to CR3 provides that full + * memory barrier and core serializing instruction. + */ if (real_prev == next) { VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != next->context.ctx_id); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0554e8aef4d5..4923d92f918d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,8 +15,6 @@ #include <asm/set_memory.h> #include <linux/bpf.h> -int bpf_jit_enable __read_mostly; - /* * assembly code in arch/x86/net/bpf_jit.S */ @@ -154,6 +152,11 @@ static bool is_ereg(u32 reg) BIT(BPF_REG_AX)); } +static bool is_axreg(u32 reg) +{ + return reg == BPF_REG_0; +} + /* add modifiers if 'reg' maps to x64 registers r8..r15 */ static u8 add_1mod(u8 byte, u32 reg) { @@ -447,16 +450,36 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); + /* b3 holds 'normal' opcode, b2 short form only valid + * in case dst is eax/rax. + */ switch (BPF_OP(insn->code)) { - case BPF_ADD: b3 = 0xC0; break; - case BPF_SUB: b3 = 0xE8; break; - case BPF_AND: b3 = 0xE0; break; - case BPF_OR: b3 = 0xC8; break; - case BPF_XOR: b3 = 0xF0; break; + case BPF_ADD: + b3 = 0xC0; + b2 = 0x05; + break; + case BPF_SUB: + b3 = 0xE8; + b2 = 0x2D; + break; + case BPF_AND: + b3 = 0xE0; + b2 = 0x25; + break; + case BPF_OR: + b3 = 0xC8; + b2 = 0x0D; + break; + case BPF_XOR: + b3 = 0xF0; + b2 = 0x35; + break; } if (is_imm8(imm32)) EMIT3(0x83, add_1reg(b3, dst_reg), imm32); + else if (is_axreg(dst_reg)) + EMIT1_off32(b2, imm32); else EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); break; @@ -545,26 +568,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (BPF_SRC(insn->code) == BPF_X) { - /* if (src_reg == 0) return 0 */ - - /* cmp r11, 0 */ - EMIT4(0x49, 0x83, 0xFB, 0x00); - - /* jne .+9 (skip over pop, pop, xor and jmp) */ - EMIT2(X86_JNE, 1 + 1 + 2 + 5); - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - EMIT2(0x31, 0xc0); /* xor eax, eax */ - - /* jmp cleanup_addr - * addrs[i] - 11, because there are 11 bytes - * after this insn: div, mov, pop, pop, mov - */ - jmp_offset = ctx->cleanup_addr - (addrs[i] - 11); - EMIT1_off32(0xE9, jmp_offset); - } - if (BPF_CLASS(insn->code) == BPF_ALU64) /* div r11 */ EMIT3(0x49, 0xF7, 0xF3); @@ -1109,19 +1112,29 @@ common_load: return proglen; } +struct x64_jit_data { + struct bpf_binary_header *header; + int *addrs; + u8 *image; + int proglen; + struct jit_context ctx; +}; + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; struct bpf_prog *tmp, *orig_prog = prog; + struct x64_jit_data *jit_data; int proglen, oldproglen = 0; struct jit_context ctx = {}; bool tmp_blinded = false; + bool extra_pass = false; u8 *image = NULL; int *addrs; int pass; int i; - if (!bpf_jit_enable) + if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); @@ -1135,10 +1148,28 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = tmp; } + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + addrs = jit_data->addrs; + if (addrs) { + ctx = jit_data->ctx; + oldproglen = jit_data->proglen; + image = jit_data->image; + header = jit_data->header; + extra_pass = true; + goto skip_init_addrs; + } addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; - goto out; + goto out_addrs; } /* Before first pass, make a rough estimation of addrs[] @@ -1149,6 +1180,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) addrs[i] = proglen; } ctx.cleanup_addr = proglen; +skip_init_addrs: /* JITed image shrinks with every pass and the loop iterates * until the image stops shrinking. Very large bpf programs @@ -1189,7 +1221,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (image) { bpf_flush_icache(header, image + proglen); - bpf_jit_binary_lock_ro(header); + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(header); + } else { + jit_data->addrs = addrs; + jit_data->ctx = ctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = header; + } prog->bpf_func = (void *)image; prog->jited = 1; prog->jited_len = proglen; @@ -1197,8 +1237,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; } + if (!prog->is_func || extra_pass) { out_addrs: - kfree(addrs); + kfree(addrs); + kfree(jit_data); + prog->aux->jit_data = NULL; + } out: if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 511921045312..43867bc85368 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -300,6 +300,7 @@ int __init intel_mid_pci_init(void) pci_root_ops = intel_mid_pci_ops; pci_soc_mode = 1; /* Continue with standard init */ + acpi_noirq_set(); return 1; } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0452629148be..52e55108404e 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -839,7 +839,8 @@ static void __init pirq_find_router(struct irq_router *r) DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", rt->rtr_vendor, rt->rtr_device); - pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); + pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus, + rt->rtr_devfn); if (!pirq_router_dev) { DBG(KERN_DEBUG "PCI: Interrupt router not found at " "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 53d600217973..75577c1490c4 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -26,6 +26,7 @@ #include <linux/pci_ids.h> #include <linux/export.h> #include <linux/list.h> +#include <linux/dma-direct.h> #include <asm/iommu.h> #define STA2X11_SWIOTLB_SIZE (4*1024*1024) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index c4b3646bd04c..9542a746dc50 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -409,10 +409,8 @@ int __init pci_xen_init(void) pcibios_enable_irq = xen_pcifront_enable_irq; pcibios_disable_irq = NULL; -#ifdef CONFIG_ACPI /* Keep ACPI out of the picture */ - acpi_noirq = 1; -#endif + acpi_noirq_set(); #ifdef CONFIG_PCI_MSI x86_msi.setup_msi_irqs = xen_setup_msi_irqs; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2dd15e967c3f..c310a8284358 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -25,7 +25,6 @@ #include <linux/spinlock.h> #include <linux/bootmem.h> #include <linux/ioport.h> -#include <linux/init.h> #include <linux/mc146818rtc.h> #include <linux/efi.h> #include <linux/uaccess.h> diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 86676cec99a1..2c67bae6bb53 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -194,7 +194,7 @@ void __init x86_intel_mid_early_setup(void) x86_platform.calibrate_tsc = intel_mid_calibrate_tsc; x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; - x86_init.pci.init = intel_mid_pci_init; + x86_init.pci.arch_init = intel_mid_pci_init; x86_init.pci.fixup_irqs = x86_init_noop; legacy_pic = &null_legacy_pic; diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 19b43e3a9f0f..7be1e1fe9ae3 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -96,8 +96,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) pentry->freq_hz, pentry->irq); mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; - /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; + mp_irq.irqflag = MP_IRQTRIG_EDGE | MP_IRQPOL_ACTIVE_HIGH; mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; @@ -168,7 +167,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) totallen, (u32)pentry->phys_addr, pentry->irq); mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; - mp_irq.irqflag = 0xf; /* level trigger and active low */ + mp_irq.irqflag = MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW; mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index a952ac199741..6f37a2137a79 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -218,14 +218,23 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier); +int iosf_mbi_unregister_pmic_bus_access_notifier_unlocked( + struct notifier_block *nb) +{ + iosf_mbi_assert_punit_acquired(); + + return blocking_notifier_chain_unregister( + &iosf_mbi_pmic_bus_access_notifier, nb); +} +EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier_unlocked); + int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) { int ret; /* Wait for the bus to go inactive before unregistering */ mutex_lock(&iosf_mbi_punit_mutex); - ret = blocking_notifier_chain_unregister( - &iosf_mbi_pmic_bus_access_notifier, nb); + ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb); mutex_unlock(&iosf_mbi_punit_mutex); return ret; @@ -239,6 +248,12 @@ int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) } EXPORT_SYMBOL(iosf_mbi_call_pmic_bus_access_notifier_chain); +void iosf_mbi_assert_punit_acquired(void) +{ + WARN_ON(!mutex_is_locked(&iosf_mbi_punit_mutex)); +} +EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired); + #ifdef CONFIG_IOSF_MBI_DEBUG static u32 dbg_mdr; static u32 dbg_mcr; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7d5d53f36a7a..db77e087adaf 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1751,7 +1751,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) uv1 = 1; /* the 14-bit pnode */ - write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); + write_mmr_descriptor_base(pnode, + (n << UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT | m)); /* * Initializing all 8 (ITEMS_PER_DESC) descriptors for each * cpu even though we only use the first one; one descriptor can diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index c35fdb585c68..afc4ed7b1578 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -145,7 +145,7 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir) #endif } -int swsusp_arch_resume(void) +asmlinkage int swsusp_arch_resume(void) { int error; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index f910c514438f..0ef5e5204968 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -174,7 +174,7 @@ out: return 0; } -int swsusp_arch_resume(void) +asmlinkage int swsusp_arch_resume(void) { int error; diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 972b8e8d939c..09af7ff53044 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -13,28 +13,28 @@ else posttest_64bit = -n endif -distill_awk = $(srctree)/arch/x86/tools/distill.awk +reformatter = $(srctree)/arch/x86/tools/objdump_reformat.awk chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk quiet_cmd_posttest = TEST $@ - cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(reformatter) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose) quiet_cmd_sanitytest = TEST $@ cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 -posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity +posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity $(call cmd,posttest) $(call cmd,sanitytest) -hostprogs-y += test_get_len insn_sanity +hostprogs-y += insn_decoder_test insn_sanity # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ +HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ # Dependencies are also needed. -$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/insn_decoder_test.c index ecf31e0358c8..a3b4fd954931 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -9,10 +9,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright (C) IBM Corporation, 2009 */ @@ -21,6 +17,7 @@ #include <string.h> #include <assert.h> #include <unistd.h> +#include <stdarg.h> #define unlikely(cond) (cond) @@ -33,7 +30,7 @@ * particular. See if insn_get_length() and the disassembler agree * on the length of each instruction in an elf disassembly. * - * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + * Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test */ const char *prog; @@ -42,8 +39,8 @@ static int x86_64; static void usage(void) { - fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " %s [-y|-n] [-v]\n", prog); + fprintf(stderr, "Usage: objdump -d a.out | awk -f objdump_reformat.awk" + " | %s [-y|-n] [-v]\n", prog); fprintf(stderr, "\t-y 64bit mode\n"); fprintf(stderr, "\t-n 32bit mode\n"); fprintf(stderr, "\t-v verbose mode\n"); @@ -52,10 +49,21 @@ static void usage(void) static void malformed_line(const char *line, int line_nr) { - fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + fprintf(stderr, "%s: error: malformed line %d:\n%s", + prog, line_nr, line); exit(3); } +static void pr_warn(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "%s: warning: ", prog); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + static void dump_field(FILE *fp, const char *name, const char *indent, struct insn_field *field) { @@ -153,21 +161,20 @@ int main(int argc, char **argv) insn_get_length(&insn); if (insn.length != nb) { warnings++; - fprintf(stderr, "Warning: %s found difference at %s\n", - prog, sym); - fprintf(stderr, "Warning: %s", line); - fprintf(stderr, "Warning: objdump says %d bytes, but " - "insn_get_length() says %d\n", nb, - insn.length); + pr_warn("Found an x86 instruction decoder bug, " + "please report this.\n", sym); + pr_warn("%s", line); + pr_warn("objdump says %d bytes, but insn_get_length() " + "says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); } } if (warnings) - fprintf(stderr, "Warning: decoded and checked %d" - " instructions with %d warnings\n", insns, warnings); + pr_warn("Decoded and checked %d instructions with %d " + "failures\n", insns, warnings); else - fprintf(stdout, "Success: decoded and checked %d" - " instructions\n", insns); + fprintf(stdout, "%s: success: Decoded and checked %d" + " instructions\n", prog, insns); return 0; } diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/objdump_reformat.awk index e0edeccc1429..f418c91b71f0 100644 --- a/arch/x86/tools/distill.awk +++ b/arch/x86/tools/objdump_reformat.awk @@ -1,7 +1,7 @@ #!/bin/awk -f # SPDX-License-Identifier: GPL-2.0 -# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len -# Distills the disassembly as follows: +# Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test +# Reformats the disassembly as follows: # - Removes all lines except the disassembled instructions. # - For instructions that exceed 1 line (7 bytes), crams all the hex bytes # into a single line. diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index 2cfcfe4f6b2a..dd2ad82eee80 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -75,6 +75,6 @@ void __init xen_hvm_init_mmu_ops(void) if (is_pagetable_dying_supported()) pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; #ifdef CONFIG_PROC_VMCORE - register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); + WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); #endif } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 13b4f19b9131..159a897151d6 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -694,6 +694,9 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i, ret = 0; pte_t *pte; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + if (kmap_ops) { ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, kmap_ops, count); @@ -736,6 +739,9 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, { int i, ret = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + for (i = 0; i < count; i++) { unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 02f3445a2b5f..cd97a62394e7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -23,8 +23,6 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); static bool xen_pvspin = true; -#include <asm/qspinlock.h> - static void xen_qlock_kick(int cpu) { int irq = per_cpu(lock_kicker_irq, cpu); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 497cc55a0c16..96f26e026783 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -9,7 +9,9 @@ #include <asm/boot.h> #include <asm/asm.h> +#include <asm/msr.h> #include <asm/page_types.h> +#include <asm/percpu.h> #include <asm/unwind_hints.h> #include <xen/interface/elfnote.h> @@ -35,6 +37,20 @@ ENTRY(startup_xen) mov %_ASM_SI, xen_start_info mov $init_thread_union+THREAD_SIZE, %_ASM_SP +#ifdef CONFIG_X86_64 + /* Set up %gs. + * + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. + */ + movl $MSR_GS_BASE,%ecx + movq $INIT_PER_CPU_VAR(irq_stack_union),%rax + cdq + wrmsr +#endif + jmp xen_start_kernel END(startup_xen) __FINIT |