summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/unistd.h2
-rw-r--r--arch/alpha/kernel/entry.S53
-rw-r--r--arch/alpha/kernel/systbls.S318
-rw-r--r--arch/arc/include/uapi/asm/unistd.h1
-rw-r--r--arch/arm/boot/dts/imx7d.dtsi5
-rw-r--r--arch/arm/crypto/Kconfig7
-rw-r--r--arch/arm/crypto/Makefile2
-rw-r--r--arch/arm/crypto/chacha20-neon-core.S277
-rw-r--r--arch/arm/crypto/crc32-ce-glue.c2
-rw-r--r--arch/arm/crypto/ghash-ce-core.S108
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c38
-rw-r--r--arch/arm/crypto/speck-neon-core.S434
-rw-r--r--arch/arm/crypto/speck-neon-glue.c288
-rw-r--r--arch/arm/include/asm/kvm_arm.h3
-rw-r--r--arch/arm/include/asm/kvm_host.h13
-rw-r--r--arch/arm/include/asm/kvm_mmu.h15
-rw-r--r--arch/arm/include/asm/stage2_pgtable.h54
-rw-r--r--arch/arm/include/asm/unistd.h4
-rw-r--r--arch/arm/mach-at91/pm_suspend.S8
-rw-r--r--arch/arm64/Kconfig.platforms1
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi37
-rw-r--r--arch/arm64/configs/defconfig2
-rw-r--r--arch/arm64/crypto/Kconfig11
-rw-r--r--arch/arm64/crypto/Makefile6
-rw-r--r--arch/arm64/crypto/aes-ce.S5
-rw-r--r--arch/arm64/crypto/aes-glue.c217
-rw-r--r--arch/arm64/crypto/aes-modes.S416
-rw-r--r--arch/arm64/crypto/aes-neon.S6
-rw-r--r--arch/arm64/crypto/crc32-ce-core.S287
-rw-r--r--arch/arm64/crypto/crc32-ce-glue.c244
-rw-r--r--arch/arm64/crypto/crct10dif-ce-core.S314
-rw-r--r--arch/arm64/crypto/crct10dif-ce-glue.c14
-rw-r--r--arch/arm64/crypto/speck-neon-core.S352
-rw-r--r--arch/arm64/crypto/speck-neon-glue.c282
-rw-r--r--arch/arm64/include/asm/compat.h26
-rw-r--r--arch/arm64/include/asm/cpufeature.h21
-rw-r--r--arch/arm64/include/asm/kvm_arm.h155
-rw-r--r--arch/arm64/include/asm/kvm_asm.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h18
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h10
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h42
-rw-r--r--arch/arm64/include/asm/ptrace.h3
-rw-r--r--arch/arm64/include/asm/stage2_pgtable-nopmd.h42
-rw-r--r--arch/arm64/include/asm/stage2_pgtable-nopud.h39
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h236
-rw-r--r--arch/arm64/include/asm/stat.h2
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/uapi/asm/unistd.h1
-rw-r--r--arch/arm64/kernel/pci.c5
-rw-r--r--arch/arm64/kvm/guest.c6
-rw-r--r--arch/arm64/kvm/handle_exit.c7
-rw-r--r--arch/arm64/kvm/hyp/Makefile1
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S16
-rw-r--r--arch/arm64/kvm/hyp/s2-setup.c90
-rw-r--r--arch/arm64/kvm/hyp/switch.c4
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c19
-rw-r--r--arch/arm64/kvm/hyp/tlb.c4
-rw-r--r--arch/arm64/kvm/reset.c108
-rw-r--r--arch/c6x/include/uapi/asm/unistd.h1
-rw-r--r--arch/h8300/include/uapi/asm/unistd.h1
-rw-r--r--arch/hexagon/include/uapi/asm/unistd.h1
-rw-r--r--arch/ia64/include/asm/unistd.h3
-rw-r--r--arch/m68k/configs/amiga_defconfig2
-rw-r--r--arch/m68k/configs/apollo_defconfig2
-rw-r--r--arch/m68k/configs/atari_defconfig2
-rw-r--r--arch/m68k/configs/bvme6000_defconfig2
-rw-r--r--arch/m68k/configs/hp300_defconfig2
-rw-r--r--arch/m68k/configs/mac_defconfig2
-rw-r--r--arch/m68k/configs/multi_defconfig2
-rw-r--r--arch/m68k/configs/mvme147_defconfig2
-rw-r--r--arch/m68k/configs/mvme16x_defconfig2
-rw-r--r--arch/m68k/configs/q40_defconfig2
-rw-r--r--arch/m68k/configs/sun3_defconfig2
-rw-r--r--arch/m68k/configs/sun3x_defconfig2
-rw-r--r--arch/m68k/include/asm/unistd.h2
-rw-r--r--arch/microblaze/include/asm/unistd.h2
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi15
-rw-r--r--arch/mips/boot/dts/ingenic/jz4770.dtsi30
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi3
-rw-r--r--arch/mips/include/asm/compat.h28
-rw-r--r--arch/mips/include/asm/unistd.h3
-rw-r--r--arch/mips/kernel/binfmt_elfn32.c14
-rw-r--r--arch/mips/kernel/binfmt_elfo32.c14
-rw-r--r--arch/nds32/include/uapi/asm/unistd.h1
-rw-r--r--arch/nios2/include/uapi/asm/unistd.h1
-rw-r--r--arch/openrisc/include/uapi/asm/unistd.h1
-rw-r--r--arch/parisc/include/asm/compat.h24
-rw-r--r--arch/parisc/include/asm/unistd.h3
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h21
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h1
-rw-r--r--arch/powerpc/include/asm/compat.h24
-rw-r--r--arch/powerpc/include/asm/hvcall.h41
-rw-r--r--arch/powerpc/include/asm/iommu.h2
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h45
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h118
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h3
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h16
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h8
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h2
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h1
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/include/asm/unistd.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c13
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S4
-rw-r--r--arch/powerpc/kvm/Makefile3
-rw-r--r--arch/powerpc/kvm/book3s.c46
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c718
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c94
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c87
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c873
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c92
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S95
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c1291
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c10
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c13
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S809
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm_builtin.c5
-rw-r--r--arch/powerpc/kvm/book3s_pr.c5
-rw-r--r--arch/powerpc/kvm/book3s_xics.c14
-rw-r--r--arch/powerpc/kvm/book3s_xive.c63
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c8
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S8
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c1
-rw-r--r--arch/powerpc/kvm/powerpc.c15
-rw-r--r--arch/powerpc/kvm/tm.S250
-rw-r--r--arch/powerpc/kvm/trace_book3s.h1
-rw-r--r--arch/powerpc/mm/tlb-radix.c9
-rw-r--r--arch/powerpc/oprofile/backtrace.c2
-rw-r--r--arch/riscv/Kconfig52
-rw-r--r--arch/riscv/Kconfig.debug35
-rw-r--r--arch/riscv/Makefile21
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/futex.h128
-rw-r--r--arch/riscv/include/asm/processor.h2
-rw-r--r--arch/riscv/include/asm/smp.h47
-rw-r--r--arch/riscv/include/asm/switch_to.h12
-rw-r--r--arch/riscv/include/asm/tlbflush.h16
-rw-r--r--arch/riscv/include/asm/unistd.h1
-rw-r--r--arch/riscv/include/uapi/asm/elf.h3
-rw-r--r--arch/riscv/kernel/Makefile1
-rw-r--r--arch/riscv/kernel/cacheinfo.c7
-rw-r--r--arch/riscv/kernel/cpu.c87
-rw-r--r--arch/riscv/kernel/cpufeature.c15
-rw-r--r--arch/riscv/kernel/entry.S88
-rw-r--r--arch/riscv/kernel/fpu.S106
-rw-r--r--arch/riscv/kernel/head.S4
-rw-r--r--arch/riscv/kernel/irq.c12
-rw-r--r--arch/riscv/kernel/mcount.S1
-rw-r--r--arch/riscv/kernel/process.c6
-rw-r--r--arch/riscv/kernel/ptrace.c52
-rw-r--r--arch/riscv/kernel/setup.c13
-rw-r--r--arch/riscv/kernel/signal.c75
-rw-r--r--arch/riscv/kernel/smp.c82
-rw-r--r--arch/riscv/kernel/smpboot.c46
-rw-r--r--arch/riscv/lib/Makefile3
-rw-r--r--arch/riscv/mm/ioremap.c2
-rw-r--r--arch/s390/Kconfig11
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/performance_defconfig1
-rw-r--r--arch/s390/crypto/aes_s390.c48
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/include/asm/compat.h18
-rw-r--r--arch/s390/include/asm/kvm_host.h15
-rw-r--r--arch/s390/include/asm/unistd.h3
-rw-r--r--arch/s390/include/uapi/asm/kvm.h2
-rw-r--r--arch/s390/kvm/kvm-s390.c184
-rw-r--r--arch/s390/kvm/kvm-s390.h1
-rw-r--r--arch/s390/kvm/vsie.c210
-rw-r--r--arch/s390/mm/gmap.c10
-rw-r--r--arch/s390/tools/gen_facilities.c2
-rw-r--r--arch/sh/include/asm/unistd.h2
-rw-r--r--arch/sparc/include/asm/compat.h25
-rw-r--r--arch/sparc/include/asm/unistd.h3
-rw-r--r--arch/unicore32/include/uapi/asm/unistd.h1
-rw-r--r--arch/x86/crypto/Makefile5
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c47
-rw-r--r--arch/x86/crypto/fpu.c207
-rw-r--r--arch/x86/crypto/sha1-mb/Makefile14
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb.c1011
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_ctx.h134
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr.h110
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S287
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S304
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c64
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S209
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_x8_avx2.S492
-rw-r--r--arch/x86/crypto/sha256-mb/Makefile14
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb.c1013
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_ctx.h134
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr.h108
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S304
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S307
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c65
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S214
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_x8_avx2.S598
-rw-r--r--arch/x86/crypto/sha512-mb/Makefile12
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb.c1047
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_ctx.h128
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr.h104
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S281
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S297
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c69
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S224
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_x4_avx2.S531
-rw-r--r--arch/x86/include/asm/compat.h19
-rw-r--r--arch/x86/include/asm/kvm_host.h70
-rw-r--r--arch/x86/include/asm/unistd.h3
-rw-r--r--arch/x86/include/asm/virtext.h2
-rw-r--r--arch/x86/include/asm/vmx.h13
-rw-r--r--arch/x86/include/uapi/asm/kvm.h8
-rw-r--r--arch/x86/kvm/hyperv.c280
-rw-r--r--arch/x86/kvm/hyperv.h4
-rw-r--r--arch/x86/kvm/lapic.c45
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c393
-rw-r--r--arch/x86/kvm/mmu.h13
-rw-r--r--arch/x86/kvm/mmu_audit.c12
-rw-r--r--arch/x86/kvm/paging_tmpl.h15
-rw-r--r--arch/x86/kvm/svm.c64
-rw-r--r--arch/x86/kvm/trace.h42
-rw-r--r--arch/x86/kvm/vmx.c2287
-rw-r--r--arch/x86/kvm/vmx_shadow_fields.h5
-rw-r--r--arch/x86/kvm/x86.c244
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/pci/acpi.c2
-rw-r--r--arch/x86/pci/fixup.c12
-rw-r--r--arch/xtensa/include/asm/unistd.h2
234 files changed, 9319 insertions, 13730 deletions
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index d6e29a1de4cc..9ff37aa1165f 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -6,6 +6,7 @@
#define NR_SYSCALLS 523
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_GETHOSTNAME
@@ -13,6 +14,7 @@
#define __ARCH_WANT_SYS_GETPGRP
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index c64806a2daf5..2e09248f8324 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -473,7 +473,7 @@ entSys:
bne $3, strace
beq $4, 1f
ldq $27, 0($5)
-1: jsr $26, ($27), alpha_ni_syscall
+1: jsr $26, ($27), sys_ni_syscall
ldgp $gp, 0($26)
blt $0, $syscall_error /* the call failed */
stq $0, 0($sp)
@@ -587,7 +587,7 @@ strace:
/* get the system call pointer.. */
lda $1, NR_SYSCALLS($31)
lda $2, sys_call_table
- lda $27, alpha_ni_syscall
+ lda $27, sys_ni_syscall
cmpult $0, $1, $1
s8addq $0, $2, $2
beq $1, 1f
@@ -791,7 +791,7 @@ ret_from_kernel_thread:
/*
* Special system calls. Most of these are special in that they either
- * have to play switch_stack games or in some way use the pt_regs struct.
+ * have to play switch_stack games.
*/
.macro fork_like name
@@ -812,46 +812,41 @@ fork_like fork
fork_like vfork
fork_like clone
+.macro sigreturn_like name
.align 4
- .globl sys_sigreturn
- .ent sys_sigreturn
-sys_sigreturn:
+ .globl sys_\name
+ .ent sys_\name
+sys_\name:
.prologue 0
lda $9, ret_from_straced
cmpult $26, $9, $9
lda $sp, -SWITCH_STACK_SIZE($sp)
- jsr $26, do_sigreturn
+ jsr $26, do_\name
bne $9, 1f
jsr $26, syscall_trace_leave
1: br $1, undo_switch_stack
br ret_from_sys_call
-.end sys_sigreturn
+.end sys_\name
+.endm
- .align 4
- .globl sys_rt_sigreturn
- .ent sys_rt_sigreturn
-sys_rt_sigreturn:
- .prologue 0
- lda $9, ret_from_straced
- cmpult $26, $9, $9
- lda $sp, -SWITCH_STACK_SIZE($sp)
- jsr $26, do_rt_sigreturn
- bne $9, 1f
- jsr $26, syscall_trace_leave
-1: br $1, undo_switch_stack
- br ret_from_sys_call
-.end sys_rt_sigreturn
+sigreturn_like sigreturn
+sigreturn_like rt_sigreturn
.align 4
- .globl alpha_ni_syscall
- .ent alpha_ni_syscall
-alpha_ni_syscall:
+ .globl alpha_syscall_zero
+ .ent alpha_syscall_zero
+alpha_syscall_zero:
.prologue 0
- /* Special because it also implements overflow handling via
- syscall number 0. And if you recall, zero is a special
- trigger for "not an error". Store large non-zero there. */
+ /* Special because it needs to do something opposite to
+ force_successful_syscall_return(). We use the saved
+ syscall number for that, zero meaning "not an error".
+ That works nicely, but for real syscall 0 we need to
+ make sure that this logics doesn't get confused.
+ Store a non-zero there - -ENOSYS we need in register
+ for our return value will do just fine.
+ */
lda $0, -ENOSYS
unop
stq $0, 0($sp)
ret
-.end alpha_ni_syscall
+.end alpha_syscall_zero
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 1374e591511f..5b2e8ecb7ce3 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -11,93 +11,93 @@
.align 3
.globl sys_call_table
sys_call_table:
- .quad alpha_ni_syscall /* 0 */
+ .quad alpha_syscall_zero /* 0 */
.quad sys_exit
.quad alpha_fork
.quad sys_read
.quad sys_write
- .quad alpha_ni_syscall /* 5 */
+ .quad sys_ni_syscall /* 5 */
.quad sys_close
.quad sys_osf_wait4
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_link
.quad sys_unlink /* 10 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_chdir
.quad sys_fchdir
.quad sys_mknod
.quad sys_chmod /* 15 */
.quad sys_chown
.quad sys_osf_brk
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_lseek
.quad sys_getxpid /* 20 */
.quad sys_osf_mount
.quad sys_umount
.quad sys_setuid
.quad sys_getxuid
- .quad alpha_ni_syscall /* 25 */
+ .quad sys_ni_syscall /* 25 */
.quad sys_ptrace
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 30 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 30 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_access
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 35 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 35 */
.quad sys_sync
.quad sys_kill
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_setpgid
- .quad alpha_ni_syscall /* 40 */
+ .quad sys_ni_syscall /* 40 */
.quad sys_dup
.quad sys_alpha_pipe
.quad sys_osf_set_program_attributes
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_open /* 45 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getxgid
.quad sys_osf_sigprocmask
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 50 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 50 */
.quad sys_acct
.quad sys_sigpending
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_ioctl
- .quad alpha_ni_syscall /* 55 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall /* 55 */
+ .quad sys_ni_syscall
.quad sys_symlink
.quad sys_readlink
.quad sys_execve
.quad sys_umask /* 60 */
.quad sys_chroot
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getpgrp
.quad sys_getpagesize
- .quad alpha_ni_syscall /* 65 */
+ .quad sys_ni_syscall /* 65 */
.quad alpha_vfork
.quad sys_newstat
.quad sys_newlstat
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 70 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 70 */
.quad sys_osf_mmap
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_munmap
.quad sys_mprotect
.quad sys_madvise /* 75 */
.quad sys_vhangup
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getgroups
/* map BSD's setpgrp to sys_setpgid for binary compatibility: */
.quad sys_setgroups /* 80 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_setpgid
.quad sys_osf_setitimer
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 85 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 85 */
.quad sys_osf_getitimer
.quad sys_gethostname
.quad sys_sethostname
@@ -119,19 +119,19 @@ sys_call_table:
.quad sys_bind
.quad sys_setsockopt /* 105 */
.quad sys_listen
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 110 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 110 */
.quad sys_sigsuspend
.quad sys_osf_sigstack
.quad sys_recvmsg
.quad sys_sendmsg
- .quad alpha_ni_syscall /* 115 */
+ .quad sys_ni_syscall /* 115 */
.quad sys_osf_gettimeofday
.quad sys_osf_getrusage
.quad sys_getsockopt
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
#ifdef CONFIG_OSF4_COMPAT
.quad sys_osf_readv /* 120 */
.quad sys_osf_writev
@@ -156,66 +156,66 @@ sys_call_table:
.quad sys_mkdir
.quad sys_rmdir
.quad sys_osf_utimes
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 140 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 140 */
.quad sys_getpeername
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getrlimit
.quad sys_setrlimit /* 145 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_setsid
.quad sys_quotactl
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getsockname /* 150 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 155 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 155 */
.quad sys_osf_sigaction
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_osf_getdirentries
.quad sys_osf_statfs /* 160 */
.quad sys_osf_fstatfs
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_osf_getdomainname /* 165 */
.quad sys_setdomainname
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 170 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 175 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 180 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 185 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 190 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 195 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 170 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 175 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 180 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 185 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 190 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 195 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
/* The OSF swapon has two extra arguments, but we ignore them. */
.quad sys_swapon
.quad sys_msgctl /* 200 */
@@ -231,93 +231,93 @@ sys_call_table:
.quad sys_shmctl /* 210 */
.quad sys_shmdt
.quad sys_shmget
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 215 */
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 215 */
+ .quad sys_ni_syscall
.quad sys_msync
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 220 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 220 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_osf_stat
.quad sys_osf_lstat /* 225 */
.quad sys_osf_fstat
.quad sys_osf_statfs64
.quad sys_osf_fstatfs64
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 230 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 230 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_getpgid
.quad sys_getsid
.quad sys_sigaltstack /* 235 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 240 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 240 */
.quad sys_osf_sysinfo
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_osf_proplist_syscall
- .quad alpha_ni_syscall /* 245 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 250 */
+ .quad sys_ni_syscall /* 245 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 250 */
.quad sys_osf_usleep_thread
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
.quad sys_sysfs
- .quad alpha_ni_syscall /* 255 */
+ .quad sys_ni_syscall /* 255 */
.quad sys_osf_getsysinfo
.quad sys_osf_setsysinfo
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 260 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 265 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 270 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 275 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 280 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 285 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 290 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall /* 295 */
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 260 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 265 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 270 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 275 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 280 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 285 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 290 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall /* 295 */
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
+ .quad sys_ni_syscall
/* linux-specific system calls start at 300 */
.quad sys_bdflush /* 300 */
.quad sys_sethae
diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 517178b1daef..3b3543fd151c 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -17,6 +17,7 @@
#define _UAPI_ASM_ARC_UNISTD_H
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_VFORK
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 7234e8330a57..efbdeaaa8dcd 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -146,8 +146,9 @@
fsl,max-link-speed = <2>;
power-domains = <&pgc_pcie_phy>;
resets = <&src IMX7_RESET_PCIEPHY>,
- <&src IMX7_RESET_PCIE_CTRL_APPS_EN>;
- reset-names = "pciephy", "apps";
+ <&src IMX7_RESET_PCIE_CTRL_APPS_EN>,
+ <&src IMX7_RESET_PCIE_CTRL_APPS_TURNOFF>;
+ reset-names = "pciephy", "apps", "turnoff";
status = "disabled";
};
};
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 925d1364727a..ef0c7feea6e2 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
depends on KERNEL_MODE_NEON
select CRYPTO_HASH
select CRYPTO_CRYPTD
+ select CRYPTO_GF128MUL
help
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
@@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
-config CRYPTO_SPECK_NEON
- tristate "NEON accelerated Speck cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_SPECK
-
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8de542c48ade..bd5bceef0605 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -54,7 +53,6 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha20-neon-core.S
index 451a849ad518..50e7b9896818 100644
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ b/arch/arm/crypto/chacha20-neon-core.S
@@ -18,6 +18,34 @@
* (at your option) any later version.
*/
+ /*
+ * NEON doesn't have a rotate instruction. The alternatives are, more or less:
+ *
+ * (a) vshl.u32 + vsri.u32 (needs temporary register)
+ * (b) vshl.u32 + vshr.u32 + vorr (needs temporary register)
+ * (c) vrev32.16 (16-bit rotations only)
+ * (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only,
+ * needs index vector)
+ *
+ * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit
+ * rotations, the only choices are (a) and (b). We use (a) since it takes
+ * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
+ *
+ * For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
+ * and doesn't need a temporary register.
+ *
+ * For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence
+ * is twice as fast as (a), even when doing (a) on multiple registers
+ * simultaneously to eliminate the stall between vshl and vsri. Also, it
+ * parallelizes better when temporary registers are scarce.
+ *
+ * A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
+ * (a), so the need to load the rotation table actually makes the vtbl method
+ * slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it
+ * seems to be a good compromise to get a more significant speed boost on some
+ * CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
+ */
+
#include <linux/linkage.h>
.text
@@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
vmov q10, q2
vmov q11, q3
+ adr ip, .Lrol8_table
mov r3, #10
+ vld1.8 {d10}, [ip, :64]
.Ldoubleround:
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
+ veor q3, q3, q0
+ vtbl.8 d6, {d6}, d10
+ vtbl.8 d7, {d7}, d10
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
@@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
vadd.i32 q0, q0, q1
- veor q4, q3, q0
- vshl.u32 q3, q4, #8
- vsri.u32 q3, q4, #24
+ veor q3, q3, q0
+ vtbl.8 d6, {d6}, d10
+ vtbl.8 d7, {d7}, d10
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
vadd.i32 q2, q2, q3
@@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
bx lr
ENDPROC(chacha20_block_xor_neon)
+ .align 4
+.Lctrinc: .word 0, 1, 2, 3
+.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6
+
.align 5
ENTRY(chacha20_4block_xor_neon)
- push {r4-r6, lr}
- mov ip, sp // preserve the stack pointer
- sub r3, sp, #0x20 // allocate a 32 byte buffer
- bic r3, r3, #0x1f // aligned to 32 bytes
- mov sp, r3
+ push {r4-r5}
+ mov r4, sp // preserve the stack pointer
+ sub ip, sp, #0x20 // allocate a 32 byte buffer
+ bic ip, ip, #0x1f // aligned to 32 bytes
+ mov sp, ip
// r0: Input state matrix, s
// r1: 4 data blocks output, o
@@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
// This function encrypts four consecutive ChaCha20 blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
- // requires no word shuffling. For final XORing step we transpose the
- // matrix by interleaving 32- and then 64-bit words, which allows us to
- // do XOR in NEON registers.
+ // requires no word shuffling. The words are re-interleaved before the
+ // final addition of the original state and the XORing step.
//
- // x0..15[0-3] = s0..3[0..3]
- add r3, r0, #0x20
+ // x0..15[0-3] = s0..15[0-3]
+ add ip, r0, #0x20
vld1.32 {q0-q1}, [r0]
- vld1.32 {q2-q3}, [r3]
+ vld1.32 {q2-q3}, [ip]
- adr r3, CTRINC
+ adr r5, .Lctrinc
vdup.32 q15, d7[1]
vdup.32 q14, d7[0]
- vld1.32 {q11}, [r3, :128]
+ vld1.32 {q4}, [r5, :128]
vdup.32 q13, d6[1]
vdup.32 q12, d6[0]
- vadd.i32 q12, q12, q11 // x12 += counter values 0-3
vdup.32 q11, d5[1]
vdup.32 q10, d5[0]
+ vadd.u32 q12, q12, q4 // x12 += counter values 0-3
vdup.32 q9, d4[1]
vdup.32 q8, d4[0]
vdup.32 q7, d3[1]
@@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
vdup.32 q1, d0[1]
vdup.32 q0, d0[0]
+ adr ip, .Lrol8_table
mov r3, #10
+ b 1f
.Ldoubleround4:
+ vld1.32 {q8-q9}, [sp, :256]
+1:
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
@@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ vld1.8 {d16}, [ip, :64]
vadd.i32 q0, q0, q4
vadd.i32 q1, q1, q5
vadd.i32 q2, q2, q6
vadd.i32 q3, q3, q7
- veor q8, q12, q0
- veor q9, q13, q1
- vshl.u32 q12, q8, #8
- vshl.u32 q13, q9, #8
- vsri.u32 q12, q8, #24
- vsri.u32 q13, q9, #24
+ veor q12, q12, q0
+ veor q13, q13, q1
+ veor q14, q14, q2
+ veor q15, q15, q3
- veor q8, q14, q2
- veor q9, q15, q3
- vshl.u32 q14, q8, #8
- vshl.u32 q15, q9, #8
- vsri.u32 q14, q8, #24
- vsri.u32 q15, q9, #24
+ vtbl.8 d24, {d24}, d16
+ vtbl.8 d25, {d25}, d16
+ vtbl.8 d26, {d26}, d16
+ vtbl.8 d27, {d27}, d16
+ vtbl.8 d28, {d28}, d16
+ vtbl.8 d29, {d29}, d16
+ vtbl.8 d30, {d30}, d16
+ vtbl.8 d31, {d31}, d16
vld1.32 {q8-q9}, [sp, :256]
@@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ vld1.8 {d16}, [ip, :64]
vadd.i32 q0, q0, q5
vadd.i32 q1, q1, q6
vadd.i32 q2, q2, q7
vadd.i32 q3, q3, q4
- veor q8, q15, q0
- veor q9, q12, q1
- vshl.u32 q15, q8, #8
- vshl.u32 q12, q9, #8
- vsri.u32 q15, q8, #24
- vsri.u32 q12, q9, #24
+ veor q15, q15, q0
+ veor q12, q12, q1
+ veor q13, q13, q2
+ veor q14, q14, q3
- veor q8, q13, q2
- veor q9, q14, q3
- vshl.u32 q13, q8, #8
- vshl.u32 q14, q9, #8
- vsri.u32 q13, q8, #24
- vsri.u32 q14, q9, #24
+ vtbl.8 d30, {d30}, d16
+ vtbl.8 d31, {d31}, d16
+ vtbl.8 d24, {d24}, d16
+ vtbl.8 d25, {d25}, d16
+ vtbl.8 d26, {d26}, d16
+ vtbl.8 d27, {d27}, d16
+ vtbl.8 d28, {d28}, d16
+ vtbl.8 d29, {d29}, d16
vld1.32 {q8-q9}, [sp, :256]
@@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
vsri.u32 q6, q9, #25
subs r3, r3, #1
- beq 0f
-
- vld1.32 {q8-q9}, [sp, :256]
- b .Ldoubleround4
-
- // x0[0-3] += s0[0]
- // x1[0-3] += s0[1]
- // x2[0-3] += s0[2]
- // x3[0-3] += s0[3]
-0: ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q0, q0, q8
- vadd.i32 q1, q1, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q2, q2, q8
- vadd.i32 q3, q3, q9
-
- // x4[0-3] += s1[0]
- // x5[0-3] += s1[1]
- // x6[0-3] += s1[2]
- // x7[0-3] += s1[3]
- ldmia r0!, {r3-r6}
- vdup.32 q8, r3
- vdup.32 q9, r4
- vadd.i32 q4, q4, q8
- vadd.i32 q5, q5, q9
- vdup.32 q8, r5
- vdup.32 q9, r6
- vadd.i32 q6, q6, q8
- vadd.i32 q7, q7, q9
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q0, q1
- vzip.32 q2, q3
- vzip.32 q4, q5
- vzip.32 q6, q7
-
- // interleave 64-bit words in state n, n+2
+ bne .Ldoubleround4
+
+ // x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
+ // x8..9[0-3] are on the stack.
+
+ // Re-interleave the words in the first two rows of each block (x0..7).
+ // Also add the counter values 0-3 to x12[0-3].
+ vld1.32 {q8}, [r5, :128] // load counter values 0-3
+ vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
+ vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
+ vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
+ vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7)
+ vadd.u32 q12, q8 // x12 += counter values 0-3
vswp d1, d4
vswp d3, d6
+ vld1.32 {q8-q9}, [r0]! // load s0..7
vswp d9, d12
vswp d11, d14
- // xor with corresponding input, write to output
+ // Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
+ // after XORing the first 32 bytes.
+ vswp q1, q4
+
+ // First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
+
+ // x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block)
+ vadd.u32 q0, q0, q8
+ vadd.u32 q2, q2, q8
+ vadd.u32 q4, q4, q8
+ vadd.u32 q3, q3, q8
+
+ // x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block)
+ vadd.u32 q1, q1, q9
+ vadd.u32 q6, q6, q9
+ vadd.u32 q5, q5, q9
+ vadd.u32 q7, q7, q9
+
+ // XOR first 32 bytes using keystream from first two rows of first block
vld1.8 {q8-q9}, [r2]!
veor q8, q8, q0
- veor q9, q9, q4
+ veor q9, q9, q1
vst1.8 {q8-q9}, [r1]!
+ // Re-interleave the words in the last two rows of each block (x8..15).
vld1.32 {q8-q9}, [sp, :256]
-
- // x8[0-3] += s2[0]
- // x9[0-3] += s2[1]
- // x10[0-3] += s2[2]
- // x11[0-3] += s2[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- vadd.i32 q8, q8, q0
- vadd.i32 q9, q9, q4
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q10, q10, q0
- vadd.i32 q11, q11, q4
-
- // x12[0-3] += s3[0]
- // x13[0-3] += s3[1]
- // x14[0-3] += s3[2]
- // x15[0-3] += s3[3]
- ldmia r0!, {r3-r6}
- vdup.32 q0, r3
- vdup.32 q4, r4
- adr r3, CTRINC
- vadd.i32 q12, q12, q0
- vld1.32 {q0}, [r3, :128]
- vadd.i32 q13, q13, q4
- vadd.i32 q12, q12, q0 // x12 += counter values 0-3
-
- vdup.32 q0, r5
- vdup.32 q4, r6
- vadd.i32 q14, q14, q0
- vadd.i32 q15, q15, q4
-
- // interleave 32-bit words in state n, n+1
- vzip.32 q8, q9
- vzip.32 q10, q11
- vzip.32 q12, q13
- vzip.32 q14, q15
-
- // interleave 64-bit words in state n, n+2
- vswp d17, d20
- vswp d19, d22
+ vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
+ vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
+ vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
+ vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11)
+ vld1.32 {q0-q1}, [r0] // load s8..15
vswp d25, d28
vswp d27, d30
+ vswp d17, d20
+ vswp d19, d22
+
+ // Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
+
+ // x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block)
+ vadd.u32 q8, q8, q0
+ vadd.u32 q10, q10, q0
+ vadd.u32 q9, q9, q0
+ vadd.u32 q11, q11, q0
+
+ // x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
+ vadd.u32 q12, q12, q1
+ vadd.u32 q14, q14, q1
+ vadd.u32 q13, q13, q1
+ vadd.u32 q15, q15, q1
- vmov q4, q1
+ // XOR the rest of the data with the keystream
vld1.8 {q0-q1}, [r2]!
veor q0, q0, q8
@@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
vst1.8 {q0-q1}, [r1]!
vld1.8 {q0-q1}, [r2]
+ mov sp, r4 // restore original stack pointer
veor q0, q0, q11
veor q1, q1, q15
vst1.8 {q0-q1}, [r1]
- mov sp, ip
- pop {r4-r6, pc}
+ pop {r4-r5}
+ bx lr
ENDPROC(chacha20_4block_xor_neon)
-
- .align 4
-CTRINC: .word 0, 1, 2, 3
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index 96e62ec105d0..cd9e93b46c2d 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
ARRAY_SIZE(crc32_pmull_algs));
}
-static const struct cpu_feature crc32_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index 2f78c10b1881..406009afa9cf 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -63,6 +63,33 @@
k48 .req d31
SHASH2_p64 .req d31
+ HH .req q10
+ HH3 .req q11
+ HH4 .req q12
+ HH34 .req q13
+
+ HH_L .req d20
+ HH_H .req d21
+ HH3_L .req d22
+ HH3_H .req d23
+ HH4_L .req d24
+ HH4_H .req d25
+ HH34_L .req d26
+ HH34_H .req d27
+ SHASH2_H .req d29
+
+ XL2 .req q5
+ XM2 .req q6
+ XH2 .req q7
+ T3 .req q8
+
+ XL2_L .req d10
+ XL2_H .req d11
+ XM2_L .req d12
+ XM2_H .req d13
+ T3_L .req d16
+ T3_H .req d17
+
.text
.fpu crypto-neon-fp-armv8
@@ -175,12 +202,77 @@
beq 0f
vld1.64 {T1}, [ip]
teq r0, #0
- b 1f
+ b 3f
+
+0: .ifc \pn, p64
+ tst r0, #3 // skip until #blocks is a
+ bne 2f // round multiple of 4
+
+ vld1.8 {XL2-XM2}, [r2]!
+1: vld1.8 {T3-T2}, [r2]!
+ vrev64.8 XL2, XL2
+ vrev64.8 XM2, XM2
+
+ subs r0, r0, #4
+
+ vext.8 T1, XL2, XL2, #8
+ veor XL2_H, XL2_H, XL_L
+ veor XL, XL, T1
+
+ vrev64.8 T3, T3
+ vrev64.8 T1, T2
+
+ vmull.p64 XH, HH4_H, XL_H // a1 * b1
+ veor XL2_H, XL2_H, XL_H
+ vmull.p64 XL, HH4_L, XL_L // a0 * b0
+ vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0)
+
+ vmull.p64 XH2, HH3_H, XM2_L // a1 * b1
+ veor XM2_L, XM2_L, XM2_H
+ vmull.p64 XL2, HH3_L, XM2_H // a0 * b0
+ vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
+
+ vmull.p64 XH2, HH_H, T3_L // a1 * b1
+ veor T3_L, T3_L, T3_H
+ vmull.p64 XL2, HH_L, T3_H // a0 * b0
+ vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
+
+ vmull.p64 XH2, SHASH_H, T1_L // a1 * b1
+ veor T1_L, T1_L, T1_H
+ vmull.p64 XL2, SHASH_L, T1_H // a0 * b0
+ vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0)
+
+ veor XH, XH, XH2
+ veor XL, XL, XL2
+ veor XM, XM, XM2
-0: vld1.64 {T1}, [r2]!
+ beq 4f
+
+ vld1.8 {XL2-XM2}, [r2]!
+
+ veor T1, XL, XH
+ veor XM, XM, T1
+
+ __pmull_reduce_p64
+
+ veor T1, T1, XH
+ veor XL, XL, T1
+
+ b 1b
+ .endif
+
+2: vld1.64 {T1}, [r2]!
subs r0, r0, #1
-1: /* multiply XL by SHASH in GF(2^128) */
+3: /* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
vrev64.8 T1, T1
#endif
@@ -193,7 +285,7 @@
__pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0
__pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0)
- veor T1, XL, XH
+4: veor T1, XL, XH
veor XM, XM, T1
__pmull_reduce_\pn
@@ -212,8 +304,14 @@
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update_p64)
- vld1.64 {SHASH}, [r3]
+ vld1.64 {SHASH}, [r3]!
+ vld1.64 {HH}, [r3]!
+ vld1.64 {HH3-HH4}, [r3]
+
veor SHASH2_p64, SHASH_L, SHASH_H
+ veor SHASH2_H, HH_L, HH_H
+ veor HH34_L, HH3_L, HH3_H
+ veor HH34_H, HH4_L, HH4_H
vmov.i8 MASK, #0xe1
vshl.u64 MASK, MASK, #57
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 8930fc4e7c22..b7d30b6cf49c 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
*
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
- u64 a;
- u64 b;
+ u64 h[2];
+ u64 h2[2];
+ u64 h3[2];
+ u64 h4[2];
};
struct ghash_desc_ctx {
@@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
return 0;
}
+static void ghash_reflect(u64 h[], const be128 *k)
+{
+ u64 carry = be64_to_cpu(k->a) >> 63;
+
+ h[0] = (be64_to_cpu(k->b) << 1) | carry;
+ h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
+
+ if (carry)
+ h[1] ^= 0xc200000000000000UL;
+}
+
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
- u64 a, b;
+ be128 h, k;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
- /* perform multiplication by 'x' in GF(2^128) */
- b = get_unaligned_be64(inkey);
- a = get_unaligned_be64(inkey + 8);
+ memcpy(&k, inkey, GHASH_BLOCK_SIZE);
+ ghash_reflect(key->h, &k);
+
+ h = k;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h2, &h);
- key->a = (a << 1) | (b >> 63);
- key->b = (b << 1) | (a >> 63);
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h3, &h);
- if (b >> 63)
- key->b ^= 0xc200000000000000UL;
+ gf128mul_lle(&h, &k);
+ ghash_reflect(key->h4, &h);
return 0;
}
diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
deleted file mode 100644
index 57caa742016e..000000000000
--- a/arch/arm/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
- .text
- .fpu neon
-
- // arguments
- ROUND_KEYS .req r0 // const {u64,u32} *round_keys
- NROUNDS .req r1 // int nrounds
- DST .req r2 // void *dst
- SRC .req r3 // const void *src
- NBYTES .req r4 // unsigned int nbytes
- TWEAK .req r5 // void *tweak
-
- // registers which hold the data being encrypted/decrypted
- X0 .req q0
- X0_L .req d0
- X0_H .req d1
- Y0 .req q1
- Y0_H .req d3
- X1 .req q2
- X1_L .req d4
- X1_H .req d5
- Y1 .req q3
- Y1_H .req d7
- X2 .req q4
- X2_L .req d8
- X2_H .req d9
- Y2 .req q5
- Y2_H .req d11
- X3 .req q6
- X3_L .req d12
- X3_H .req d13
- Y3 .req q7
- Y3_H .req d15
-
- // the round key, duplicated in all lanes
- ROUND_KEY .req q8
- ROUND_KEY_L .req d16
- ROUND_KEY_H .req d17
-
- // index vector for vtbl-based 8-bit rotates
- ROTATE_TABLE .req d18
-
- // multiplication table for updating XTS tweaks
- GF128MUL_TABLE .req d19
- GF64MUL_TABLE .req d19
-
- // current XTS tweak value(s)
- TWEAKV .req q10
- TWEAKV_L .req d20
- TWEAKV_H .req d21
-
- TMP0 .req q12
- TMP0_L .req d24
- TMP0_H .req d25
- TMP1 .req q13
- TMP2 .req q14
- TMP3 .req q15
-
- .align 4
-.Lror64_8_table:
- .byte 1, 2, 3, 4, 5, 6, 7, 0
-.Lror32_8_table:
- .byte 1, 2, 3, 0, 5, 6, 7, 4
-.Lrol64_8_table:
- .byte 7, 0, 1, 2, 3, 4, 5, 6
-.Lrol32_8_table:
- .byte 3, 0, 1, 2, 7, 4, 5, 6
-.Lgf128mul_table:
- .byte 0, 0x87
- .fill 14
-.Lgf64mul_table:
- .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
- .fill 12
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- *
- * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
- * the vtbl approach is faster on some processors and the same speed on others.
- */
-.macro _speck_round_128bytes n
-
- // x = ror(x, 8)
- vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
- vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
- vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
- vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
- vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
- vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
- vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
- vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
-
- // x += y
- vadd.u\n X0, Y0
- vadd.u\n X1, Y1
- vadd.u\n X2, Y2
- vadd.u\n X3, Y3
-
- // x ^= k
- veor X0, ROUND_KEY
- veor X1, ROUND_KEY
- veor X2, ROUND_KEY
- veor X3, ROUND_KEY
-
- // y = rol(y, 3)
- vshl.u\n TMP0, Y0, #3
- vshl.u\n TMP1, Y1, #3
- vshl.u\n TMP2, Y2, #3
- vshl.u\n TMP3, Y3, #3
- vsri.u\n TMP0, Y0, #(\n - 3)
- vsri.u\n TMP1, Y1, #(\n - 3)
- vsri.u\n TMP2, Y2, #(\n - 3)
- vsri.u\n TMP3, Y3, #(\n - 3)
-
- // y ^= x
- veor Y0, TMP0, X0
- veor Y1, TMP1, X1
- veor Y2, TMP2, X2
- veor Y3, TMP3, X3
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes n
-
- // y ^= x
- veor TMP0, Y0, X0
- veor TMP1, Y1, X1
- veor TMP2, Y2, X2
- veor TMP3, Y3, X3
-
- // y = ror(y, 3)
- vshr.u\n Y0, TMP0, #3
- vshr.u\n Y1, TMP1, #3
- vshr.u\n Y2, TMP2, #3
- vshr.u\n Y3, TMP3, #3
- vsli.u\n Y0, TMP0, #(\n - 3)
- vsli.u\n Y1, TMP1, #(\n - 3)
- vsli.u\n Y2, TMP2, #(\n - 3)
- vsli.u\n Y3, TMP3, #(\n - 3)
-
- // x ^= k
- veor X0, ROUND_KEY
- veor X1, ROUND_KEY
- veor X2, ROUND_KEY
- veor X3, ROUND_KEY
-
- // x -= y
- vsub.u\n X0, Y0
- vsub.u\n X1, Y1
- vsub.u\n X2, Y2
- vsub.u\n X3, Y3
-
- // x = rol(x, 8);
- vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
- vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
- vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
- vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
- vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
- vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
- vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
- vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
-.endm
-
-.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp
-
- // Load the next source block
- vld1.8 {\dst_reg}, [SRC]!
-
- // Save the current tweak in the tweak buffer
- vst1.8 {TWEAKV}, [\tweak_buf:128]!
-
- // XOR the next source block with the current tweak
- veor \dst_reg, TWEAKV
-
- /*
- * Calculate the next tweak by multiplying the current one by x,
- * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
- */
- vshr.u64 \tmp, TWEAKV, #63
- vshl.u64 TWEAKV, #1
- veor TWEAKV_H, \tmp\()_L
- vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
- veor TWEAKV_L, \tmp\()_H
-.endm
-
-.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp
-
- // Load the next two source blocks
- vld1.8 {\dst_reg}, [SRC]!
-
- // Save the current two tweaks in the tweak buffer
- vst1.8 {TWEAKV}, [\tweak_buf:128]!
-
- // XOR the next two source blocks with the current two tweaks
- veor \dst_reg, TWEAKV
-
- /*
- * Calculate the next two tweaks by multiplying the current ones by x^2,
- * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
- */
- vshr.u64 \tmp, TWEAKV, #62
- vshl.u64 TWEAKV, #2
- vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
- vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
- veor TWEAKV, \tmp
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt n, decrypting
- push {r4-r7}
- mov r7, sp
-
- /*
- * The first four parameters were passed in registers r0-r3. Load the
- * additional parameters, which were passed on the stack.
- */
- ldr NBYTES, [sp, #16]
- ldr TWEAK, [sp, #20]
-
- /*
- * If decrypting, modify the ROUND_KEYS parameter to point to the last
- * round key rather than the first, since for decryption the round keys
- * are used in reverse order.
- */
-.if \decrypting
-.if \n == 64
- add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
- sub ROUND_KEYS, #8
-.else
- add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
- sub ROUND_KEYS, #4
-.endif
-.endif
-
- // Load the index vector for vtbl-based 8-bit rotates
-.if \decrypting
- ldr r12, =.Lrol\n\()_8_table
-.else
- ldr r12, =.Lror\n\()_8_table
-.endif
- vld1.8 {ROTATE_TABLE}, [r12:64]
-
- // One-time XTS preparation
-
- /*
- * Allocate stack space to store 128 bytes worth of tweaks. For
- * performance, this space is aligned to a 16-byte boundary so that we
- * can use the load/store instructions that declare 16-byte alignment.
- * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
- */
- sub r12, sp, #128
- bic r12, #0xf
- mov sp, r12
-
-.if \n == 64
- // Load first tweak
- vld1.8 {TWEAKV}, [TWEAK]
-
- // Load GF(2^128) multiplication table
- ldr r12, =.Lgf128mul_table
- vld1.8 {GF128MUL_TABLE}, [r12:64]
-.else
- // Load first tweak
- vld1.8 {TWEAKV_L}, [TWEAK]
-
- // Load GF(2^64) multiplication table
- ldr r12, =.Lgf64mul_table
- vld1.8 {GF64MUL_TABLE}, [r12:64]
-
- // Calculate second tweak, packing it together with the first
- vshr.u64 TMP0_L, TWEAKV_L, #63
- vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L
- vshl.u64 TWEAKV_H, TWEAKV_L, #1
- veor TWEAKV_H, TMP0_L
-.endif
-
-.Lnext_128bytes_\@:
-
- /*
- * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
- * values, and save the tweaks on the stack for later. Then
- * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
- * that the X[0-3] registers contain only the second halves of blocks,
- * and the Y[0-3] registers contain only the first halves of blocks.
- * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
- */
- mov r12, sp
-.if \n == 64
- _xts128_precrypt_one X0, r12, TMP0
- _xts128_precrypt_one Y0, r12, TMP0
- _xts128_precrypt_one X1, r12, TMP0
- _xts128_precrypt_one Y1, r12, TMP0
- _xts128_precrypt_one X2, r12, TMP0
- _xts128_precrypt_one Y2, r12, TMP0
- _xts128_precrypt_one X3, r12, TMP0
- _xts128_precrypt_one Y3, r12, TMP0
- vswp X0_L, Y0_H
- vswp X1_L, Y1_H
- vswp X2_L, Y2_H
- vswp X3_L, Y3_H
-.else
- _xts64_precrypt_two X0, r12, TMP0
- _xts64_precrypt_two Y0, r12, TMP0
- _xts64_precrypt_two X1, r12, TMP0
- _xts64_precrypt_two Y1, r12, TMP0
- _xts64_precrypt_two X2, r12, TMP0
- _xts64_precrypt_two Y2, r12, TMP0
- _xts64_precrypt_two X3, r12, TMP0
- _xts64_precrypt_two Y3, r12, TMP0
- vuzp.32 Y0, X0
- vuzp.32 Y1, X1
- vuzp.32 Y2, X2
- vuzp.32 Y3, X3
-.endif
-
- // Do the cipher rounds
-
- mov r12, ROUND_KEYS
- mov r6, NROUNDS
-
-.Lnext_round_\@:
-.if \decrypting
-.if \n == 64
- vld1.64 ROUND_KEY_L, [r12]
- sub r12, #8
- vmov ROUND_KEY_H, ROUND_KEY_L
-.else
- vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
- sub r12, #4
-.endif
- _speck_unround_128bytes \n
-.else
-.if \n == 64
- vld1.64 ROUND_KEY_L, [r12]!
- vmov ROUND_KEY_H, ROUND_KEY_L
-.else
- vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
-.endif
- _speck_round_128bytes \n
-.endif
- subs r6, r6, #1
- bne .Lnext_round_\@
-
- // Re-interleave the 'x' and 'y' elements of each block
-.if \n == 64
- vswp X0_L, Y0_H
- vswp X1_L, Y1_H
- vswp X2_L, Y2_H
- vswp X3_L, Y3_H
-.else
- vzip.32 Y0, X0
- vzip.32 Y1, X1
- vzip.32 Y2, X2
- vzip.32 Y3, X3
-.endif
-
- // XOR the encrypted/decrypted blocks with the tweaks we saved earlier
- mov r12, sp
- vld1.8 {TMP0, TMP1}, [r12:128]!
- vld1.8 {TMP2, TMP3}, [r12:128]!
- veor X0, TMP0
- veor Y0, TMP1
- veor X1, TMP2
- veor Y1, TMP3
- vld1.8 {TMP0, TMP1}, [r12:128]!
- vld1.8 {TMP2, TMP3}, [r12:128]!
- veor X2, TMP0
- veor Y2, TMP1
- veor X3, TMP2
- veor Y3, TMP3
-
- // Store the ciphertext in the destination buffer
- vst1.8 {X0, Y0}, [DST]!
- vst1.8 {X1, Y1}, [DST]!
- vst1.8 {X2, Y2}, [DST]!
- vst1.8 {X3, Y3}, [DST]!
-
- // Continue if there are more 128-byte chunks remaining, else return
- subs NBYTES, #128
- bne .Lnext_128bytes_\@
-
- // Store the next tweak
-.if \n == 64
- vst1.8 {TWEAKV}, [TWEAK]
-.else
- vst1.8 {TWEAKV_L}, [TWEAK]
-.endif
-
- mov sp, r7
- pop {r4-r7}
- bx lr
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
- _speck_xts_crypt n=64, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
- _speck_xts_crypt n=64, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
- _speck_xts_crypt n=32, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
- _speck_xts_crypt n=32, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c
deleted file mode 100644
index f012c3ea998f..000000000000
--- a/arch/arm/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,288 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
- * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
- * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
- * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
- * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
- * OCB and PMAC"), represented as 0x1B.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE 128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
- struct speck128_tfm_ctx main_key;
- struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
- speck128_crypt_one_t crypt_one,
- speck128_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- le128 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- le128_xor((le128 *)dst, (const le128 *)src, &tweak);
- (*crypt_one)(&ctx->main_key, dst, dst);
- le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
- gf128mul_x_ble(&tweak, &tweak);
-
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_encrypt,
- speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_decrypt,
- speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
- struct speck64_tfm_ctx main_key;
- struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
- speck64_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- __le64 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- *(__le64 *)dst = *(__le64 *)src ^ tweak;
- (*crypt_one)(&ctx->main_key, dst, dst);
- *(__le64 *)dst ^= tweak;
- tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
- ((tweak & cpu_to_le64(1ULL << 63)) ?
- 0x1B : 0));
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_encrypt,
- speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_decrypt,
- speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
- {
- .base.cra_name = "xts(speck128)",
- .base.cra_driver_name = "xts-speck128-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK128_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK128_128_KEY_SIZE,
- .max_keysize = 2 * SPECK128_256_KEY_SIZE,
- .ivsize = SPECK128_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck128_xts_setkey,
- .encrypt = speck128_xts_encrypt,
- .decrypt = speck128_xts_decrypt,
- }, {
- .base.cra_name = "xts(speck64)",
- .base.cra_driver_name = "xts-speck64-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK64_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK64_96_KEY_SIZE,
- .max_keysize = 2 * SPECK64_128_KEY_SIZE,
- .ivsize = SPECK64_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck64_xts_setkey,
- .encrypt = speck64_xts_encrypt,
- .decrypt = speck64_xts_decrypt,
- }
-};
-
-static int __init speck_neon_module_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
- return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
- crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 2d43dca29c72..b95f8d0d9f17 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -133,8 +133,7 @@
* space.
*/
#define KVM_PHYS_SHIFT (40)
-#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL))
+
#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30))
/* Virtualization Translation Control Register (VTCR) bits */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3ad482d2f1eb..5ca5d9af0c26 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -273,7 +273,7 @@ static inline void __cpu_init_stage2(void)
kvm_call_hyp(__init_stage2_translation);
}
-static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
return 0;
}
@@ -354,4 +354,15 @@ static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
+static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+ /*
+ * On 32bit ARM, VMs get a static 40bit IPA stage2 setup,
+ * so any non-zero value used as type is illegal.
+ */
+ if (type)
+ return -EINVAL;
+ return 0;
+}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 847f01fa429d..1098ffc3d54b 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -35,16 +35,12 @@
addr; \
})
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
- */
-#define KVM_MMU_CACHE_MIN_PAGES 2
-
#ifndef __ASSEMBLY__
#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
#include <asm/kvm_hyp.h>
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
@@ -52,6 +48,13 @@
/* Ensure compatibility with arm64 */
#define VA_BITS 32
+#define kvm_phys_shift(kvm) KVM_PHYS_SHIFT
+#define kvm_phys_size(kvm) (1ULL << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - 1ULL)
+#define kvm_vttbr_baddr_mask(kvm) VTTBR_BADDR_MASK
+
+#define stage2_pgd_size(kvm) (PTRS_PER_S2_PGD * sizeof(pgd_t))
+
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
@@ -355,6 +358,8 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) (addr)
+static inline void kvm_set_ipa_limit(void) {}
+
static inline bool kvm_cpu_has_cnp(void)
{
return false;
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index 460d616bb2d6..f6a7ea805232 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -19,43 +19,53 @@
#ifndef __ARM_S2_PGTABLE_H_
#define __ARM_S2_PGTABLE_H_
-#define stage2_pgd_none(pgd) pgd_none(pgd)
-#define stage2_pgd_clear(pgd) pgd_clear(pgd)
-#define stage2_pgd_present(pgd) pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
-#define stage2_pud_free(pud) pud_free(NULL, pud)
-
-#define stage2_pud_none(pud) pud_none(pud)
-#define stage2_pud_clear(pud) pud_clear(pud)
-#define stage2_pud_present(pud) pud_present(pud)
-#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
-#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
-
-#define stage2_pud_huge(pud) pud_huge(pud)
+/*
+ * kvm_mmu_cache_min_pages() is the number of pages required
+ * to install a stage-2 translation. We pre-allocate the entry
+ * level table at VM creation. Since we have a 3 level page-table,
+ * we need only two pages to add a new mapping.
+ */
+#define kvm_mmu_cache_min_pages(kvm) 2
+
+#define stage2_pgd_none(kvm, pgd) pgd_none(pgd)
+#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd)
+#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
+#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
+#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
+
+#define stage2_pud_none(kvm, pud) pud_none(pud)
+#define stage2_pud_clear(kvm, pud) pud_clear(pud)
+#define stage2_pud_present(kvm, pud) pud_present(pud)
+#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
+#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(kvm, pud) pud_huge(pud)
/* Open coded p*d_addr_end that can deal with 64bit addresses */
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
-#define stage2_pud_addr_end(addr, end) (end)
+#define stage2_pud_addr_end(kvm, addr, end) (end)
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
return (boundary - 1 < end - 1) ? boundary : end;
}
-#define stage2_pgd_index(addr) pgd_index(addr)
+#define stage2_pgd_index(kvm, addr) pgd_index(addr)
-#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define stage2_pud_table_empty(pudp) false
+#define stage2_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(kvm, pudp) false
#endif /* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 076090d2dbf5..88ef2ce1f69a 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -16,23 +16,23 @@
#include <uapi/asm/unistd.h>
#include <asm/unistd-nr.h>
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_OLD_MMAP
#define __ARCH_WANT_SYS_OLD_SELECT
+#define __ARCH_WANT_SYS_UTIME
#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_SOCKETCALL
diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
index a7c6ae13c945..bfe1c4d06901 100644
--- a/arch/arm/mach-at91/pm_suspend.S
+++ b/arch/arm/mach-at91/pm_suspend.S
@@ -149,6 +149,14 @@ exit_suspend:
ENDPROC(at91_pm_suspend_in_sram)
ENTRY(at91_backup_mode)
+ /* Switch the master clock source to slow clock. */
+ ldr pmc, .pmc_base
+ ldr tmp1, [pmc, #AT91_PMC_MCKR]
+ bic tmp1, tmp1, #AT91_PMC_CSS
+ str tmp1, [pmc, #AT91_PMC_MCKR]
+
+ wait_mckrdy
+
/*BUMEN*/
ldr r0, .sfr
mov tmp1, #0x1
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b524284..5a89a957641b 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -128,6 +128,7 @@ config ARCH_MVEBU
select MVEBU_ICU
select MVEBU_ODMI
select MVEBU_PIC
+ select MVEBU_SEI
select OF_GPIO
select PINCTRL
select PINCTRL_ARMADA_37XX
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 8a2641c742ae..fb3d2ee77c56 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -473,16 +473,51 @@
status = "disabled";
};
+ sdr: sdr@f8011100 {
+ compatible = "altr,sdr-ctl", "syscon";
+ reg = <0xf8011100 0xc0>;
+ };
+
eccmgr {
- compatible = "altr,socfpga-s10-ecc-manager";
+ compatible = "altr,socfpga-a10-ecc-manager";
+ altr,sysmgr-syscon = <&sysmgr>;
+ #address-cells = <1>;
+ #size-cells = <1>;
interrupts = <0 15 4>, <0 95 4>;
interrupt-controller;
#interrupt-cells = <2>;
+ ranges;
sdramedac {
compatible = "altr,sdram-edac-s10";
+ altr,sdr-syscon = <&sdr>;
interrupts = <16 4>, <48 4>;
};
+
+ usb0-ecc@ff8c4000 {
+ compatible = "altr,socfpga-usb-ecc";
+ reg = <0xff8c4000 0x100>;
+ altr,ecc-parent = <&usb0>;
+ interrupts = <2 4>,
+ <34 4>;
+ };
+
+ emac0-rx-ecc@ff8c0000 {
+ compatible = "altr,socfpga-eth-mac-ecc";
+ reg = <0xff8c0000 0x100>;
+ altr,ecc-parent = <&gmac0>;
+ interrupts = <4 4>,
+ <36 4>;
+ };
+
+ emac0-tx-ecc@ff8c0400 {
+ compatible = "altr,socfpga-eth-mac-ecc";
+ reg = <0xff8c0400 0x100>;
+ altr,ecc-parent = <&gmac0>;
+ interrupts = <5 4>,
+ <37 4>;
+ };
+
};
qspi: spi@ff8d2000 {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364f8476..3d165b4cdd2a 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_ANSI_CPRNG=y
+CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
CONFIG_CRYPTO_SHA2_ARM64_CE=y
@@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
CONFIG_CRYPTO_SM3_ARM64_CE=m
CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
-CONFIG_CRYPTO_CRC32_ARM64_CE=m
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
CONFIG_CRYPTO_CHACHA20_NEON=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index e3fdb0fd6f70..a5606823ed4d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
depends on KERNEL_MODE_NEON && CRC_T10DIF
select CRYPTO_HASH
-config CRYPTO_CRC32_ARM64_CE
- tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
- depends on CRC32
- select CRYPTO_HASH
-
config CRYPTO_AES_ARM64
tristate "AES core cipher using scalar instructions"
select CRYPTO_AES
@@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
select CRYPTO_AES_ARM64
select CRYPTO_SIMD
-config CRYPTO_SPECK_NEON
- tristate "NEON accelerated Speck cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_SPECK
-
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index bcafd016618e..f476fede09ba 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
-obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
-crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
@@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 623e74ed1c67..143070510809 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -17,6 +17,11 @@
.arch armv8-a+crypto
+ xtsmask .req v16
+
+ .macro xts_reload_mask, tmp
+ .endm
+
/* preload all round keys */
.macro load_round_keys, rounds, rk
cmp \rounds, #12
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index adcb83eb683c..1e676625ef33 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -15,6 +15,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <crypto/xts.h>
@@ -31,6 +32,8 @@
#define aes_ecb_decrypt ce_aes_ecb_decrypt
#define aes_cbc_encrypt ce_aes_cbc_encrypt
#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
@@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
/* defined in aes-modes.S */
-asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
-asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
- int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+ int rounds, int blocks, u32 const rk2[], u8 iv[],
int first);
-asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
- int rounds, int blocks, u8 const rk2[], u8 iv[],
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+ int rounds, int blocks, u32 const rk2[], u8 iv[],
int first);
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
+struct cts_cbc_req_ctx {
+ struct scatterlist sg_src[2];
+ struct scatterlist sg_dst[2];
+ struct skcipher_request subreq;
+};
+
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
@@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks);
+ ctx->key_enc, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, rounds, blocks);
+ ctx->key_dec, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+ ctx->key_enc, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+ ctx->key_dec, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
}
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+ return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+ int err, rounds = 6 + ctx->key_length / 4;
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct skcipher_walk walk;
+
+ skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ unsigned int blocks;
+
+ skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, blocks, walk.iv);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+ rctx->subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+ rctx->subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, rounds, walk.nbytes, walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+ int err, rounds = 6 + ctx->key_length / 4;
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct skcipher_walk walk;
+
+ skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ unsigned int blocks;
+
+ skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, blocks, walk.iv);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+ rctx->subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+ rctx->subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, rounds, walk.nbytes, walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+ ctx->key_enc, rounds, blocks, walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
blocks = -1;
kernel_neon_begin();
- aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
+ aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
blocks, walk.iv);
kernel_neon_end();
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
@@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
kernel_neon_begin();
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_enc, rounds, blocks,
- (u8 *)ctx->key2.key_enc, walk.iv, first);
+ ctx->key1.key_enc, rounds, blocks,
+ ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
kernel_neon_begin();
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_dec, rounds, blocks,
- (u8 *)ctx->key2.key_enc, walk.iv, first);
+ ctx->key1.key_dec, rounds, blocks,
+ ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -336,6 +488,24 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = cbc_decrypt,
}, {
.base = {
+ .cra_name = "__cts(cbc(aes))",
+ .cra_driver_name = "__cts-cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
+ .setkey = skcipher_aes_setkey,
+ .encrypt = cts_cbc_encrypt,
+ .decrypt = cts_cbc_decrypt,
+ .init = cts_cbc_init_tfm,
+}, {
+ .base = {
.cra_name = "__ctr(aes)",
.cra_driver_name = "__ctr-aes-" MODE,
.cra_priority = PRIO,
@@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
{
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
be128 *consts = (be128 *)ctx->consts;
- u8 *rk = (u8 *)ctx->key.key_enc;
int rounds = 6 + key_len / 4;
int err;
@@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
/* encrypt the zero vector */
kernel_neon_begin();
- aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
+ rounds, 1);
kernel_neon_end();
cmac_gf128_mul_by_x(consts, consts);
@@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
};
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
- u8 *rk = (u8 *)ctx->key.key_enc;
int rounds = 6 + key_len / 4;
u8 key[AES_BLOCK_SIZE];
int err;
@@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
kernel_neon_begin();
- aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
- aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
+ aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
kernel_neon_end();
return cbcmac_setkey(tfm, key, sizeof(key));
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 483a7130cf0e..67700045a0e0 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
.align 4
aes_encrypt_block4x:
- encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
- decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
ENDPROC(aes_decrypt_block4x)
@@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
- frame_push 5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
-
-.Lecbencrestart:
- enc_prepare w22, x21, x5
+ enc_prepare w3, x2, x5
.LecbencloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lecbenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x
- st1 {v0.16b-v3.16b}, [x19], #64
- cond_yield_neon .Lecbencrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LecbencloopNx
.Lecbenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lecbencout
.Lecbencloop:
- ld1 {v0.16b}, [x20], #16 /* get next pt block */
- encrypt_block v0, w22, x21, x5, w6
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lecbencloop
.Lecbencout:
- frame_pop
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
- frame_push 5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
-
-.Lecbdecrestart:
- dec_prepare w22, x21, x5
+ dec_prepare w3, x2, x5
.LecbdecloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lecbdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x
- st1 {v0.16b-v3.16b}, [x19], #64
- cond_yield_neon .Lecbdecrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LecbdecloopNx
.Lecbdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lecbdecout
.Lecbdecloop:
- ld1 {v0.16b}, [x20], #16 /* get next ct block */
- decrypt_block v0, w22, x21, x5, w6
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lecbdecloop
.Lecbdecout:
- frame_pop
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_decrypt)
@@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
- frame_push 6
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
-
-.Lcbcencrestart:
- ld1 {v4.16b}, [x24] /* get iv */
- enc_prepare w22, x21, x6
+ ld1 {v4.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x6
.Lcbcencloop4x:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lcbcenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
- encrypt_block v0, w22, x21, x6, w7
+ encrypt_block v0, w3, x2, x6, w7
eor v1.16b, v1.16b, v0.16b
- encrypt_block v1, w22, x21, x6, w7
+ encrypt_block v1, w3, x2, x6, w7
eor v2.16b, v2.16b, v1.16b
- encrypt_block v2, w22, x21, x6, w7
+ encrypt_block v2, w3, x2, x6, w7
eor v3.16b, v3.16b, v2.16b
- encrypt_block v3, w22, x21, x6, w7
- st1 {v0.16b-v3.16b}, [x19], #64
+ encrypt_block v3, w3, x2, x6, w7
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v3.16b
- st1 {v4.16b}, [x24] /* return iv */
- cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x
.Lcbcenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lcbcencout
.Lcbcencloop:
- ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
- encrypt_block v4, w22, x21, x6, w7
- st1 {v4.16b}, [x19], #16
- subs w23, w23, #1
+ encrypt_block v4, w3, x2, x6, w7
+ st1 {v4.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lcbcencloop
.Lcbcencout:
- st1 {v4.16b}, [x24] /* return iv */
- frame_pop
+ st1 {v4.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
- frame_push 6
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
-.Lcbcdecrestart:
- ld1 {v7.16b}, [x24] /* get iv */
- dec_prepare w22, x21, x6
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lcbcdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
- sub x20, x20, #16
+ sub x1, x1, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
- st1 {v7.16b}, [x24] /* return iv */
- cond_yield_neon .Lcbcdecrestart
+ st1 {v0.16b-v3.16b}, [x0], #64
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lcbcdecout
.Lcbcdecloop:
- ld1 {v1.16b}, [x20], #16 /* get next ct block */
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w22, x21, x6, w7
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x24] /* return iv */
- frame_pop
+ st1 {v7.16b}, [x5] /* return iv */
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
/*
+ * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+ adr_l x8, .Lcts_permute_table
+ sub x4, x4, #16
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ ld1 {v3.16b}, [x8]
+ ld1 {v4.16b}, [x9]
+
+ ld1 {v0.16b}, [x1], x4 /* overlapping loads */
+ ld1 {v1.16b}, [x1]
+
+ ld1 {v5.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x6
+
+ eor v0.16b, v0.16b, v5.16b /* xor with iv */
+ tbl v1.16b, {v1.16b}, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+
+ eor v1.16b, v1.16b, v0.16b
+ tbl v0.16b, {v0.16b}, v3.16b
+ encrypt_block v1, w3, x2, x6, w7
+
+ add x4, x0, x4
+ st1 {v0.16b}, [x4] /* overlapping stores */
+ st1 {v1.16b}, [x0]
+ ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+ adr_l x8, .Lcts_permute_table
+ sub x4, x4, #16
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ ld1 {v3.16b}, [x8]
+ ld1 {v4.16b}, [x9]
+
+ ld1 {v0.16b}, [x1], x4 /* overlapping loads */
+ ld1 {v1.16b}, [x1]
+
+ ld1 {v5.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x6
+
+ tbl v2.16b, {v1.16b}, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v2.16b, v2.16b, v0.16b
+
+ tbx v0.16b, {v1.16b}, v4.16b
+ tbl v2.16b, {v2.16b}, v3.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v5.16b /* xor with iv */
+
+ add x4, x0, x4
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ st1 {v0.16b}, [x0]
+ ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+ .section ".rodata", "a"
+ .align 6
+.Lcts_permute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .previous
+
+
+ /*
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
AES_ENTRY(aes_ctr_encrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x5
-
-.Lctrrestart:
- enc_prepare w22, x21, x6
- ld1 {v4.16b}, [x24]
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
+ cmn w6, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
.LctrloopNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lctr1x
- cmn w6, #4 /* 32 bit overflow? */
- bcs .Lctr1x
- ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w6
+ add w7, w6, #1
mov v0.16b, v4.16b
- add v7.4s, v7.4s, v8.4s
+ add w8, w6, #2
mov v1.16b, v4.16b
- rev32 v8.16b, v7.16b
+ add w9, w6, #3
mov v2.16b, v4.16b
+ rev w7, w7
mov v3.16b, v4.16b
- mov v1.s[3], v8.s[0]
- mov v2.s[3], v8.s[1]
- mov v3.s[3], v8.s[2]
- ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
+ rev w8, w8
+ mov v1.s[3], w7
+ rev w9, w9
+ mov v2.s[3], w8
+ mov v3.s[3], w9
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x20], #16 /* get 1 input block */
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
- cbz w23, .Lctrout
- st1 {v4.16b}, [x24] /* return next CTR value */
- cond_yield_neon .Lctrrestart
+ cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
- encrypt_block v0, w22, x21, x8, w7
+ encrypt_block v0, w3, x2, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
@@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
- subs w23, w23, #1
+ subs w4, w4, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
- ld1 {v3.16b}, [x20], #16
+ ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
- st1 {v3.16b}, [x19], #16
+ st1 {v3.16b}, [x0], #16
bne .Lctrloop
.Lctrout:
- st1 {v4.16b}, [x24] /* return next CTR value */
-.Lctrret:
- frame_pop
+ st1 {v4.16b}, [x5] /* return next CTR value */
+ ldp x29, x30, [sp], #16
ret
.Lctrtailblock:
- st1 {v0.16b}, [x19]
- b .Lctrret
+ st1 {v0.16b}, [x0]
+ ldp x29, x30, [sp], #16
+ ret
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
@@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
ins v4.d[0], x7
b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
- .ltorg
/*
@@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
* int blocks, u8 const rk2[], u8 iv[], int first)
*/
- .macro next_tweak, out, in, const, tmp
+ .macro next_tweak, out, in, tmp
sshr \tmp\().2d, \in\().2d, #63
- and \tmp\().16b, \tmp\().16b, \const\().16b
+ and \tmp\().16b, \tmp\().16b, xtsmask.16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
-.Lxts_mul_x:
-CPU_LE( .quad 1, 0x87 )
-CPU_BE( .quad 0x87, 1 )
+ .macro xts_load_mask, tmp
+ movi xtsmask.2s, #0x1
+ movi \tmp\().2s, #0x87
+ uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
+ .endm
AES_ENTRY(aes_xts_encrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x6
-
- ld1 {v4.16b}, [x24]
+ ld1 {v4.16b}, [x6]
+ xts_load_mask v8
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
- ldr q7, .Lxts_mul_x
b .LxtsencNx
-.Lxtsencrestart:
- ld1 {v4.16b}, [x24]
.Lxtsencnotfirst:
- enc_prepare w22, x21, x8
+ enc_prepare w3, x2, x8
.LxtsencloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
.LxtsencNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lxtsenc1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
- next_tweak v5, v4, v7, v8
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w23, .Lxtsencout
- st1 {v4.16b}, [x24]
- cond_yield_neon .Lxtsencrestart
+ cbz w4, .Lxtsencout
+ xts_reload_mask v8
b .LxtsencloopNx
.Lxtsenc1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lxtsencout
.Lxtsencloop:
- ld1 {v1.16b}, [x20], #16
+ ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
- encrypt_block v0, w22, x21, x8, w7
+ encrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
beq .Lxtsencout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsencloop
.Lxtsencout:
- st1 {v4.16b}, [x24]
- frame_pop
+ st1 {v4.16b}, [x6]
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
- frame_push 6
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
- mov x23, x4
- mov x24, x6
-
- ld1 {v4.16b}, [x24]
+ ld1 {v4.16b}, [x6]
+ xts_load_mask v8
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
dec_prepare w3, x2, x8
- ldr q7, .Lxts_mul_x
b .LxtsdecNx
-.Lxtsdecrestart:
- ld1 {v4.16b}, [x24]
.Lxtsdecnotfirst:
- dec_prepare w22, x21, x8
+ dec_prepare w3, x2, x8
.LxtsdecloopNx:
- ldr q7, .Lxts_mul_x
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
.LxtsdecNx:
- subs w23, w23, #4
+ subs w4, w4, #4
bmi .Lxtsdec1x
- ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
- next_tweak v5, v4, v7, v8
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v8
eor v0.16b, v0.16b, v4.16b
- next_tweak v6, v5, v7, v8
+ next_tweak v6, v5, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- next_tweak v7, v6, v7, v8
+ next_tweak v7, v6, v8
eor v3.16b, v3.16b, v7.16b
bl aes_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w23, .Lxtsdecout
- st1 {v4.16b}, [x24]
- cond_yield_neon .Lxtsdecrestart
+ cbz w4, .Lxtsdecout
+ xts_reload_mask v8
b .LxtsdecloopNx
.Lxtsdec1x:
- adds w23, w23, #4
+ adds w4, w4, #4
beq .Lxtsdecout
.Lxtsdecloop:
- ld1 {v1.16b}, [x20], #16
+ ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
- decrypt_block v0, w22, x21, x8, w7
+ decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x19], #16
- subs w23, w23, #1
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
beq .Lxtsdecout
- next_tweak v4, v4, v7, v8
+ next_tweak v4, v4, v8
b .Lxtsdecloop
.Lxtsdecout:
- st1 {v4.16b}, [x24]
- frame_pop
+ st1 {v4.16b}, [x6]
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 1c7b45b7268e..29100f692e8a 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -14,6 +14,12 @@
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+ xtsmask .req v7
+
+ .macro xts_reload_mask, tmp
+ xts_load_mask \tmp
+ .endm
+
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
deleted file mode 100644
index 8061bf0f9c66..000000000000
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- * Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .section ".rodata", "a"
- .align 6
- .cpu generic+crypto+crc
-
-.Lcrc32_constants:
- /*
- * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
- * #define CONSTANT_R1 0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
- * #define CONSTANT_R2 0x1c6e41596LL
- */
- .octa 0x00000001c6e415960000000154442bd4
-
- /*
- * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
- * #define CONSTANT_R3 0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
- * #define CONSTANT_R4 0x0ccaa009eLL
- */
- .octa 0x00000000ccaa009e00000001751997d0
-
- /*
- * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
- * #define CONSTANT_R5 0x163cd6124LL
- */
- .quad 0x0000000163cd6124
- .quad 0x00000000FFFFFFFF
-
- /*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
- * = 0x1F7011641LL
- * #define CONSTANT_RU 0x1F7011641LL
- */
- .octa 0x00000001F701164100000001DB710641
-
-.Lcrc32c_constants:
- .octa 0x000000009e4addf800000000740eef02
- .octa 0x000000014cd00bd600000000f20c0dfe
- .quad 0x00000000dd45aab8
- .quad 0x00000000FFFFFFFF
- .octa 0x00000000dea713f10000000105ec76f0
-
- vCONSTANT .req v0
- dCONSTANT .req d0
- qCONSTANT .req q0
-
- BUF .req x19
- LEN .req x20
- CRC .req x21
- CONST .req x22
-
- vzr .req v9
-
- /**
- * Calculate crc32
- * BUF - buffer
- * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
- * CRC - initial crc32
- * return %eax crc32
- * uint crc32_pmull_le(unsigned char const *buffer,
- * size_t len, uint crc32)
- */
- .text
-ENTRY(crc32_pmull_le)
- adr_l x3, .Lcrc32_constants
- b 0f
-
-ENTRY(crc32c_pmull_le)
- adr_l x3, .Lcrc32c_constants
-
-0: frame_push 4, 64
-
- mov BUF, x0
- mov LEN, x1
- mov CRC, x2
- mov CONST, x3
-
- bic LEN, LEN, #15
- ld1 {v1.16b-v4.16b}, [BUF], #0x40
- movi vzr.16b, #0
- fmov dCONSTANT, CRC
- eor v1.16b, v1.16b, vCONSTANT.16b
- sub LEN, LEN, #0x40
- cmp LEN, #0x40
- b.lt less_64
-
- ldr qCONSTANT, [CONST]
-
-loop_64: /* 64 bytes Full cache line folding */
- sub LEN, LEN, #0x40
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull2 v6.1q, v2.2d, vCONSTANT.2d
- pmull2 v7.1q, v3.2d, vCONSTANT.2d
- pmull2 v8.1q, v4.2d, vCONSTANT.2d
-
- pmull v1.1q, v1.1d, vCONSTANT.1d
- pmull v2.1q, v2.1d, vCONSTANT.1d
- pmull v3.1q, v3.1d, vCONSTANT.1d
- pmull v4.1q, v4.1d, vCONSTANT.1d
-
- eor v1.16b, v1.16b, v5.16b
- ld1 {v5.16b}, [BUF], #0x10
- eor v2.16b, v2.16b, v6.16b
- ld1 {v6.16b}, [BUF], #0x10
- eor v3.16b, v3.16b, v7.16b
- ld1 {v7.16b}, [BUF], #0x10
- eor v4.16b, v4.16b, v8.16b
- ld1 {v8.16b}, [BUF], #0x10
-
- eor v1.16b, v1.16b, v5.16b
- eor v2.16b, v2.16b, v6.16b
- eor v3.16b, v3.16b, v7.16b
- eor v4.16b, v4.16b, v8.16b
-
- cmp LEN, #0x40
- b.lt less_64
-
- if_will_cond_yield_neon
- stp q1, q2, [sp, #.Lframe_local_offset]
- stp q3, q4, [sp, #.Lframe_local_offset + 32]
- do_cond_yield_neon
- ldp q1, q2, [sp, #.Lframe_local_offset]
- ldp q3, q4, [sp, #.Lframe_local_offset + 32]
- ldr qCONSTANT, [CONST]
- movi vzr.16b, #0
- endif_yield_neon
- b loop_64
-
-less_64: /* Folding cache line into 128bit */
- ldr qCONSTANT, [CONST, #16]
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v2.16b
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v3.16b
-
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v4.16b
-
- cbz LEN, fold_64
-
-loop_16: /* Folding rest buffer into 128bit */
- subs LEN, LEN, #0x10
-
- ld1 {v2.16b}, [BUF], #0x10
- pmull2 v5.1q, v1.2d, vCONSTANT.2d
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v5.16b
- eor v1.16b, v1.16b, v2.16b
-
- b.ne loop_16
-
-fold_64:
- /* perform the last 64 bit fold, also adds 32 zeroes
- * to the input stream */
- ext v2.16b, v1.16b, v1.16b, #8
- pmull2 v2.1q, v2.2d, vCONSTANT.2d
- ext v1.16b, v1.16b, vzr.16b, #8
- eor v1.16b, v1.16b, v2.16b
-
- /* final 32-bit fold */
- ldr dCONSTANT, [CONST, #32]
- ldr d3, [CONST, #40]
-
- ext v2.16b, v1.16b, vzr.16b, #4
- and v1.16b, v1.16b, v3.16b
- pmull v1.1q, v1.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v2.16b
-
- /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
- ldr qCONSTANT, [CONST, #48]
-
- and v2.16b, v1.16b, v3.16b
- ext v2.16b, vzr.16b, v2.16b, #8
- pmull2 v2.1q, v2.2d, vCONSTANT.2d
- and v2.16b, v2.16b, v3.16b
- pmull v2.1q, v2.1d, vCONSTANT.1d
- eor v1.16b, v1.16b, v2.16b
- mov w0, v1.s[1]
-
- frame_pop
- ret
-ENDPROC(crc32_pmull_le)
-ENDPROC(crc32c_pmull_le)
-
- .macro __crc32, c
-0: subs x2, x2, #16
- b.mi 8f
- ldp x3, x4, [x1], #16
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
- crc32\c\()x w0, w0, x3
- crc32\c\()x w0, w0, x4
- b.ne 0b
- ret
-
-8: tbz x2, #3, 4f
- ldr x3, [x1], #8
-CPU_BE( rev x3, x3 )
- crc32\c\()x w0, w0, x3
-4: tbz x2, #2, 2f
- ldr w3, [x1], #4
-CPU_BE( rev w3, w3 )
- crc32\c\()w w0, w0, w3
-2: tbz x2, #1, 1f
- ldrh w3, [x1], #2
-CPU_BE( rev16 w3, w3 )
- crc32\c\()h w0, w0, w3
-1: tbz x2, #0, 0f
- ldrb w3, [x1]
- crc32\c\()b w0, w0, w3
-0: ret
- .endm
-
- .align 5
-ENTRY(crc32_armv8_le)
- __crc32
-ENDPROC(crc32_armv8_le)
-
- .align 5
-ENTRY(crc32c_armv8_le)
- __crc32 c
-ENDPROC(crc32c_armv8_le)
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
deleted file mode 100644
index 34b4e3d46aab..000000000000
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <asm/unaligned.h>
-
-#define PMULL_MIN_LEN 64L /* minimum size of buffer
- * for crc32_pmull_le_16 */
-#define SCALE_F 16L /* size of NEON register */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
-
-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
-
-static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 0;
- return 0;
-}
-
-static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
- return 0;
-}
-
-static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32_pmull_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = *mctx;
- return 0;
-}
-
-static int crc32_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32c_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32c_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if ((u64)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
- *crc = fallback_crc32(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN && may_use_simd()) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
-
- if (length > 0)
- *crc = fallback_crc32(*crc, data, length);
-
- return 0;
-}
-
-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if ((u64)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
-
- *crc = fallback_crc32c(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN && may_use_simd()) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32c_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
-
- if (length > 0) {
- *crc = fallback_crc32c(*crc, data, length);
- }
-
- return 0;
-}
-
-static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(*crc, out);
- return 0;
-}
-
-static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(~*crc, out);
- return 0;
-}
-
-static struct shash_alg crc32_pmull_algs[] = { {
- .setkey = crc32_pmull_setkey,
- .init = crc32_pmull_init,
- .update = crc32_update,
- .final = crc32_pmull_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32_pmull_cra_init,
- .base.cra_name = "crc32",
- .base.cra_driver_name = "crc32-arm64-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-}, {
- .setkey = crc32_pmull_setkey,
- .init = crc32_pmull_init,
- .update = crc32c_update,
- .final = crc32c_pmull_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32c_pmull_cra_init,
- .base.cra_name = "crc32c",
- .base.cra_driver_name = "crc32c-arm64-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-} };
-
-static int __init crc32_pmull_mod_init(void)
-{
- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
- crc32_pmull_algs[0].update = crc32_pmull_update;
- crc32_pmull_algs[1].update = crc32c_pmull_update;
-
- if (elf_hwcap & HWCAP_CRC32) {
- fallback_crc32 = crc32_armv8_le;
- fallback_crc32c = crc32c_armv8_le;
- } else {
- fallback_crc32 = crc32_le;
- fallback_crc32c = __crc32c_le;
- }
- } else if (!(elf_hwcap & HWCAP_CRC32)) {
- return -ENODEV;
- }
- return crypto_register_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static void __exit crc32_pmull_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static const struct cpu_feature crc32_cpu_feature[] = {
- { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
-};
-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
-
-module_init(crc32_pmull_mod_init);
-module_exit(crc32_pmull_mod_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 663ea71cdb38..9e82e8e8ed05 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -80,7 +80,186 @@
vzr .req v13
-ENTRY(crc_t10dif_pmull)
+ ad .req v14
+ bd .req v10
+
+ k00_16 .req v15
+ k32_48 .req v16
+
+ t3 .req v17
+ t4 .req v18
+ t5 .req v19
+ t6 .req v20
+ t7 .req v21
+ t8 .req v22
+ t9 .req v23
+
+ perm1 .req v24
+ perm2 .req v25
+ perm3 .req v26
+ perm4 .req v27
+
+ bd1 .req v28
+ bd2 .req v29
+ bd3 .req v30
+ bd4 .req v31
+
+ .macro __pmull_init_p64
+ .endm
+
+ .macro __pmull_pre_p64, bd
+ .endm
+
+ .macro __pmull_init_p8
+ // k00_16 := 0x0000000000000000_000000000000ffff
+ // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+ movi k32_48.2d, #0xffffffff
+ mov k32_48.h[2], k32_48.h[0]
+ ushr k00_16.2d, k32_48.2d, #32
+
+ // prepare the permutation vectors
+ mov_q x5, 0x080f0e0d0c0b0a09
+ movi perm4.8b, #8
+ dup perm1.2d, x5
+ eor perm1.16b, perm1.16b, perm4.16b
+ ushr perm2.2d, perm1.2d, #8
+ ushr perm3.2d, perm1.2d, #16
+ ushr perm4.2d, perm1.2d, #24
+ sli perm2.2d, perm1.2d, #56
+ sli perm3.2d, perm1.2d, #48
+ sli perm4.2d, perm1.2d, #40
+ .endm
+
+ .macro __pmull_pre_p8, bd
+ tbl bd1.16b, {\bd\().16b}, perm1.16b
+ tbl bd2.16b, {\bd\().16b}, perm2.16b
+ tbl bd3.16b, {\bd\().16b}, perm3.16b
+ tbl bd4.16b, {\bd\().16b}, perm4.16b
+ .endm
+
+__pmull_p8_core:
+.L__pmull_p8_core:
+ ext t4.8b, ad.8b, ad.8b, #1 // A1
+ ext t5.8b, ad.8b, ad.8b, #2 // A2
+ ext t6.8b, ad.8b, ad.8b, #3 // A3
+
+ pmull t4.8h, t4.8b, bd.8b // F = A1*B
+ pmull t8.8h, ad.8b, bd1.8b // E = A*B1
+ pmull t5.8h, t5.8b, bd.8b // H = A2*B
+ pmull t7.8h, ad.8b, bd2.8b // G = A*B2
+ pmull t6.8h, t6.8b, bd.8b // J = A3*B
+ pmull t9.8h, ad.8b, bd3.8b // I = A*B3
+ pmull t3.8h, ad.8b, bd4.8b // K = A*B4
+ b 0f
+
+.L__pmull_p8_core2:
+ tbl t4.16b, {ad.16b}, perm1.16b // A1
+ tbl t5.16b, {ad.16b}, perm2.16b // A2
+ tbl t6.16b, {ad.16b}, perm3.16b // A3
+
+ pmull2 t4.8h, t4.16b, bd.16b // F = A1*B
+ pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1
+ pmull2 t5.8h, t5.16b, bd.16b // H = A2*B
+ pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2
+ pmull2 t6.8h, t6.16b, bd.16b // J = A3*B
+ pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3
+ pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4
+
+0: eor t4.16b, t4.16b, t8.16b // L = E + F
+ eor t5.16b, t5.16b, t7.16b // M = G + H
+ eor t6.16b, t6.16b, t9.16b // N = I + J
+
+ uzp1 t8.2d, t4.2d, t5.2d
+ uzp2 t4.2d, t4.2d, t5.2d
+ uzp1 t7.2d, t6.2d, t3.2d
+ uzp2 t6.2d, t6.2d, t3.2d
+
+ // t4 = (L) (P0 + P1) << 8
+ // t5 = (M) (P2 + P3) << 16
+ eor t8.16b, t8.16b, t4.16b
+ and t4.16b, t4.16b, k32_48.16b
+
+ // t6 = (N) (P4 + P5) << 24
+ // t7 = (K) (P6 + P7) << 32
+ eor t7.16b, t7.16b, t6.16b
+ and t6.16b, t6.16b, k00_16.16b
+
+ eor t8.16b, t8.16b, t4.16b
+ eor t7.16b, t7.16b, t6.16b
+
+ zip2 t5.2d, t8.2d, t4.2d
+ zip1 t4.2d, t8.2d, t4.2d
+ zip2 t3.2d, t7.2d, t6.2d
+ zip1 t6.2d, t7.2d, t6.2d
+
+ ext t4.16b, t4.16b, t4.16b, #15
+ ext t5.16b, t5.16b, t5.16b, #14
+ ext t6.16b, t6.16b, t6.16b, #13
+ ext t3.16b, t3.16b, t3.16b, #12
+
+ eor t4.16b, t4.16b, t5.16b
+ eor t6.16b, t6.16b, t3.16b
+ ret
+ENDPROC(__pmull_p8_core)
+
+ .macro __pmull_p8, rq, ad, bd, i
+ .ifnc \bd, v10
+ .err
+ .endif
+ mov ad.16b, \ad\().16b
+ .ifb \i
+ pmull \rq\().8h, \ad\().8b, bd.8b // D = A*B
+ .else
+ pmull2 \rq\().8h, \ad\().16b, bd.16b // D = A*B
+ .endif
+
+ bl .L__pmull_p8_core\i
+
+ eor \rq\().16b, \rq\().16b, t4.16b
+ eor \rq\().16b, \rq\().16b, t6.16b
+ .endm
+
+ .macro fold64, p, reg1, reg2
+ ldp q11, q12, [arg2], #0x20
+
+ __pmull_\p v8, \reg1, v10, 2
+ __pmull_\p \reg1, \reg1, v10
+
+CPU_LE( rev64 v11.16b, v11.16b )
+CPU_LE( rev64 v12.16b, v12.16b )
+
+ __pmull_\p v9, \reg2, v10, 2
+ __pmull_\p \reg2, \reg2, v10
+
+CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
+CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
+
+ eor \reg1\().16b, \reg1\().16b, v8.16b
+ eor \reg2\().16b, \reg2\().16b, v9.16b
+ eor \reg1\().16b, \reg1\().16b, v11.16b
+ eor \reg2\().16b, \reg2\().16b, v12.16b
+ .endm
+
+ .macro fold16, p, reg, rk
+ __pmull_\p v8, \reg, v10
+ __pmull_\p \reg, \reg, v10, 2
+ .ifnb \rk
+ ldr_l q10, \rk, x8
+ __pmull_pre_\p v10
+ .endif
+ eor v7.16b, v7.16b, v8.16b
+ eor v7.16b, v7.16b, \reg\().16b
+ .endm
+
+ .macro __pmull_p64, rd, rn, rm, n
+ .ifb \n
+ pmull \rd\().1q, \rn\().1d, \rm\().1d
+ .else
+ pmull2 \rd\().1q, \rn\().2d, \rm\().2d
+ .endif
+ .endm
+
+ .macro crc_t10dif_pmull, p
frame_push 3, 128
mov arg1_low32, w0
@@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
movi vzr.16b, #0 // init zero register
+ __pmull_init_\p
+
// adjust the 16-bit initial_crc value, scale it to 32 bits
lsl arg1_low32, arg1_low32, #16
@@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
cmp arg3, #256
// for sizes less than 128, we can't fold 64B at a time...
- b.lt _less_than_128
+ b.lt .L_less_than_128_\@
// load the initial crc value
// crc value does not need to be byte-reflected, but it needs
@@ -137,6 +318,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
ldr_l q10, rk3, x8 // xmm10 has rk3 and rk4
// type of pmull instruction
// will determine which constant to use
+ __pmull_pre_\p v10
//
// we subtract 256 instead of 128 to save one instruction from the loop
@@ -147,41 +329,19 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
// buffer. The _fold_64_B_loop will fold 64B at a time
// until we have 64+y Bytes of buffer
-
// fold 64B at a time. This section of the code folds 4 vector
// registers in parallel
-_fold_64_B_loop:
+.L_fold_64_B_loop_\@:
- .macro fold64, reg1, reg2
- ldp q11, q12, [arg2], #0x20
-
- pmull2 v8.1q, \reg1\().2d, v10.2d
- pmull \reg1\().1q, \reg1\().1d, v10.1d
-
-CPU_LE( rev64 v11.16b, v11.16b )
-CPU_LE( rev64 v12.16b, v12.16b )
-
- pmull2 v9.1q, \reg2\().2d, v10.2d
- pmull \reg2\().1q, \reg2\().1d, v10.1d
-
-CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
-CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
-
- eor \reg1\().16b, \reg1\().16b, v8.16b
- eor \reg2\().16b, \reg2\().16b, v9.16b
- eor \reg1\().16b, \reg1\().16b, v11.16b
- eor \reg2\().16b, \reg2\().16b, v12.16b
- .endm
-
- fold64 v0, v1
- fold64 v2, v3
- fold64 v4, v5
- fold64 v6, v7
+ fold64 \p, v0, v1
+ fold64 \p, v2, v3
+ fold64 \p, v4, v5
+ fold64 \p, v6, v7
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
- b.lt _fold_64_B_end
+ b.lt .L_fold_64_B_end_\@
if_will_cond_yield_neon
stp q0, q1, [sp, #.Lframe_local_offset]
@@ -195,11 +355,13 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
ldr_l q10, rk3, x8
movi vzr.16b, #0 // init zero register
+ __pmull_init_\p
+ __pmull_pre_\p v10
endif_yield_neon
- b _fold_64_B_loop
+ b .L_fold_64_B_loop_\@
-_fold_64_B_end:
+.L_fold_64_B_end_\@:
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
@@ -208,38 +370,29 @@ _fold_64_B_end:
// constants
ldr_l q10, rk9, x8
+ __pmull_pre_\p v10
- .macro fold16, reg, rk
- pmull v8.1q, \reg\().1d, v10.1d
- pmull2 \reg\().1q, \reg\().2d, v10.2d
- .ifnb \rk
- ldr_l q10, \rk, x8
- .endif
- eor v7.16b, v7.16b, v8.16b
- eor v7.16b, v7.16b, \reg\().16b
- .endm
-
- fold16 v0, rk11
- fold16 v1, rk13
- fold16 v2, rk15
- fold16 v3, rk17
- fold16 v4, rk19
- fold16 v5, rk1
- fold16 v6
+ fold16 \p, v0, rk11
+ fold16 \p, v1, rk13
+ fold16 \p, v2, rk15
+ fold16 \p, v3, rk17
+ fold16 \p, v4, rk19
+ fold16 \p, v5, rk1
+ fold16 \p, v6
// instead of 64, we add 48 to the loop counter to save 1 instruction
// from the loop instead of a cmp instruction, we use the negative
// flag with the jl instruction
adds arg3, arg3, #(128-16)
- b.lt _final_reduction_for_128
+ b.lt .L_final_reduction_for_128_\@
// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
// and the rest is in memory. We can fold 16 bytes at a time if y>=16
// continue folding 16B at a time
-_16B_reduction_loop:
- pmull v8.1q, v7.1d, v10.1d
- pmull2 v7.1q, v7.2d, v10.2d
+.L_16B_reduction_loop_\@:
+ __pmull_\p v8, v7, v10
+ __pmull_\p v7, v7, v10, 2
eor v7.16b, v7.16b, v8.16b
ldr q0, [arg2], #16
@@ -251,22 +404,22 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
// instead of a cmp instruction, we utilize the flags with the
// jge instruction equivalent of: cmp arg3, 16-16
// check if there is any more 16B in the buffer to be able to fold
- b.ge _16B_reduction_loop
+ b.ge .L_16B_reduction_loop_\@
// now we have 16+z bytes left to reduce, where 0<= z < 16.
// first, we reduce the data in the xmm7 register
-_final_reduction_for_128:
+.L_final_reduction_for_128_\@:
// check if any more data to fold. If not, compute the CRC of
// the final 128 bits
adds arg3, arg3, #16
- b.eq _128_done
+ b.eq .L_128_done_\@
// here we are getting data that is less than 16 bytes.
// since we know that there was data before the pointer, we can
// offset the input pointer before the actual point, to receive
// exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_regs:
+.L_get_last_two_regs_\@:
add arg2, arg2, arg3
ldr q1, [arg2, #-16]
CPU_LE( rev64 v1.16b, v1.16b )
@@ -291,47 +444,48 @@ CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
bsl v0.16b, v2.16b, v1.16b
// fold 16 Bytes
- pmull v8.1q, v7.1d, v10.1d
- pmull2 v7.1q, v7.2d, v10.2d
+ __pmull_\p v8, v7, v10
+ __pmull_\p v7, v7, v10, 2
eor v7.16b, v7.16b, v8.16b
eor v7.16b, v7.16b, v0.16b
-_128_done:
+.L_128_done_\@:
// compute crc of a 128-bit value
ldr_l q10, rk5, x8 // rk5 and rk6 in xmm10
+ __pmull_pre_\p v10
// 64b fold
ext v0.16b, vzr.16b, v7.16b, #8
mov v7.d[0], v7.d[1]
- pmull v7.1q, v7.1d, v10.1d
+ __pmull_\p v7, v7, v10
eor v7.16b, v7.16b, v0.16b
// 32b fold
ext v0.16b, v7.16b, vzr.16b, #4
mov v7.s[3], vzr.s[0]
- pmull2 v0.1q, v0.2d, v10.2d
+ __pmull_\p v0, v0, v10, 2
eor v7.16b, v7.16b, v0.16b
// barrett reduction
-_barrett:
ldr_l q10, rk7, x8
+ __pmull_pre_\p v10
mov v0.d[0], v7.d[1]
- pmull v0.1q, v0.1d, v10.1d
+ __pmull_\p v0, v0, v10
ext v0.16b, vzr.16b, v0.16b, #12
- pmull2 v0.1q, v0.2d, v10.2d
+ __pmull_\p v0, v0, v10, 2
ext v0.16b, vzr.16b, v0.16b, #12
eor v7.16b, v7.16b, v0.16b
mov w0, v7.s[1]
-_cleanup:
+.L_cleanup_\@:
// scale the result back to 16 bits
lsr x0, x0, #16
frame_pop
ret
-_less_than_128:
- cbz arg3, _cleanup
+.L_less_than_128_\@:
+ cbz arg3, .L_cleanup_\@
movi v0.16b, #0
mov v0.s[3], arg1_low32 // get the initial crc value
@@ -342,20 +496,21 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
eor v7.16b, v7.16b, v0.16b // xor the initial crc value
cmp arg3, #16
- b.eq _128_done // exactly 16 left
- b.lt _less_than_16_left
+ b.eq .L_128_done_\@ // exactly 16 left
+ b.lt .L_less_than_16_left_\@
ldr_l q10, rk1, x8 // rk1 and rk2 in xmm10
+ __pmull_pre_\p v10
// update the counter. subtract 32 instead of 16 to save one
// instruction from the loop
subs arg3, arg3, #32
- b.ge _16B_reduction_loop
+ b.ge .L_16B_reduction_loop_\@
add arg3, arg3, #16
- b _get_last_two_regs
+ b .L_get_last_two_regs_\@
-_less_than_16_left:
+.L_less_than_16_left_\@:
// shl r9, 4
adr_l x0, tbl_shf_table + 16
sub x0, x0, arg3
@@ -363,8 +518,17 @@ _less_than_16_left:
movi v9.16b, #0x80
eor v0.16b, v0.16b, v9.16b
tbl v7.16b, {v7.16b}, v0.16b
- b _128_done
-ENDPROC(crc_t10dif_pmull)
+ b .L_128_done_\@
+ .endm
+
+ENTRY(crc_t10dif_pmull_p8)
+ crc_t10dif_pmull p8
+ENDPROC(crc_t10dif_pmull_p8)
+
+ .align 5
+ENTRY(crc_t10dif_pmull_p64)
+ crc_t10dif_pmull p64
+ENDPROC(crc_t10dif_pmull_p64)
// precomputed constants
// these constants are precomputed from the poly:
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 96f0cae4a022..b461d62023f2 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -22,7 +22,10 @@
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
-asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
+asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
+
+static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
static int crct10dif_init(struct shash_desc *desc)
{
@@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
static int __init crc_t10dif_mod_init(void)
{
+ if (elf_hwcap & HWCAP_PMULL)
+ crc_t10dif_pmull = crc_t10dif_pmull_p64;
+ else
+ crc_t10dif_pmull = crc_t10dif_pmull_p8;
+
return crypto_register_shash(&crc_t10dif_alg);
}
@@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
crypto_unregister_shash(&crc_t10dif_alg);
}
-module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_exit);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S
deleted file mode 100644
index b14463438b09..000000000000
--- a/arch/arm64/crypto/speck-neon-core.S
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- *
- * Copyright (c) 2018 Google, Inc
- *
- * Author: Eric Biggers <ebiggers@google.com>
- */
-
-#include <linux/linkage.h>
-
- .text
-
- // arguments
- ROUND_KEYS .req x0 // const {u64,u32} *round_keys
- NROUNDS .req w1 // int nrounds
- NROUNDS_X .req x1
- DST .req x2 // void *dst
- SRC .req x3 // const void *src
- NBYTES .req w4 // unsigned int nbytes
- TWEAK .req x5 // void *tweak
-
- // registers which hold the data being encrypted/decrypted
- // (underscores avoid a naming collision with ARM64 registers x0-x3)
- X_0 .req v0
- Y_0 .req v1
- X_1 .req v2
- Y_1 .req v3
- X_2 .req v4
- Y_2 .req v5
- X_3 .req v6
- Y_3 .req v7
-
- // the round key, duplicated in all lanes
- ROUND_KEY .req v8
-
- // index vector for tbl-based 8-bit rotates
- ROTATE_TABLE .req v9
- ROTATE_TABLE_Q .req q9
-
- // temporary registers
- TMP0 .req v10
- TMP1 .req v11
- TMP2 .req v12
- TMP3 .req v13
-
- // multiplication table for updating XTS tweaks
- GFMUL_TABLE .req v14
- GFMUL_TABLE_Q .req q14
-
- // next XTS tweak value(s)
- TWEAKV_NEXT .req v15
-
- // XTS tweaks for the blocks currently being encrypted/decrypted
- TWEAKV0 .req v16
- TWEAKV1 .req v17
- TWEAKV2 .req v18
- TWEAKV3 .req v19
- TWEAKV4 .req v20
- TWEAKV5 .req v21
- TWEAKV6 .req v22
- TWEAKV7 .req v23
-
- .align 4
-.Lror64_8_table:
- .octa 0x080f0e0d0c0b0a090007060504030201
-.Lror32_8_table:
- .octa 0x0c0f0e0d080b0a090407060500030201
-.Lrol64_8_table:
- .octa 0x0e0d0c0b0a09080f0605040302010007
-.Lrol32_8_table:
- .octa 0x0e0d0c0f0a09080b0605040702010003
-.Lgf128mul_table:
- .octa 0x00000000000000870000000000000001
-.Lgf64mul_table:
- .octa 0x0000000000000000000000002d361b00
-
-/*
- * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
- *
- * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
- * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
- * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
- * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
- */
-.macro _speck_round_128bytes n, lanes
-
- // x = ror(x, 8)
- tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
- tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
- tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
- tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-
- // x += y
- add X_0.\lanes, X_0.\lanes, Y_0.\lanes
- add X_1.\lanes, X_1.\lanes, Y_1.\lanes
- add X_2.\lanes, X_2.\lanes, Y_2.\lanes
- add X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
- // x ^= k
- eor X_0.16b, X_0.16b, ROUND_KEY.16b
- eor X_1.16b, X_1.16b, ROUND_KEY.16b
- eor X_2.16b, X_2.16b, ROUND_KEY.16b
- eor X_3.16b, X_3.16b, ROUND_KEY.16b
-
- // y = rol(y, 3)
- shl TMP0.\lanes, Y_0.\lanes, #3
- shl TMP1.\lanes, Y_1.\lanes, #3
- shl TMP2.\lanes, Y_2.\lanes, #3
- shl TMP3.\lanes, Y_3.\lanes, #3
- sri TMP0.\lanes, Y_0.\lanes, #(\n - 3)
- sri TMP1.\lanes, Y_1.\lanes, #(\n - 3)
- sri TMP2.\lanes, Y_2.\lanes, #(\n - 3)
- sri TMP3.\lanes, Y_3.\lanes, #(\n - 3)
-
- // y ^= x
- eor Y_0.16b, TMP0.16b, X_0.16b
- eor Y_1.16b, TMP1.16b, X_1.16b
- eor Y_2.16b, TMP2.16b, X_2.16b
- eor Y_3.16b, TMP3.16b, X_3.16b
-.endm
-
-/*
- * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
- *
- * This is the inverse of _speck_round_128bytes().
- */
-.macro _speck_unround_128bytes n, lanes
-
- // y ^= x
- eor TMP0.16b, Y_0.16b, X_0.16b
- eor TMP1.16b, Y_1.16b, X_1.16b
- eor TMP2.16b, Y_2.16b, X_2.16b
- eor TMP3.16b, Y_3.16b, X_3.16b
-
- // y = ror(y, 3)
- ushr Y_0.\lanes, TMP0.\lanes, #3
- ushr Y_1.\lanes, TMP1.\lanes, #3
- ushr Y_2.\lanes, TMP2.\lanes, #3
- ushr Y_3.\lanes, TMP3.\lanes, #3
- sli Y_0.\lanes, TMP0.\lanes, #(\n - 3)
- sli Y_1.\lanes, TMP1.\lanes, #(\n - 3)
- sli Y_2.\lanes, TMP2.\lanes, #(\n - 3)
- sli Y_3.\lanes, TMP3.\lanes, #(\n - 3)
-
- // x ^= k
- eor X_0.16b, X_0.16b, ROUND_KEY.16b
- eor X_1.16b, X_1.16b, ROUND_KEY.16b
- eor X_2.16b, X_2.16b, ROUND_KEY.16b
- eor X_3.16b, X_3.16b, ROUND_KEY.16b
-
- // x -= y
- sub X_0.\lanes, X_0.\lanes, Y_0.\lanes
- sub X_1.\lanes, X_1.\lanes, Y_1.\lanes
- sub X_2.\lanes, X_2.\lanes, Y_2.\lanes
- sub X_3.\lanes, X_3.\lanes, Y_3.\lanes
-
- // x = rol(x, 8)
- tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
- tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
- tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
- tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
-.endm
-
-.macro _next_xts_tweak next, cur, tmp, n
-.if \n == 64
- /*
- * Calculate the next tweak by multiplying the current one by x,
- * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
- */
- sshr \tmp\().2d, \cur\().2d, #63
- and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
- shl \next\().2d, \cur\().2d, #1
- ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
- eor \next\().16b, \next\().16b, \tmp\().16b
-.else
- /*
- * Calculate the next two tweaks by multiplying the current ones by x^2,
- * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
- */
- ushr \tmp\().2d, \cur\().2d, #62
- shl \next\().2d, \cur\().2d, #2
- tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
- eor \next\().16b, \next\().16b, \tmp\().16b
-.endif
-.endm
-
-/*
- * _speck_xts_crypt() - Speck-XTS encryption/decryption
- *
- * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
- * using Speck-XTS, specifically the variant with a block size of '2n' and round
- * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
- * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
- * nonzero multiple of 128.
- */
-.macro _speck_xts_crypt n, lanes, decrypting
-
- /*
- * If decrypting, modify the ROUND_KEYS parameter to point to the last
- * round key rather than the first, since for decryption the round keys
- * are used in reverse order.
- */
-.if \decrypting
- mov NROUNDS, NROUNDS /* zero the high 32 bits */
-.if \n == 64
- add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
- sub ROUND_KEYS, ROUND_KEYS, #8
-.else
- add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
- sub ROUND_KEYS, ROUND_KEYS, #4
-.endif
-.endif
-
- // Load the index vector for tbl-based 8-bit rotates
-.if \decrypting
- ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table
-.else
- ldr ROTATE_TABLE_Q, .Lror\n\()_8_table
-.endif
-
- // One-time XTS preparation
-.if \n == 64
- // Load first tweak
- ld1 {TWEAKV0.16b}, [TWEAK]
-
- // Load GF(2^128) multiplication table
- ldr GFMUL_TABLE_Q, .Lgf128mul_table
-.else
- // Load first tweak
- ld1 {TWEAKV0.8b}, [TWEAK]
-
- // Load GF(2^64) multiplication table
- ldr GFMUL_TABLE_Q, .Lgf64mul_table
-
- // Calculate second tweak, packing it together with the first
- ushr TMP0.2d, TWEAKV0.2d, #63
- shl TMP1.2d, TWEAKV0.2d, #1
- tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
- eor TMP0.8b, TMP0.8b, TMP1.8b
- mov TWEAKV0.d[1], TMP0.d[0]
-.endif
-
-.Lnext_128bytes_\@:
-
- // Calculate XTS tweaks for next 128 bytes
- _next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n
- _next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n
- _next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n
- _next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n
- _next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n
- _next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n
- _next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n
- _next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n
-
- // Load the next source blocks into {X,Y}[0-3]
- ld1 {X_0.16b-Y_1.16b}, [SRC], #64
- ld1 {X_2.16b-Y_3.16b}, [SRC], #64
-
- // XOR the source blocks with their XTS tweaks
- eor TMP0.16b, X_0.16b, TWEAKV0.16b
- eor Y_0.16b, Y_0.16b, TWEAKV1.16b
- eor TMP1.16b, X_1.16b, TWEAKV2.16b
- eor Y_1.16b, Y_1.16b, TWEAKV3.16b
- eor TMP2.16b, X_2.16b, TWEAKV4.16b
- eor Y_2.16b, Y_2.16b, TWEAKV5.16b
- eor TMP3.16b, X_3.16b, TWEAKV6.16b
- eor Y_3.16b, Y_3.16b, TWEAKV7.16b
-
- /*
- * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
- * that the X[0-3] registers contain only the second halves of blocks,
- * and the Y[0-3] registers contain only the first halves of blocks.
- * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
- */
- uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes
- uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
- uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes
- uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
- uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes
- uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
- uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes
- uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
-
- // Do the cipher rounds
- mov x6, ROUND_KEYS
- mov w7, NROUNDS
-.Lnext_round_\@:
-.if \decrypting
- ld1r {ROUND_KEY.\lanes}, [x6]
- sub x6, x6, #( \n / 8 )
- _speck_unround_128bytes \n, \lanes
-.else
- ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 )
- _speck_round_128bytes \n, \lanes
-.endif
- subs w7, w7, #1
- bne .Lnext_round_\@
-
- // Re-interleave the 'x' and 'y' elements of each block
- zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes
- zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes
- zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes
- zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes
- zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes
- zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes
- zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes
- zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes
-
- // XOR the encrypted/decrypted blocks with the tweaks calculated earlier
- eor X_0.16b, TMP0.16b, TWEAKV0.16b
- eor Y_0.16b, Y_0.16b, TWEAKV1.16b
- eor X_1.16b, TMP1.16b, TWEAKV2.16b
- eor Y_1.16b, Y_1.16b, TWEAKV3.16b
- eor X_2.16b, TMP2.16b, TWEAKV4.16b
- eor Y_2.16b, Y_2.16b, TWEAKV5.16b
- eor X_3.16b, TMP3.16b, TWEAKV6.16b
- eor Y_3.16b, Y_3.16b, TWEAKV7.16b
- mov TWEAKV0.16b, TWEAKV_NEXT.16b
-
- // Store the ciphertext in the destination buffer
- st1 {X_0.16b-Y_1.16b}, [DST], #64
- st1 {X_2.16b-Y_3.16b}, [DST], #64
-
- // Continue if there are more 128-byte chunks remaining
- subs NBYTES, NBYTES, #128
- bne .Lnext_128bytes_\@
-
- // Store the next tweak and return
-.if \n == 64
- st1 {TWEAKV_NEXT.16b}, [TWEAK]
-.else
- st1 {TWEAKV_NEXT.8b}, [TWEAK]
-.endif
- ret
-.endm
-
-ENTRY(speck128_xts_encrypt_neon)
- _speck_xts_crypt n=64, lanes=2d, decrypting=0
-ENDPROC(speck128_xts_encrypt_neon)
-
-ENTRY(speck128_xts_decrypt_neon)
- _speck_xts_crypt n=64, lanes=2d, decrypting=1
-ENDPROC(speck128_xts_decrypt_neon)
-
-ENTRY(speck64_xts_encrypt_neon)
- _speck_xts_crypt n=32, lanes=4s, decrypting=0
-ENDPROC(speck64_xts_encrypt_neon)
-
-ENTRY(speck64_xts_decrypt_neon)
- _speck_xts_crypt n=32, lanes=4s, decrypting=1
-ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c
deleted file mode 100644
index 6e233aeb4ff4..000000000000
--- a/arch/arm64/crypto/speck-neon-glue.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
- * (64-bit version; based on the 32-bit version)
- *
- * Copyright (c) 2018 Google, Inc
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <crypto/algapi.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/speck.h>
-#include <crypto/xts.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-/* The assembly functions only handle multiples of 128 bytes */
-#define SPECK_NEON_CHUNK_SIZE 128
-
-/* Speck128 */
-
-struct speck128_xts_tfm_ctx {
- struct speck128_tfm_ctx main_key;
- struct speck128_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck128_xts_crypt(struct skcipher_request *req,
- speck128_crypt_one_t crypt_one,
- speck128_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- le128 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- le128_xor((le128 *)dst, (const le128 *)src, &tweak);
- (*crypt_one)(&ctx->main_key, dst, dst);
- le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
- gf128mul_x_ble(&tweak, &tweak);
-
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck128_xts_encrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_encrypt,
- speck128_xts_encrypt_neon);
-}
-
-static int speck128_xts_decrypt(struct skcipher_request *req)
-{
- return __speck128_xts_crypt(req, crypto_speck128_decrypt,
- speck128_xts_decrypt_neon);
-}
-
-static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-/* Speck64 */
-
-struct speck64_xts_tfm_ctx {
- struct speck64_tfm_ctx main_key;
- struct speck64_tfm_ctx tweak_key;
-};
-
-asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
- void *dst, const void *src,
- unsigned int nbytes, void *tweak);
-
-typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
- u8 *, const u8 *);
-typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
- const void *, unsigned int, void *);
-
-static __always_inline int
-__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
- speck64_xts_crypt_many_t crypt_many)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- __le64 tweak;
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- const u8 *src = walk.src.virt.addr;
-
- if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
- unsigned int count;
-
- count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
- kernel_neon_begin();
- (*crypt_many)(ctx->main_key.round_keys,
- ctx->main_key.nrounds,
- dst, src, count, &tweak);
- kernel_neon_end();
- dst += count;
- src += count;
- nbytes -= count;
- }
-
- /* Handle any remainder with generic code */
- while (nbytes >= sizeof(tweak)) {
- *(__le64 *)dst = *(__le64 *)src ^ tweak;
- (*crypt_one)(&ctx->main_key, dst, dst);
- *(__le64 *)dst ^= tweak;
- tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
- ((tweak & cpu_to_le64(1ULL << 63)) ?
- 0x1B : 0));
- dst += sizeof(tweak);
- src += sizeof(tweak);
- nbytes -= sizeof(tweak);
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static int speck64_xts_encrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_encrypt,
- speck64_xts_encrypt_neon);
-}
-
-static int speck64_xts_decrypt(struct skcipher_request *req)
-{
- return __speck64_xts_crypt(req, crypto_speck64_decrypt,
- speck64_xts_decrypt_neon);
-}
-
-static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
- if (err)
- return err;
-
- return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
-}
-
-static struct skcipher_alg speck_algs[] = {
- {
- .base.cra_name = "xts(speck128)",
- .base.cra_driver_name = "xts-speck128-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK128_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK128_128_KEY_SIZE,
- .max_keysize = 2 * SPECK128_256_KEY_SIZE,
- .ivsize = SPECK128_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck128_xts_setkey,
- .encrypt = speck128_xts_encrypt,
- .decrypt = speck128_xts_decrypt,
- }, {
- .base.cra_name = "xts(speck64)",
- .base.cra_driver_name = "xts-speck64-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = SPECK64_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
- .base.cra_alignmask = 7,
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SPECK64_96_KEY_SIZE,
- .max_keysize = 2 * SPECK64_128_KEY_SIZE,
- .ivsize = SPECK64_BLOCK_SIZE,
- .walksize = SPECK_NEON_CHUNK_SIZE,
- .setkey = speck64_xts_setkey,
- .encrypt = speck64_xts_encrypt,
- .decrypt = speck64_xts_decrypt,
- }
-};
-
-static int __init speck_neon_module_init(void)
-{
- if (!(elf_hwcap & HWCAP_ASIMD))
- return -ENODEV;
- return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-static void __exit speck_neon_module_exit(void)
-{
- crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
-}
-
-module_init(speck_neon_module_init);
-module_exit(speck_neon_module_exit);
-
-MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("xts(speck128)");
-MODULE_ALIAS_CRYPTO("xts-speck128-neon");
-MODULE_ALIAS_CRYPTO("xts(speck64)");
-MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index cee28a05ee98..93ce86d5dae1 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -25,6 +25,8 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#ifdef __AARCH64EB__
#define COMPAT_UTS_MACHINE "armv8b\0\0"
@@ -32,10 +34,6 @@
#define COMPAT_UTS_MACHINE "armv8l\0\0"
#endif
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
typedef u16 __compat_uid16_t;
@@ -43,27 +41,13 @@ typedef u16 __compat_gid16_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef s32 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s16 compat_short_t;
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u16 compat_ushort_t;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
struct compat_stat {
#ifdef __AARCH64EB__
@@ -86,11 +70,11 @@ struct compat_stat {
compat_off_t st_size;
compat_off_t st_blksize;
compat_off_t st_blocks;
- compat_time_t st_atime;
+ old_time32_t st_atime;
compat_ulong_t st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
compat_ulong_t st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
compat_ulong_t st_ctime_nsec;
compat_ulong_t __unused4[2];
};
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 6db48d90ad63..7e2ec64aa414 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -537,6 +537,27 @@ static inline void arm64_set_ssbd_mitigation(bool state) {}
#endif
extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
+
+static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
+{
+ switch (parange) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5: return 48;
+ case 6: return 52;
+ /*
+ * A future PE could use a value unknown to the kernel.
+ * However, by the "D10.1.4 Principles of the ID scheme
+ * for fields in ID registers", ARM DDI 0487C.a, any new
+ * value is guaranteed to be higher than what we know already.
+ * As a safe limit, we return the limit supported by the kernel.
+ */
+ default: return CONFIG_ARM64_PA_BITS;
+ }
+}
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b476bc46f0ab..6f602af5263c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
#define VTCR_EL2_RES1 (1 << 31)
#define VTCR_EL2_HD (1 << 22)
#define VTCR_EL2_HA (1 << 21)
+#define VTCR_EL2_PS_SHIFT TCR_EL2_PS_SHIFT
#define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK
#define VTCR_EL2_TG0_MASK TCR_TG0_MASK
#define VTCR_EL2_TG0_4K TCR_TG0_4K
@@ -120,63 +121,150 @@
#define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA
#define VTCR_EL2_SL0_SHIFT 6
#define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT)
-#define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT)
#define VTCR_EL2_T0SZ_MASK 0x3f
-#define VTCR_EL2_T0SZ_40B 24
#define VTCR_EL2_VS_SHIFT 19
#define VTCR_EL2_VS_8BIT (0 << VTCR_EL2_VS_SHIFT)
#define VTCR_EL2_VS_16BIT (1 << VTCR_EL2_VS_SHIFT)
+#define VTCR_EL2_T0SZ(x) TCR_T0SZ(x)
+
/*
* We configure the Stage-2 page tables to always restrict the IPA space to be
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
- * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
- * (see hyp-init.S).
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
*
* Note that when using 4K pages, we concatenate two first level page tables
* together. With 16K pages, we concatenate 16 first level page tables.
*
- * The magic numbers used for VTTBR_X in this patch can be found in Tables
- * D4-23 and D4-25 in ARM DDI 0487A.b.
*/
-#define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B
#define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
-#ifdef CONFIG_ARM64_64K_PAGES
/*
- * Stage2 translation configuration:
- * 64kB pages (TG0 = 1)
- * 2 level page tables (SL = 1)
+ * VTCR_EL2:SL0 indicates the entry level for Stage2 translation.
+ * Interestingly, it depends on the page size.
+ * See D.10.2.121, VTCR_EL2, in ARM DDI 0487C.a
+ *
+ * -----------------------------------------
+ * | Entry level | 4K | 16K/64K |
+ * ------------------------------------------
+ * | Level: 0 | 2 | - |
+ * ------------------------------------------
+ * | Level: 1 | 1 | 2 |
+ * ------------------------------------------
+ * | Level: 2 | 0 | 1 |
+ * ------------------------------------------
+ * | Level: 3 | - | 0 |
+ * ------------------------------------------
+ *
+ * The table roughly translates to :
+ *
+ * SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level
+ *
+ * Where TGRAN_SL0_BASE is a magic number depending on the page size:
+ * TGRAN_SL0_BASE(4K) = 2
+ * TGRAN_SL0_BASE(16K) = 3
+ * TGRAN_SL0_BASE(64K) = 3
+ * provided we take care of ruling out the unsupported cases and
+ * Entry_Level = 4 - Number_of_levels.
+ *
*/
-#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC 38
+#ifdef CONFIG_ARM64_64K_PAGES
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_64K
+#define VTCR_EL2_TGRAN_SL0_BASE 3UL
+
#elif defined(CONFIG_ARM64_16K_PAGES)
-/*
- * Stage2 translation configuration:
- * 16kB pages (TG0 = 2)
- * 2 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC 42
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_16K
+#define VTCR_EL2_TGRAN_SL0_BASE 3UL
+
#else /* 4K */
-/*
- * Stage2 translation configuration:
- * 4kB pages (TG0 = 0)
- * 3 level page tables (SL = 1)
- */
-#define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
-#define VTTBR_X_TGRAN_MAGIC 37
+
+#define VTCR_EL2_TGRAN VTCR_EL2_TG0_4K
+#define VTCR_EL2_TGRAN_SL0_BASE 2UL
+
#endif
-#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
-#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+#define VTCR_EL2_LVLS_TO_SL0(levels) \
+ ((VTCR_EL2_TGRAN_SL0_BASE - (4 - (levels))) << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_TO_LVLS(sl0) \
+ ((sl0) + 4 - VTCR_EL2_TGRAN_SL0_BASE)
+#define VTCR_EL2_LVLS(vtcr) \
+ VTCR_EL2_SL0_TO_LVLS(((vtcr) & VTCR_EL2_SL0_MASK) >> VTCR_EL2_SL0_SHIFT)
+
+#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN)
+#define VTCR_EL2_IPA(vtcr) (64 - ((vtcr) & VTCR_EL2_T0SZ_MASK))
+
+/*
+ * ARM VMSAv8-64 defines an algorithm for finding the translation table
+ * descriptors in section D4.2.8 in ARM DDI 0487C.a.
+ *
+ * The algorithm defines the expectations on the translation table
+ * addresses for each level, based on PAGE_SIZE, entry level
+ * and the translation table size (T0SZ). The variable "x" in the
+ * algorithm determines the alignment of a table base address at a given
+ * level and thus determines the alignment of VTTBR:BADDR for stage2
+ * page table entry level.
+ * Since the number of bits resolved at the entry level could vary
+ * depending on the T0SZ, the value of "x" is defined based on a
+ * Magic constant for a given PAGE_SIZE and Entry Level. The
+ * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
+ * x = PAGE_SHIFT).
+ *
+ * The value of "x" for entry level is calculated as :
+ * x = Magic_N - T0SZ
+ *
+ * where Magic_N is an integer depending on the page size and the entry
+ * level of the page table as below:
+ *
+ * --------------------------------------------
+ * | Entry level | 4K 16K 64K |
+ * --------------------------------------------
+ * | Level: 0 (4 levels) | 28 | - | - |
+ * --------------------------------------------
+ * | Level: 1 (3 levels) | 37 | 31 | 25 |
+ * --------------------------------------------
+ * | Level: 2 (2 levels) | 46 | 42 | 38 |
+ * --------------------------------------------
+ * | Level: 3 (1 level) | - | 53 | 51 |
+ * --------------------------------------------
+ *
+ * We have a magic formula for the Magic_N below:
+ *
+ * Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels)
+ *
+ * where Number_of_levels = (4 - Level). We are only interested in the
+ * value for Entry_Level for the stage2 page table.
+ *
+ * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows:
+ *
+ * x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT)
+ * = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels)
+ *
+ * Here is one way to explain the Magic Formula:
+ *
+ * x = log2(Size_of_Entry_Level_Table)
+ *
+ * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another
+ * PAGE_SHIFT bits in the PTE, we have :
+ *
+ * Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT)
+ * = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3
+ * where n = number of levels, and since each pointer is 8bytes, we have:
+ *
+ * x = Bits_Entry_Level + 3
+ * = IPA_SHIFT - (PAGE_SHIFT - 3) * n
+ *
+ * The only constraint here is that, we have to find the number of page table
+ * levels for a given IPA size (which we do, see stage2_pt_levels())
+ */
+#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3)))
#define VTTBR_CNP_BIT (UL(1))
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
@@ -224,6 +312,13 @@
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
+/*
+ * We have
+ * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12]
+ * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12]
+ */
+#define PAR_TO_HPFAR(par) \
+ (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
#define kvm_arm_exception_type \
{0, "IRQ" }, \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 102b5a5c47b6..aea01a09eb94 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -30,6 +30,7 @@
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_EL1_SERROR 1
#define ARM_EXCEPTION_TRAP 2
+#define ARM_EXCEPTION_IL 3
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
@@ -72,8 +73,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
-extern u32 __init_stage2_translation(void);
-
/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
#define __hyp_this_cpu_ptr(sym) \
({ \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2842bf149029..52fbc823ff8c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -53,7 +53,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
struct kvm_arch {
@@ -61,11 +61,13 @@ struct kvm_arch {
u64 vmid_gen;
u32 vmid;
- /* 1-level 2nd stage table, protected by kvm->mmu_lock */
+ /* stage2 entry level table */
pgd_t *pgd;
/* VTTBR value associated with above pgd and vmid */
u64 vttbr;
+ /* VTCR_EL2 value for this VM */
+ u64 vtcr;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
@@ -451,13 +453,7 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
-static inline void __cpu_init_stage2(void)
-{
- u32 parange = kvm_call_hyp(__init_stage2_translation);
-
- WARN_ONCE(parange < 40,
- "PARange is %d bits, unsupported configuration!", parange);
-}
+static inline void __cpu_init_stage2(void) {}
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
@@ -520,8 +516,12 @@ static inline int kvm_arm_have_ssbd(void)
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+void kvm_set_ipa_limit(void);
+
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 384c34397619..23aca66767f9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -155,5 +155,15 @@ void deactivate_traps_vhe_put(void);
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
+/*
+ * Must be called from hyp code running at EL2 with an updated VTTBR
+ * and interrupts disabled.
+ */
+static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vtcr, vtcr_el2);
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 64337afbf124..658657367f2f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -141,8 +141,16 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
-#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
-#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+
+#define kvm_phys_shift(kvm) VTCR_EL2_IPA(kvm->arch.vtcr)
+#define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm))
+#define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL))
+
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
#include <asm/stage2_pgtable.h>
@@ -238,12 +246,6 @@ static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}
-static inline bool kvm_page_empty(void *ptr)
-{
- struct page *ptr_page = virt_to_page(ptr);
- return page_count(ptr_page) == 1;
-}
-
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
#ifdef __PAGETABLE_PMD_FOLDED
@@ -517,6 +519,30 @@ static inline int hyp_map_aux_data(void)
#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
+/*
+ * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
+ * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
+ * 52bit IPS.
+ */
+static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
+{
+ int x = ARM64_VTTBR_X(ipa_shift, levels);
+
+ return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
+}
+
+static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
+{
+ unsigned int x = arm64_vttbr_x(ipa_shift, levels);
+
+ return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
+}
+
+static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
+{
+ return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
+}
+
static inline bool kvm_cpu_has_cnp(void)
{
return system_supports_cnp();
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 6bc43889d11e..fce22c4b2f73 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -25,6 +25,9 @@
#define CurrentEL_EL1 (1 << 2)
#define CurrentEL_EL2 (2 << 2)
+/* Additional SPSR bits not exposed in the UABI */
+#define PSR_IL_BIT (1 << 20)
+
/* AArch32-specific ptrace requests */
#define COMPAT_PTRACE_GETREGS 12
#define COMPAT_PTRACE_SETREGS 13
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
deleted file mode 100644
index 2656a0fd05a6..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopmd.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
-#define __ARM64_S2_PGTABLE_NOPMD_H_
-
-#include <asm/stage2_pgtable-nopud.h>
-
-#define __S2_PGTABLE_PMD_FOLDED
-
-#define S2_PMD_SHIFT S2_PUD_SHIFT
-#define S2_PTRS_PER_PMD 1
-#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
-#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
-
-#define stage2_pud_none(pud) (0)
-#define stage2_pud_present(pud) (1)
-#define stage2_pud_clear(pud) do { } while (0)
-#define stage2_pud_populate(pud, pmd) do { } while (0)
-#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
-
-#define stage2_pmd_free(pmd) do { } while (0)
-
-#define stage2_pmd_addr_end(addr, end) (end)
-
-#define stage2_pud_huge(pud) (0)
-#define stage2_pmd_table_empty(pmdp) (0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
deleted file mode 100644
index 5ee87b54ebf3..000000000000
--- a/arch/arm64/include/asm/stage2_pgtable-nopud.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
-#define __ARM64_S2_PGTABLE_NOPUD_H_
-
-#define __S2_PGTABLE_PUD_FOLDED
-
-#define S2_PUD_SHIFT S2_PGDIR_SHIFT
-#define S2_PTRS_PER_PUD 1
-#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
-#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
-
-#define stage2_pgd_none(pgd) (0)
-#define stage2_pgd_present(pgd) (1)
-#define stage2_pgd_clear(pgd) do { } while (0)
-#define stage2_pgd_populate(pgd, pud) do { } while (0)
-
-#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
-
-#define stage2_pud_free(x) do { } while (0)
-
-#define stage2_pud_addr_end(addr, end) (end)
-#define stage2_pud_table_empty(pmdp) (0)
-
-#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 8b68099348e5..d352f6df8d2c 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -19,9 +19,17 @@
#ifndef __ARM64_S2_PGTABLE_H_
#define __ARM64_S2_PGTABLE_H_
+#include <linux/hugetlb.h>
#include <asm/pgtable.h>
/*
+ * PGDIR_SHIFT determines the size a top-level page table entry can map
+ * and depends on the number of levels in the page table. Compute the
+ * PGDIR_SHIFT for a given number of levels.
+ */
+#define pt_levels_pgdir_shift(lvls) ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - (lvls))
+
+/*
* The hardware supports concatenation of up to 16 tables at stage2 entry level
* and we use the feature whenever possible.
*
@@ -29,112 +37,208 @@
* On arm64, the smallest PAGE_SIZE supported is 4k, which means
* (PAGE_SHIFT - 3) > 4 holds for all page sizes.
* This implies, the total number of page table levels at stage2 expected
- * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
+ * by the hardware is actually the number of levels required for (IPA_SHIFT - 4)
* in normal translations(e.g, stage1), since we cannot have another level in
- * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
+ * the range (IPA_SHIFT, IPA_SHIFT - 4).
*/
-#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
+#define stage2_pgtable_levels(ipa) ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
+#define kvm_stage2_levels(kvm) VTCR_EL2_LVLS(kvm->arch.vtcr)
-/*
- * With all the supported VA_BITs and 40bit guest IPA, the following condition
- * is always true:
- *
- * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
- *
- * We base our stage-2 page table walker helpers on this assumption and
- * fall back to using the host version of the helper wherever possible.
- * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back
- * to using the host version, since it is guaranteed it is not folded at host.
- *
- * If the condition breaks in the future, we can rearrange the host level
- * definitions and reuse them for stage2. Till then...
- */
-#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
-#error "Unsupported combination of guest IPA and host VA_BITS."
-#endif
-
-/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
-#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
-#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
-#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
+/* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the VM */
+#define stage2_pgdir_shift(kvm) pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
+#define stage2_pgdir_size(kvm) (1ULL << stage2_pgdir_shift(kvm))
+#define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1)
/*
* The number of PTRS across all concatenated stage2 tables given by the
* number of bits resolved at the initial level.
+ * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA),
+ * in which case, stage2_pgd_ptrs will have one entry.
*/
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
+#define pgd_ptrs_shift(ipa, pgdir_shift) \
+ ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
+#define __s2_pgd_ptrs(ipa, lvls) \
+ (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls))))
+#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t))
+
+#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
+#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm))
/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD.
+ * kvm_mmmu_cache_min_pages() is the number of pages required to install
+ * a stage-2 translation. We pre-allocate the entry level page table at
+ * the VM creation.
*/
-#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
+#define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1)
-
-#if STAGE2_PGTABLE_LEVELS > 3
+/* Stage2 PUD definitions when the level is present */
+static inline bool kvm_stage2_has_pud(struct kvm *kvm)
+{
+ return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3);
+}
#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
-#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
+#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT)
#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
-#define stage2_pgd_none(pgd) pgd_none(pgd)
-#define stage2_pgd_clear(pgd) pgd_clear(pgd)
-#define stage2_pgd_present(pgd) pgd_present(pgd)
-#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
-#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
-#define stage2_pud_free(pud) pud_free(NULL, pud)
+static inline bool stage2_pgd_none(struct kvm *kvm, pgd_t pgd)
+{
+ if (kvm_stage2_has_pud(kvm))
+ return pgd_none(pgd);
+ else
+ return 0;
+}
-#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
+static inline void stage2_pgd_clear(struct kvm *kvm, pgd_t *pgdp)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pgd_clear(pgdp);
+}
-static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline bool stage2_pgd_present(struct kvm *kvm, pgd_t pgd)
{
- phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+ if (kvm_stage2_has_pud(kvm))
+ return pgd_present(pgd);
+ else
+ return 1;
+}
- return (boundary - 1 < end - 1) ? boundary : end;
+static inline void stage2_pgd_populate(struct kvm *kvm, pgd_t *pgd, pud_t *pud)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pgd_populate(NULL, pgd, pud);
+}
+
+static inline pud_t *stage2_pud_offset(struct kvm *kvm,
+ pgd_t *pgd, unsigned long address)
+{
+ if (kvm_stage2_has_pud(kvm))
+ return pud_offset(pgd, address);
+ else
+ return (pud_t *)pgd;
}
-#endif /* STAGE2_PGTABLE_LEVELS > 3 */
+static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
+{
+ if (kvm_stage2_has_pud(kvm))
+ pud_free(NULL, pud);
+}
+static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
+{
+ if (kvm_stage2_has_pud(kvm))
+ return kvm_page_empty(pudp);
+ else
+ return false;
+}
-#if STAGE2_PGTABLE_LEVELS > 2
+static inline phys_addr_t
+stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+ if (kvm_stage2_has_pud(kvm)) {
+ phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
+
+ return (boundary - 1 < end - 1) ? boundary : end;
+ } else {
+ return end;
+ }
+}
+
+/* Stage2 PMD definitions when the level is present */
+static inline bool kvm_stage2_has_pmd(struct kvm *kvm)
+{
+ return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2);
+}
#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
-#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
+#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
-#define stage2_pud_none(pud) pud_none(pud)
-#define stage2_pud_clear(pud) pud_clear(pud)
-#define stage2_pud_present(pud) pud_present(pud)
-#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
-#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
-#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
+static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ return pud_none(pud);
+ else
+ return 0;
+}
+
+static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ pud_clear(pud);
+}
-#define stage2_pud_huge(pud) pud_huge(pud)
-#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ return pud_present(pud);
+ else
+ return 1;
+}
-static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd)
{
- phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
+ if (kvm_stage2_has_pmd(kvm))
+ pud_populate(NULL, pud, pmd);
+}
- return (boundary - 1 < end - 1) ? boundary : end;
+static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
+ pud_t *pud, unsigned long address)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ return pmd_offset(pud, address);
+ else
+ return (pmd_t *)pud;
}
-#endif /* STAGE2_PGTABLE_LEVELS > 2 */
+static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ pmd_free(NULL, pmd);
+}
+
+static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ return pud_huge(pud);
+ else
+ return 0;
+}
+
+static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp)
+{
+ if (kvm_stage2_has_pmd(kvm))
+ return kvm_page_empty(pmdp);
+ else
+ return 0;
+}
-#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
+static inline phys_addr_t
+stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
+{
+ if (kvm_stage2_has_pmd(kvm)) {
+ phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
-#if STAGE2_PGTABLE_LEVELS == 2
-#include <asm/stage2_pgtable-nopmd.h>
-#elif STAGE2_PGTABLE_LEVELS == 3
-#include <asm/stage2_pgtable-nopud.h>
-#endif
+ return (boundary - 1 < end - 1) ? boundary : end;
+ } else {
+ return end;
+ }
+}
+static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep)
+{
+ return kvm_page_empty(ptep);
+}
-#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr)
+{
+ return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1));
+}
-static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+static inline phys_addr_t
+stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
- phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
+ phys_addr_t boundary = (addr + stage2_pgdir_size(kvm)) & stage2_pgdir_mask(kvm);
return (boundary - 1 < end - 1) ? boundary : end;
}
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index eab738019707..397c6ccd04e7 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -20,7 +20,7 @@
#ifdef CONFIG_COMPAT
-#include <linux/compat_time.h>
+#include <linux/time.h>
#include <asm/compat.h>
/*
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index e0d0f5b856e7..b13ca091f833 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -18,11 +18,11 @@
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index 5072cbd15c82..dae1584cf017 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -16,5 +16,6 @@
*/
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
#include <asm-generic/unistd.h>
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 0e2ea1c78542..bb85e2f4603f 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
/* Interface called from ACPI code to setup PCI host controller */
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
- int node = acpi_get_node(root->device->handle);
struct acpi_pci_generic_root_info *ri;
struct pci_bus *bus, *child;
struct acpi_pci_root_ops *root_ops;
- ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node);
+ ri = kzalloc(sizeof(*ri), GFP_KERNEL);
if (!ri)
return NULL;
- root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
+ root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
if (!root_ops) {
kfree(ri);
return NULL;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index a6c9fbaeaefc..dd436a50fce7 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -391,15 +391,15 @@ int __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_CORTEX_A53;
case ARM_CPU_PART_CORTEX_A57:
return KVM_ARM_TARGET_CORTEX_A57;
- };
+ }
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
case APM_CPU_PART_POTENZA:
return KVM_ARM_TARGET_XGENE_POTENZA;
- };
+ }
break;
- };
+ }
/* Return a default generic target */
return KVM_ARM_TARGET_GENERIC_V8;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e5e741bfffe1..35a81bebd02b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -284,6 +284,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
*/
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
return 0;
+ case ARM_EXCEPTION_IL:
+ /*
+ * We attempted an illegal exception return. Guest state must
+ * have been corrupted somehow. Give up.
+ */
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ return -EINVAL;
default:
kvm_pr_unimpl("Unsupported exception type: %d",
exception_index);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 2fabc2dc1966..82d1904328ad 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_KVM_ARM_HOST) += switch.o
obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
-obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o
# KVM code is run at a different exception code with a different map, so
# compiler instrumentation that inserts callbacks or checks into the code may
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 24b4fbafe3e4..b1f14f736962 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -162,6 +162,20 @@ el1_error:
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
+el2_sync:
+ /* Check for illegal exception return, otherwise panic */
+ mrs x0, spsr_el2
+
+ /* if this was something else, then panic! */
+ tst x0, #PSR_IL_BIT
+ b.eq __hyp_panic
+
+ /* Let's attempt a recovery from the illegal exception return */
+ get_vcpu_ptr x1, x0
+ mov x0, #ARM_EXCEPTION_IL
+ b __guest_exit
+
+
el2_error:
ldp x0, x1, [sp], #16
@@ -240,7 +254,7 @@ ENTRY(__kvm_hyp_vector)
invalid_vect el2t_fiq_invalid // FIQ EL2t
invalid_vect el2t_error_invalid // Error EL2t
- invalid_vect el2h_sync_invalid // Synchronous EL2h
+ valid_vect el2_sync // Synchronous EL2h
invalid_vect el2h_irq_invalid // IRQ EL2h
invalid_vect el2h_fiq_invalid // FIQ EL2h
valid_vect el2_error // Error EL2h
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
deleted file mode 100644
index 603e1ee83e89..000000000000
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2016 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/types.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_hyp.h>
-
-u32 __hyp_text __init_stage2_translation(void)
-{
- u64 val = VTCR_EL2_FLAGS;
- u64 parange;
- u64 tmp;
-
- /*
- * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS
- * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
- * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
- */
- parange = read_sysreg(id_aa64mmfr0_el1) & 7;
- if (parange > ID_AA64MMFR0_PARANGE_MAX)
- parange = ID_AA64MMFR0_PARANGE_MAX;
- val |= parange << 16;
-
- /* Compute the actual PARange... */
- switch (parange) {
- case 0:
- parange = 32;
- break;
- case 1:
- parange = 36;
- break;
- case 2:
- parange = 40;
- break;
- case 3:
- parange = 42;
- break;
- case 4:
- parange = 44;
- break;
- case 5:
- default:
- parange = 48;
- break;
- }
-
- /*
- * ... and clamp it to 40 bits, unless we have some braindead
- * HW that implements less than that. In all cases, we'll
- * return that value for the rest of the kernel to decide what
- * to do.
- */
- val |= 64 - (parange > 40 ? 40 : parange);
-
- /*
- * Check the availability of Hardware Access Flag / Dirty Bit
- * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
- */
- tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
- if (tmp)
- val |= VTCR_EL2_HA;
-
- /*
- * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
- * bit in VTCR_EL2.
- */
- tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_VMIDBITS_SHIFT) & 0xf;
- val |= (tmp == ID_AA64MMFR1_VMIDBITS_16) ?
- VTCR_EL2_VS_16BIT :
- VTCR_EL2_VS_8BIT;
-
- write_sysreg(val, vtcr_el2);
-
- return parange;
-}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ca46153d7915..7cc175c88a37 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -198,7 +198,7 @@ void deactivate_traps_vhe_put(void)
static void __hyp_text __activate_vm(struct kvm *kvm)
{
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
}
static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
@@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
return false; /* Translation failed, back to guest */
/* Convert PAR to HPFAR format */
- *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
+ *hpfar = PAR_TO_HPFAR(tmp);
return true;
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 76d016b446b2..68d6f7c3b237 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -152,8 +152,25 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
static void __hyp_text
__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
{
+ u64 pstate = ctxt->gp_regs.regs.pstate;
+ u64 mode = pstate & PSR_AA32_MODE_MASK;
+
+ /*
+ * Safety check to ensure we're setting the CPU up to enter the guest
+ * in a less privileged mode.
+ *
+ * If we are attempting a return to EL2 or higher in AArch64 state,
+ * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that
+ * we'll take an illegal exception state exception immediately after
+ * the ERET to the guest. Attempts to return to AArch32 Hyp will
+ * result in an illegal exception return because EL2's execution state
+ * is determined by SCR_EL3.RW.
+ */
+ if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
+ pstate = PSR_MODE_EL2h | PSR_IL_BIT;
+
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
- write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+ write_sysreg_el2(pstate, spsr);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 131c7772703c..4dbd9c69a96d 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -30,7 +30,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
* bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
* let's flip TGE before executing the TLB operation.
*/
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
@@ -39,7 +39,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
{
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ __load_guest_stage2(kvm);
isb();
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e37c78bbe1ca..b72a3dd56204 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -26,6 +26,7 @@
#include <kvm/arm_arch_timer.h>
+#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
@@ -33,6 +34,9 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
+/* Maximum phys_shift supported for any VM on this host */
+static u32 kvm_ipa_limit;
+
/*
* ARMv8 Reset Values
*/
@@ -55,12 +59,12 @@ static bool cpu_has_32bit_el1(void)
}
/**
- * kvm_arch_dev_ioctl_check_extension
+ * kvm_arch_vm_ioctl_check_extension
*
* We currently assume that the number of HW registers is uniform
* across all CPUs (see cpuinfo_sanity_check).
*/
-int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
+int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -82,9 +86,11 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
- case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ r = kvm_ipa_limit;
+ break;
default:
r = 0;
}
@@ -133,3 +139,99 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu);
}
+
+void kvm_set_ipa_limit(void)
+{
+ unsigned int ipa_max, pa_max, va_max, parange;
+
+ parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
+ pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+
+ /* Clamp the IPA limit to the PA size supported by the kernel */
+ ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;
+ /*
+ * Since our stage2 table is dependent on the stage1 page table code,
+ * we must always honor the following condition:
+ *
+ * Number of levels in Stage1 >= Number of levels in Stage2.
+ *
+ * So clamp the ipa limit further down to limit the number of levels.
+ * Since we can concatenate upto 16 tables at entry level, we could
+ * go upto 4bits above the maximum VA addressible with the current
+ * number of levels.
+ */
+ va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
+ va_max += 4;
+
+ if (va_max < ipa_max)
+ ipa_max = va_max;
+
+ /*
+ * If the final limit is lower than the real physical address
+ * limit of the CPUs, report the reason.
+ */
+ if (ipa_max < pa_max)
+ pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
+ (va_max < pa_max) ? "Virtual" : "Physical");
+
+ WARN(ipa_max < KVM_PHYS_SHIFT,
+ "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
+ kvm_ipa_limit = ipa_max;
+ kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
+}
+
+/*
+ * Configure the VTCR_EL2 for this VM. The VTCR value is common
+ * across all the physical CPUs on the system. We use system wide
+ * sanitised values to fill in different fields, except for Hardware
+ * Management of Access Flags. HA Flag is set unconditionally on
+ * all CPUs, as it is safe to run with or without the feature and
+ * the bit is RES0 on CPUs that don't support it.
+ */
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+ u64 vtcr = VTCR_EL2_FLAGS;
+ u32 parange, phys_shift;
+ u8 lvls;
+
+ if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+ return -EINVAL;
+
+ phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+ if (phys_shift) {
+ if (phys_shift > kvm_ipa_limit ||
+ phys_shift < 32)
+ return -EINVAL;
+ } else {
+ phys_shift = KVM_PHYS_SHIFT;
+ }
+
+ parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
+ if (parange > ID_AA64MMFR0_PARANGE_MAX)
+ parange = ID_AA64MMFR0_PARANGE_MAX;
+ vtcr |= parange << VTCR_EL2_PS_SHIFT;
+
+ vtcr |= VTCR_EL2_T0SZ(phys_shift);
+ /*
+ * Use a minimum 2 level page table to prevent splitting
+ * host PMD huge pages at stage2.
+ */
+ lvls = stage2_pgtable_levels(phys_shift);
+ if (lvls < 2)
+ lvls = 2;
+ vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
+
+ /*
+ * Enable the Hardware Access Flag management, unconditionally
+ * on all CPUs. The features is RES0 on CPUs without the support
+ * and must be ignored by the CPUs.
+ */
+ vtcr |= VTCR_EL2_HA;
+
+ /* Set the vmid bits */
+ vtcr |= (kvm_get_vmid_bits() == 16) ?
+ VTCR_EL2_VS_16BIT :
+ VTCR_EL2_VS_8BIT;
+ kvm->arch.vtcr = vtcr;
+ return 0;
+}
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index 0d2daf7f9809..6b2fe792de9d 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -16,6 +16,7 @@
*/
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_CLONE
/* Use the standard ABI for syscalls. */
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
index 7dd20ef7625a..628195823816 100644
--- a/arch/h8300/include/uapi/asm/unistd.h
+++ b/arch/h8300/include/uapi/asm/unistd.h
@@ -1,5 +1,6 @@
#define __ARCH_NOMMU
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
#include <asm-generic/unistd.h>
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index ea181e79162e..c91ca7d02461 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -29,6 +29,7 @@
#define sys_mmap2 sys_mmap_pgoff
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_EXECVE
#define __ARCH_WANT_SYS_CLONE
#define __ARCH_WANT_SYS_VFORK
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index ffb705dc9c13..49e34db2529c 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -28,6 +28,9 @@
#define __IGNORE_vfork /* clone() */
#define __IGNORE_umount2 /* umount() */
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SYS_UTIME
+
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
#include <linux/types.h>
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 1d5483f6e457..85904b73e261 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 52a0af127951..9b3818bbb68b 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index b3103e51268a..769677809945 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index fb7d651a4cab..7dd264ddf2ea 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 6b37f5537c39..515f7439c755 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index c717bf879449..8e1038ceb407 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 226c994ce794..62c8aaa15cc7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index b383327fd77a..733973f91297 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 9783d3deb9e9..fee30cc9ac16 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index a35d10ee10cb..eebf9c9088e7 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 573bf922d448..dabc54318c09 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index efb27a7fcc55..0d9a5c2a311a 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
@@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 30d0d3fbd4ef..e680031bda7b 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -7,6 +7,7 @@
#define NR_syscalls 380
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_OLD_STAT
#define __ARCH_WANT_STAT64
@@ -21,7 +22,6 @@
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_SYS_OLD_MMAP
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index a62d09420a47..f42c40f5001b 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -15,6 +15,7 @@
/* #define __ARCH_WANT_OLD_READDIR */
/* #define __ARCH_WANT_OLD_STAT */
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
@@ -26,7 +27,6 @@
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
/* #define __ARCH_WANT_SYS_OLD_GETRLIMIT */
#define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 26c6b561d6f7..6fb16fd24035 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -154,6 +154,21 @@
clock-names = "baud", "module";
};
+ dmac: dma-controller@13020000 {
+ compatible = "ingenic,jz4740-dma";
+ reg = <0x13020000 0xbc
+ 0x13020300 0x14>;
+ #dma-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <29>;
+
+ clocks = <&cgu JZ4740_CLK_DMA>;
+
+ /* Disable dmac until we have something that uses it */
+ status = "disabled";
+ };
+
uhc: uhc@13030000 {
compatible = "ingenic,jz4740-ohci", "generic-ohci";
reg = <0x13030000 0x1000>;
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
index 7c2804f3f5f1..49ede6c14ff3 100644
--- a/arch/mips/boot/dts/ingenic/jz4770.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -196,6 +196,36 @@
status = "disabled";
};
+ dmac0: dma-controller@13420000 {
+ compatible = "ingenic,jz4770-dma";
+ reg = <0x13420000 0xC0
+ 0x13420300 0x20>;
+
+ #dma-cells = <1>;
+
+ clocks = <&cgu JZ4770_CLK_DMA>;
+ interrupt-parent = <&intc>;
+ interrupts = <24>;
+
+ /* Disable dmac0 until we have something that uses it */
+ status = "disabled";
+ };
+
+ dmac1: dma-controller@13420100 {
+ compatible = "ingenic,jz4770-dma";
+ reg = <0x13420100 0xC0
+ 0x13420400 0x20>;
+
+ #dma-cells = <1>;
+
+ clocks = <&cgu JZ4770_CLK_DMA>;
+ interrupt-parent = <&intc>;
+ interrupts = <23>;
+
+ /* Disable dmac1 until we have something that uses it */
+ status = "disabled";
+ };
+
uhc: uhc@13430000 {
compatible = "generic-ohci";
reg = <0x13430000 0x1000>;
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index ce93d57f1b4d..b03cdec56de9 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -266,7 +266,8 @@
dma: dma@13420000 {
compatible = "ingenic,jz4780-dma";
- reg = <0x13420000 0x10000>;
+ reg = <0x13420000 0x400
+ 0x13421000 0x40>;
#dma-cells = <2>;
interrupt-parent = <&intc>;
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 78675f19440f..c99166eadbde 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -9,43 +9,25 @@
#include <asm/page.h>
#include <asm/ptrace.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "mips\0\0\0"
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_suseconds_t;
-
-typedef s32 compat_pid_t;
typedef s32 __compat_uid_t;
typedef s32 __compat_gid_t;
typedef __compat_uid_t __compat_uid32_t;
typedef __compat_gid_t __compat_gid32_t;
typedef u32 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef u32 compat_nlink_t;
typedef s32 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef s32 compat_caddr_t;
typedef struct {
s32 val[2];
} compat_fsid_t;
-typedef s32 compat_timer_t;
-typedef s32 compat_key_t;
-
-typedef s16 compat_short_t;
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u16 compat_ushort_t;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
struct compat_stat {
compat_dev_t st_dev;
@@ -59,11 +41,11 @@ struct compat_stat {
s32 st_pad2[2];
compat_off_t st_size;
s32 st_pad3;
- compat_time_t st_atime;
+ old_time32_t st_atime;
s32 st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
s32 st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
s32 st_ctime_nsec;
s32 st_blksize;
s32 st_blocks;
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 3c09450908aa..c68b8ae3efcb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -24,16 +24,17 @@
#ifndef __ASSEMBLY__
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_UNAME
#define __ARCH_WANT_SYS_OLDUMOUNT
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 89b234844534..7a12763d553a 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -54,10 +54,10 @@ struct elf_prstatus32
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime;/* Cumulative user time */
- struct compat_timeval pr_cstime;/* Cumulative system time */
+ struct old_timeval32 pr_utime; /* User time */
+ struct old_timeval32 pr_stime; /* System time */
+ struct old_timeval32 pr_cutime;/* Cumulative user time */
+ struct old_timeval32 pr_cstime;/* Cumulative system time */
elf_gregset_t pr_reg; /* GP registers */
int pr_fpvalid; /* True if math co-processor being used. */
};
@@ -81,9 +81,9 @@ struct elf_prpsinfo32
#define elf_caddr_t u32
#define init_elf_binfmt init_elfn32_binfmt
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
static __inline__ void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
{
/*
* Convert jiffies to nanoseconds and separate with
@@ -101,6 +101,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#define TASK_SIZE TASK_SIZE32
#undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index a88c59db3d48..e6db06a1d31a 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -59,10 +59,10 @@ struct elf_prstatus32
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime;/* Cumulative user time */
- struct compat_timeval pr_cstime;/* Cumulative system time */
+ struct old_timeval32 pr_utime; /* User time */
+ struct old_timeval32 pr_stime; /* System time */
+ struct old_timeval32 pr_cutime;/* Cumulative user time */
+ struct old_timeval32 pr_cstime;/* Cumulative system time */
elf_gregset_t pr_reg; /* GP registers */
int pr_fpvalid; /* True if math co-processor being used. */
};
@@ -86,9 +86,9 @@ struct elf_prpsinfo32
#define elf_caddr_t u32
#define init_elf_binfmt init_elf32_binfmt
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
static inline void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
{
/*
* Convert jiffies to nanoseconds and separate with
@@ -104,6 +104,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#define TASK_SIZE TASK_SIZE32
#undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index 6e95901cabe3..603e826e0449 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYNC_FILE_RANGE2
/* Use the standard ABI for syscalls */
diff --git a/arch/nios2/include/uapi/asm/unistd.h b/arch/nios2/include/uapi/asm/unistd.h
index b6bdae04bc84..d9948d88790b 100644
--- a/arch/nios2/include/uapi/asm/unistd.h
+++ b/arch/nios2/include/uapi/asm/unistd.h
@@ -19,6 +19,7 @@
#define sys_mmap2 sys_mmap_pgoff
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
/* Use the standard ABI for syscalls */
#include <asm-generic/unistd.h>
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 11c5a58ab333..ec37df18d8ed 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,6 +20,7 @@
#define sys_mmap2 sys_mmap_pgoff
#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index ab8a54771507..e03e3c849f40 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -8,36 +8,22 @@
#include <linux/sched.h>
#include <linux/thread_info.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "parisc\0\0"
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u32 __compat_uid_t;
typedef u32 __compat_gid_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
struct compat_stat {
compat_dev_t st_dev; /* dev_t is 32 bits on parisc */
@@ -48,11 +34,11 @@ struct compat_stat {
u16 st_reserved2; /* old st_gid */
compat_dev_t st_rdev;
compat_off_t st_size;
- compat_time_t st_atime;
+ old_time32_t st_atime;
u32 st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
u32 st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
u32 st_ctime_nsec;
s32 st_blksize;
s32 st_blocks;
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3d507d04eb4c..bc37a4953eaa 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -141,6 +141,7 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5); \
}
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
@@ -151,11 +152,11 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1f4691ce4126..c55ba3b4873b 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -150,4 +150,25 @@ extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+#else
+static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+ bool preserve_nv) { }
+static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+ bool preserve_nv) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+void kvmhv_save_host_pmu(void);
+void kvmhv_load_host_pmu(void);
+void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
+void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
+
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
+long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
+long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
+ unsigned long dabrx);
+
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index b3520b549cba..66db23e2f4dc 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
BUG();
}
+static inline unsigned int ap_to_shift(unsigned long ap)
+{
+ int psize;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ if (mmu_psize_defs[psize].ap == ap)
+ return mmu_psize_defs[psize].shift;
+ }
+
+ return -1;
+}
+
static inline unsigned long get_sllp_encoding(int psize)
{
unsigned long sllp;
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 1154a6dc6d26..671316f9e95d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -53,6 +53,7 @@ extern void radix__flush_tlb_lpid_page(unsigned int lpid,
unsigned long addr,
unsigned long page_size);
extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid(unsigned int lpid);
extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid);
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 85c8af2bb272..74d0db511099 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#ifdef __BIG_ENDIAN__
#define COMPAT_UTS_MACHINE "ppc\0\0"
@@ -15,34 +17,18 @@
#define COMPAT_UTS_MACHINE "ppcle\0\0"
#endif
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u32 __compat_uid_t;
typedef u32 __compat_gid_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u32 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef s16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
struct compat_stat {
compat_dev_t st_dev;
@@ -55,11 +41,11 @@ struct compat_stat {
compat_off_t st_size;
compat_off_t st_blksize;
compat_off_t st_blocks;
- compat_time_t st_atime;
+ old_time32_t st_atime;
u32 st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
u32 st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
u32 st_ctime_nsec;
u32 __unused4[2];
};
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a0b17f9f1ea4..45e8789bb770 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -322,6 +322,11 @@
#define H_GET_24X7_DATA 0xF07C
#define H_GET_PERF_COUNTER_INFO 0xF080
+/* Platform-specific hcalls used for nested HV KVM */
+#define H_SET_PARTITION_TABLE 0xF800
+#define H_ENTER_NESTED 0xF804
+#define H_TLB_INVALIDATE 0xF808
+
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
#define H_SET_MODE_RESOURCE_SET_DAWR 2
@@ -461,6 +466,42 @@ struct h_cpu_char_result {
u64 behaviour;
};
+/* Register state for entering a nested guest with H_ENTER_NESTED */
+struct hv_guest_state {
+ u64 version; /* version of this structure layout */
+ u32 lpid;
+ u32 vcpu_token;
+ /* These registers are hypervisor privileged (at least for writing) */
+ u64 lpcr;
+ u64 pcr;
+ u64 amor;
+ u64 dpdes;
+ u64 hfscr;
+ s64 tb_offset;
+ u64 dawr0;
+ u64 dawrx0;
+ u64 ciabr;
+ u64 hdec_expiry;
+ u64 purr;
+ u64 spurr;
+ u64 ic;
+ u64 vtb;
+ u64 hdar;
+ u64 hdsisr;
+ u64 heir;
+ u64 asdr;
+ /* These are OS privileged but need to be set late in guest entry */
+ u64 srr0;
+ u64 srr1;
+ u64 sprg[4];
+ u64 pidr;
+ u64 cfar;
+ u64 ppr;
+};
+
+/* Latest version of hv_guest_state structure */
+#define HV_GUEST_STATE_VERSION 1
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 3d4b88cb8599..35db0cbc9222 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -126,7 +126,7 @@ struct iommu_table {
int it_nid;
};
-#define IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry) \
+#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
((tbl)->it_ops->useraddrptr((tbl), (entry), false))
#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
((tbl)->it_ops->useraddrptr((tbl), (entry), true))
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a790d5cf6ea3..1f321914676d 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -84,7 +84,6 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
-#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700
@@ -134,8 +133,7 @@
#define BOOK3S_IRQPRIO_EXTERNAL 14
#define BOOK3S_IRQPRIO_DECREMENTER 15
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17
-#define BOOK3S_IRQPRIO_MAX 18
+#define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 83a9aa3cf689..09f8e9ba69bc 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -188,14 +188,37 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr);
+extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 root,
+ u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 table,
+ int table_index, u64 *pte_ret_p);
extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+ unsigned int shift, struct kvm_memory_slot *memslot,
+ unsigned int lpid);
+extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable,
+ bool writing, unsigned long gpa,
+ unsigned int lpid);
+extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+ unsigned long gpa,
+ struct kvm_memory_slot *memslot,
+ bool writing, bool kvm_ro,
+ pte_t *inserted_pte, unsigned int *levelp);
extern int kvmppc_init_vm_radix(struct kvm *kvm);
extern void kvmppc_free_radix(struct kvm *kvm);
+extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
+ unsigned int lpid);
extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void);
extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
+ unsigned long gpa, unsigned int shift,
+ struct kvm_memory_slot *memslot,
+ unsigned int lpid);
extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -271,6 +294,21 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
#endif
+long kvmhv_nested_init(void);
+void kvmhv_nested_exit(void);
+void kvmhv_vm_nested_init(struct kvm *kvm);
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
+void kvmhv_release_all_nested(struct kvm *kvm);
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu,
+ u64 time_limit, unsigned long lpcr);
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+ struct hv_guest_state *hr);
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
+
void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
extern int kvm_irq_bypass;
@@ -301,12 +339,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
- vcpu->arch.cr = val;
+ vcpu->arch.regs.ccr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cr;
+ return vcpu->arch.regs.ccr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
@@ -384,9 +422,6 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
/* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008
-/* LPIDs we support with this build -- runtime limit may be lower */
-#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
-
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index dc435a5af7d6..6d298145d564 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -23,6 +23,108 @@
#include <linux/string.h>
#include <asm/bitops.h>
#include <asm/book3s/64/mmu-hash.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC_PSERIES
+static inline bool kvmhv_on_pseries(void)
+{
+ return !cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool kvmhv_on_pseries(void)
+{
+ return false;
+}
+#endif
+
+/*
+ * Structure for a nested guest, that is, for a guest that is managed by
+ * one of our guests.
+ */
+struct kvm_nested_guest {
+ struct kvm *l1_host; /* L1 VM that owns this nested guest */
+ int l1_lpid; /* lpid L1 guest thinks this guest is */
+ int shadow_lpid; /* real lpid of this nested guest */
+ pgd_t *shadow_pgtable; /* our page table for this guest */
+ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
+ u64 process_table; /* process table entry for this guest */
+ long refcnt; /* number of pointers to this struct */
+ struct mutex tlb_lock; /* serialize page faults and tlbies */
+ struct kvm_nested_guest *next;
+ cpumask_t need_tlb_flush;
+ cpumask_t cpu_in_guest;
+ short prev_cpu[NR_CPUS];
+};
+
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000 12-bit lpid field
+ * 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
+ * 0x0000000000000001 1-bit single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT (52)
+#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+ struct llist_node list;
+ u64 rmap;
+};
+
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ * safe against removal of the list entry or NULL list
+ * @pos: a (struct rmap_nested *) to use as a loop cursor
+ * @node: pointer to the first entry
+ * NOTE: this can be NULL
+ * @rmapp: an (unsigned long *) in which to return the rmap entries on each
+ * iteration
+ * NOTE: this must point to already allocated memory
+ *
+ * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
+ * rmap entry in the memslot. The list is always terminated by a "single entry"
+ * stored in the list element of the final entry of the llist. If there is ONLY
+ * a single entry then this is itself in the rmap entry of the memslot, not a
+ * llist head pointer.
+ *
+ * Note that the iterator below assumes that a nested rmap entry is always
+ * non-zero. This is true for our usage because the LPID field is always
+ * non-zero (zero is reserved for the host).
+ *
+ * This should be used to iterate over the list of rmap_nested entries with
+ * processing done on the u64 rmap value given by each iteration. This is safe
+ * against removal of list entries and it is always safe to call free on (pos).
+ *
+ * e.g.
+ * struct rmap_nested *cursor;
+ * struct llist_node *first;
+ * unsigned long rmap;
+ * for_each_nest_rmap_safe(cursor, first, &rmap) {
+ * do_something(rmap);
+ * free(cursor);
+ * }
+ */
+#define for_each_nest_rmap_safe(pos, node, rmapp) \
+ for ((pos) = llist_entry((node), typeof(*(pos)), list); \
+ (node) && \
+ (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
+ ((u64) (node)) : ((pos)->rmap))) && \
+ (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
+ ((struct llist_node *) ((pos) = NULL)) : \
+ (pos)->list.next)), true); \
+ (pos) = llist_entry((node), typeof(*(pos)), list))
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+ bool create);
+void kvmhv_put_nested(struct kvm_nested_guest *gp);
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
+
+/* Encoding of first parameter for H_TLB_INVALIDATE */
+#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
+ ___PPC_R(r))
/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
#define PPC_MIN_HPT_ORDER 18
@@ -435,6 +537,7 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
extern void kvmhv_rm_send_ipi(int cpu);
@@ -482,7 +585,7 @@ static inline u64 sanitize_msr(u64 msr)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
{
- vcpu->arch.cr = vcpu->arch.cr_tm;
+ vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm;
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
@@ -499,7 +602,7 @@ static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
{
- vcpu->arch.cr_tm = vcpu->arch.cr;
+ vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.lr_tm = vcpu->arch.regs.link;
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
@@ -515,6 +618,17 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+ unsigned long gpa, unsigned int level,
+ unsigned long mmu_seq, unsigned int lpid,
+ unsigned long *rmapp, struct rmap_nested **n_rmap);
+extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+ struct rmap_nested **n_rmap);
+extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ unsigned long gpa, unsigned long hpa,
+ unsigned long nbytes);
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index d978fdf698af..eb3ba6390108 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -25,6 +25,9 @@
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
+/* LPIDs we support with this build -- runtime limit may be lower */
+#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
+
/* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index d513e3ed1c65..f0cef625f17c 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -46,12 +46,12 @@ static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
- vcpu->arch.cr = val;
+ vcpu->arch.regs.ccr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cr;
+ return vcpu->arch.regs.ccr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 906bcbdfd2a1..fac6f631ed29 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
+#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS
#else
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -94,6 +95,7 @@ struct dtl_entry;
struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu;
+struct kvm_nested_guest;
struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -287,10 +289,12 @@ struct kvm_arch {
u8 radix;
u8 fwnmi_enabled;
bool threads_indep;
+ bool nested_enable;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
+ struct dentry *radix_dentry;
struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -311,6 +315,9 @@ struct kvm_arch {
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ u64 l1_ptcr;
+ int max_nested_lpid;
+ struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS];
/* This array can grow quite large, keep it at the end */
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif
@@ -360,7 +367,9 @@ struct kvmppc_pte {
bool may_write : 1;
bool may_execute : 1;
unsigned long wimg;
+ unsigned long rc;
u8 page_size; /* MMU_PAGE_xxx */
+ u8 page_shift;
};
struct kvmppc_mmu {
@@ -537,8 +546,6 @@ struct kvm_vcpu_arch {
ulong tar;
#endif
- u32 cr;
-
#ifdef CONFIG_PPC_BOOK3S
ulong hflags;
ulong guest_owned_ext;
@@ -707,6 +714,7 @@ struct kvm_vcpu_arch {
u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
+ u8 external_oneshot; /* clear external irq after delivery */
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -781,6 +789,10 @@ struct kvm_vcpu_arch {
u32 emul_inst;
u32 online;
+
+ /* For support of nested guests */
+ struct kvm_nested_guest *nested;
+ u32 nested_vcpu_id;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e991821dd7fa..9b89b1918dfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -194,9 +194,7 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
-extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
- unsigned long tce);
-extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap);
extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
unsigned long idx, unsigned long tce);
@@ -327,6 +325,7 @@ struct kvmppc_ops {
int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
unsigned long flags);
void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
+ int (*enable_nested)(struct kvm *kvm);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
@@ -585,6 +584,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
@@ -607,6 +607,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */
/*
@@ -652,6 +653,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
/*
* Host-side operations we want to set up while running in real
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 7f627e3f4da4..630eb8b1b7ed 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -54,7 +54,6 @@ void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
struct pnv_php_slot {
struct hotplug_slot slot;
- struct hotplug_slot_info slot_info;
uint64_t id;
char *name;
int slot_no;
@@ -72,6 +71,7 @@ struct pnv_php_slot {
struct pci_dev *pdev;
struct pci_bus *bus;
bool power_state_check;
+ u8 attention_state;
void *fdt;
void *dt;
struct of_changeset ocs;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 665af14850e4..6093bc8f74e5 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -104,6 +104,7 @@
#define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MSGSNDP 142
#define OP_31_XOP_MSGCLRP 174
+#define OP_31_XOP_TLBIE 306
#define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e5b314ed054e..c90698972f42 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -415,6 +415,7 @@
#define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
#define HFSCR_FP __MASK(FSCR_FP_LG)
+#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
#define SPRN_TAR 0x32f /* Target Address Register */
#define SPRN_LPCR 0x13E /* LPAR Control Register */
#define LPCR_VPM0 ASM_CONST(0x8000000000000000)
@@ -766,6 +767,7 @@
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
#define HSRR1_DENORM 0x00100000 /* Denorm exception */
+#define HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index c19379f0a32e..b0de85b477e1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -22,6 +22,7 @@
#include <linux/compiler.h>
#include <linux/linkage.h>
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
@@ -35,7 +36,6 @@
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_SYS_OLD_UNAME
@@ -47,6 +47,7 @@
#endif
#ifdef CONFIG_PPC64
#define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_NEWFSTATAT
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56a03d3..8c876c166ef2 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf15566c4e..d68b9ef38328 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -387,12 +387,12 @@ int main(void)
OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
- OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec);
- OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec);
+ OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
+ OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
- OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec);
- OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec);
+ OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
+ OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
#else
OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
@@ -438,7 +438,7 @@ int main(void)
#ifdef CONFIG_PPC_BOOK3S
OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
@@ -503,6 +503,7 @@ int main(void)
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+ OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -695,7 +696,7 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
- OFFSET(VCPU_CR, kvm_vcpu, arch.cr);
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 458b928dbd84..c317080db771 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -147,8 +147,8 @@ __init_hvmode_206:
rldicl. r0,r3,4,63
bnelr
ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
- xor r5,r5,r6
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST)
+ andc r5,r5,r6
std r5,CPU_SPEC_FEATURES(r4)
blr
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index f872c04bb5b1..e814f40ab836 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -75,7 +75,8 @@ kvm-hv-y += \
book3s_hv.o \
book3s_hv_interrupts.o \
book3s_64_mmu_hv.o \
- book3s_64_mmu_radix.o
+ book3s_64_mmu_radix.o \
+ book3s_hv_nested.o
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
book3s_hv_tm.o
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 87348e498c89..fd9893bc7aa1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -78,8 +78,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
ulong pc = kvmppc_get_pc(vcpu);
+ ulong lr = kvmppc_get_lr(vcpu);
if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+ if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
}
}
@@ -150,7 +153,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
- case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
@@ -236,18 +238,35 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
- unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
-
- if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
- vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+ /*
+ * This case (KVM_INTERRUPT_SET) should never actually arise for
+ * a pseries guest (because pseries guests expect their interrupt
+ * controllers to continue asserting an external interrupt request
+ * until it is acknowledged at the interrupt controller), but is
+ * included to avoid ABI breakage and potentially for other
+ * sorts of guest.
+ *
+ * There is a subtlety here: HV KVM does not test the
+ * external_oneshot flag in the code that synthesizes
+ * external interrupts for the guest just before entering
+ * the guest. That is OK even if userspace did do a
+ * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
+ * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
+ * which ends up doing a smp_send_reschedule(), which will
+ * pull the guest all the way out to the host, meaning that
+ * we will call kvmppc_core_prepare_to_enter() before entering
+ * the guest again, and that will handle the external_oneshot
+ * flag correctly.
+ */
+ if (irq->irq == KVM_INTERRUPT_SET)
+ vcpu->arch.external_oneshot = 1;
- kvmppc_book3s_queue_irqprio(vcpu, vec);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
- kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
}
void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
@@ -278,7 +297,6 @@ static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
- case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
@@ -352,8 +370,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_DECREMENTER:
/* DEC interrupts get cleared by mtdec */
return false;
- case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
- /* External interrupts get cleared by userspace */
+ case BOOK3S_IRQPRIO_EXTERNAL:
+ /*
+ * External interrupts get cleared by userspace
+ * except when set by the KVM_INTERRUPT ioctl with
+ * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
+ */
+ if (vcpu->arch.external_oneshot) {
+ vcpu->arch.external_oneshot = 0;
+ return true;
+ }
return false;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 68e14afecac8..c615617e78ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -268,14 +268,13 @@ int kvmppc_mmu_hv_init(void)
{
unsigned long host_lpid, rsvd_lpid;
- if (!cpu_has_feature(CPU_FTR_HVMODE))
- return -EINVAL;
-
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
- host_lpid = mfspr(SPRN_LPID);
+ host_lpid = 0;
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ host_lpid = mfspr(SPRN_LPID);
rsvd_lpid = LPID_RSVD;
kvmppc_init_lpid(rsvd_lpid + 1);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 998f8d089ac7..d68162ee159b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -10,6 +10,9 @@
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/debugfs.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -26,87 +29,74 @@
*/
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
-int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
- struct kvmppc_pte *gpte, bool data, bool iswrite)
+int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 root,
+ u64 *pte_ret_p)
{
struct kvm *kvm = vcpu->kvm;
- u32 pid;
int ret, level, ps;
- __be64 prte, rpte;
- unsigned long ptbl;
- unsigned long root, pte, index;
- unsigned long rts, bits, offset;
- unsigned long gpa;
- unsigned long proc_tbl_size;
-
- /* Work out effective PID */
- switch (eaddr >> 62) {
- case 0:
- pid = vcpu->arch.pid;
- break;
- case 3:
- pid = 0;
- break;
- default:
- return -EINVAL;
- }
- proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12);
- if (pid * 16 >= proc_tbl_size)
- return -EINVAL;
-
- /* Read partition table to find root of tree for effective PID */
- ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16);
- ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
- if (ret)
- return ret;
+ unsigned long rts, bits, offset, index;
+ u64 pte, base, gpa;
+ __be64 rpte;
- root = be64_to_cpu(prte);
rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
((root & RTS2_MASK) >> RTS2_SHIFT);
bits = root & RPDS_MASK;
- root = root & RPDB_MASK;
+ base = root & RPDB_MASK;
offset = rts + 31;
- /* current implementations only support 52-bit space */
+ /* Current implementations only support 52-bit space */
if (offset != 52)
return -EINVAL;
+ /* Walk each level of the radix tree */
for (level = 3; level >= 0; --level) {
+ u64 addr;
+ /* Check a valid size */
if (level && bits != p9_supported_radix_bits[level])
return -EINVAL;
if (level == 0 && !(bits == 5 || bits == 9))
return -EINVAL;
offset -= bits;
index = (eaddr >> offset) & ((1UL << bits) - 1);
- /* check that low bits of page table base are zero */
- if (root & ((1UL << (bits + 3)) - 1))
+ /* Check that low bits of page table base are zero */
+ if (base & ((1UL << (bits + 3)) - 1))
return -EINVAL;
- ret = kvm_read_guest(kvm, root + index * 8,
- &rpte, sizeof(rpte));
- if (ret)
+ /* Read the entry from guest memory */
+ addr = base + (index * sizeof(rpte));
+ ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+ if (ret) {
+ if (pte_ret_p)
+ *pte_ret_p = addr;
return ret;
+ }
pte = __be64_to_cpu(rpte);
if (!(pte & _PAGE_PRESENT))
return -ENOENT;
+ /* Check if a leaf entry */
if (pte & _PAGE_PTE)
break;
- bits = pte & 0x1f;
- root = pte & 0x0fffffffffffff00ul;
+ /* Get ready to walk the next level */
+ base = pte & RPDB_MASK;
+ bits = pte & RPDS_MASK;
}
- /* need a leaf at lowest level; 512GB pages not supported */
+
+ /* Need a leaf at lowest level; 512GB pages not supported */
if (level < 0 || level == 3)
return -EINVAL;
- /* offset is now log base 2 of the page size */
+ /* We found a valid leaf PTE */
+ /* Offset is now log base 2 of the page size */
gpa = pte & 0x01fffffffffff000ul;
if (gpa & ((1ul << offset) - 1))
return -EINVAL;
- gpa += eaddr & ((1ul << offset) - 1);
+ gpa |= eaddr & ((1ul << offset) - 1);
for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
if (offset == mmu_psize_defs[ps].shift)
break;
gpte->page_size = ps;
+ gpte->page_shift = offset;
gpte->eaddr = eaddr;
gpte->raddr = gpa;
@@ -115,6 +105,77 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_read = !!(pte & _PAGE_READ);
gpte->may_write = !!(pte & _PAGE_WRITE);
gpte->may_execute = !!(pte & _PAGE_EXEC);
+
+ gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
+
+ if (pte_ret_p)
+ *pte_ret_p = pte;
+
+ return 0;
+}
+
+/*
+ * Used to walk a partition or process table radix tree in guest memory
+ * Note: We exploit the fact that a partition table and a process
+ * table have the same layout, a partition-scoped page table and a
+ * process-scoped page table have the same layout, and the 2nd
+ * doubleword of a partition table entry has the same layout as
+ * the PTCR register.
+ */
+int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 table,
+ int table_index, u64 *pte_ret_p)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int ret;
+ unsigned long size, ptbl, root;
+ struct prtb_entry entry;
+
+ if ((table & PRTS_MASK) > 24)
+ return -EINVAL;
+ size = 1ul << ((table & PRTS_MASK) + 12);
+
+ /* Is the table big enough to contain this entry? */
+ if ((table_index * sizeof(entry)) >= size)
+ return -EINVAL;
+
+ /* Read the table to find the root of the radix tree */
+ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+ ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+ if (ret)
+ return ret;
+
+ /* Root is stored in the first double word */
+ root = be64_to_cpu(entry.prtb0);
+
+ return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
+}
+
+int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+ u32 pid;
+ u64 pte;
+ int ret;
+
+ /* Work out effective PID */
+ switch (eaddr >> 62) {
+ case 0:
+ pid = vcpu->arch.pid;
+ break;
+ case 3:
+ pid = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
+ vcpu->kvm->arch.process_table, pid, &pte);
+ if (ret)
+ return ret;
+
+ /* Check privilege (applies only to process scoped translations) */
if (kvmppc_get_msr(vcpu) & MSR_PR) {
if (pte & _PAGE_PRIVILEGED) {
gpte->may_read = 0;
@@ -137,20 +198,46 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
}
static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
- unsigned int pshift)
+ unsigned int pshift, unsigned int lpid)
{
unsigned long psize = PAGE_SIZE;
+ int psi;
+ long rc;
+ unsigned long rb;
if (pshift)
psize = 1UL << pshift;
+ else
+ pshift = PAGE_SHIFT;
addr &= ~(psize - 1);
- radix__flush_tlb_lpid_page(kvm->arch.lpid, addr, psize);
+
+ if (!kvmhv_on_pseries()) {
+ radix__flush_tlb_lpid_page(lpid, addr, psize);
+ return;
+ }
+
+ psi = shift_to_mmu_psize(pshift);
+ rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+ lpid, rb);
+ if (rc)
+ pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
}
-static void kvmppc_radix_flush_pwc(struct kvm *kvm)
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
{
- radix__flush_pwc_lpid(kvm->arch.lpid);
+ long rc;
+
+ if (!kvmhv_on_pseries()) {
+ radix__flush_pwc_lpid(lpid);
+ return;
+ }
+
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ if (rc)
+ pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
}
static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -195,23 +282,38 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
kmem_cache_free(kvm_pmd_cache, pmdp);
}
-static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
- unsigned long gpa, unsigned int shift)
+/* Called with kvm->mmu_lock held */
+void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+ unsigned int shift, struct kvm_memory_slot *memslot,
+ unsigned int lpid)
{
- unsigned long page_size = 1ul << shift;
unsigned long old;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ unsigned long page_size = PAGE_SIZE;
+ unsigned long hpa;
old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
- kvmppc_radix_tlbie_page(kvm, gpa, shift);
- if (old & _PAGE_DIRTY) {
- unsigned long gfn = gpa >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+
+ /* The following only applies to L1 entries */
+ if (lpid != kvm->arch.lpid)
+ return;
+ if (!memslot) {
memslot = gfn_to_memslot(kvm, gfn);
- if (memslot && memslot->dirty_bitmap)
- kvmppc_update_dirty_map(memslot, gfn, page_size);
+ if (!memslot)
+ return;
}
+ if (shift)
+ page_size = 1ul << shift;
+
+ gpa &= ~(page_size - 1);
+ hpa = old & PTE_RPN_MASK;
+ kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
+
+ if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot, gfn, page_size);
}
/*
@@ -224,7 +326,8 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
* and emit a warning if encountered, but there may already be data
* corruption due to the unexpected mappings.
*/
-static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
+ unsigned int lpid)
{
if (full) {
memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
@@ -238,14 +341,15 @@ static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
WARN_ON_ONCE(1);
kvmppc_unmap_pte(kvm, p,
pte_pfn(*p) << PAGE_SHIFT,
- PAGE_SHIFT);
+ PAGE_SHIFT, NULL, lpid);
}
}
kvmppc_pte_free(pte);
}
-static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
+ unsigned int lpid)
{
unsigned long im;
pmd_t *p = pmd;
@@ -260,20 +364,21 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
WARN_ON_ONCE(1);
kvmppc_unmap_pte(kvm, (pte_t *)p,
pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
- PMD_SHIFT);
+ PMD_SHIFT, NULL, lpid);
}
} else {
pte_t *pte;
pte = pte_offset_map(p, 0);
- kvmppc_unmap_free_pte(kvm, pte, full);
+ kvmppc_unmap_free_pte(kvm, pte, full, lpid);
pmd_clear(p);
}
}
kvmppc_pmd_free(pmd);
}
-static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
+ unsigned int lpid)
{
unsigned long iu;
pud_t *p = pud;
@@ -287,36 +392,40 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud)
pmd_t *pmd;
pmd = pmd_offset(p, 0);
- kvmppc_unmap_free_pmd(kvm, pmd, true);
+ kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
pud_clear(p);
}
}
pud_free(kvm->mm, pud);
}
-void kvmppc_free_radix(struct kvm *kvm)
+void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
{
unsigned long ig;
- pgd_t *pgd;
- if (!kvm->arch.pgtable)
- return;
- pgd = kvm->arch.pgtable;
for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
pud_t *pud;
if (!pgd_present(*pgd))
continue;
pud = pud_offset(pgd, 0);
- kvmppc_unmap_free_pud(kvm, pud);
+ kvmppc_unmap_free_pud(kvm, pud, lpid);
pgd_clear(pgd);
}
- pgd_free(kvm->mm, kvm->arch.pgtable);
- kvm->arch.pgtable = NULL;
+}
+
+void kvmppc_free_radix(struct kvm *kvm)
+{
+ if (kvm->arch.pgtable) {
+ kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
+ kvm->arch.lpid);
+ pgd_free(kvm->mm, kvm->arch.pgtable);
+ kvm->arch.pgtable = NULL;
+ }
}
static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
- unsigned long gpa)
+ unsigned long gpa, unsigned int lpid)
{
pte_t *pte = pte_offset_kernel(pmd, 0);
@@ -326,13 +435,13 @@ static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
* flushing the PWC again.
*/
pmd_clear(pmd);
- kvmppc_radix_flush_pwc(kvm);
+ kvmppc_radix_flush_pwc(kvm, lpid);
- kvmppc_unmap_free_pte(kvm, pte, false);
+ kvmppc_unmap_free_pte(kvm, pte, false, lpid);
}
static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
- unsigned long gpa)
+ unsigned long gpa, unsigned int lpid)
{
pmd_t *pmd = pmd_offset(pud, 0);
@@ -342,9 +451,9 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
* so can be freed without flushing the PWC again.
*/
pud_clear(pud);
- kvmppc_radix_flush_pwc(kvm);
+ kvmppc_radix_flush_pwc(kvm, lpid);
- kvmppc_unmap_free_pmd(kvm, pmd, false);
+ kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
}
/*
@@ -356,8 +465,10 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
*/
#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
-static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
- unsigned int level, unsigned long mmu_seq)
+int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+ unsigned long gpa, unsigned int level,
+ unsigned long mmu_seq, unsigned int lpid,
+ unsigned long *rmapp, struct rmap_nested **n_rmap)
{
pgd_t *pgd;
pud_t *pud, *new_pud = NULL;
@@ -366,7 +477,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
int ret;
/* Traverse the guest's 2nd-level tree, allocate new levels needed */
- pgd = kvm->arch.pgtable + pgd_index(gpa);
+ pgd = pgtable + pgd_index(gpa);
pud = NULL;
if (pgd_present(*pgd))
pud = pud_offset(pgd, gpa);
@@ -423,7 +534,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
goto out_unlock;
}
/* Valid 1GB page here already, remove it */
- kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT);
+ kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
+ lpid);
}
if (level == 2) {
if (!pud_none(*pud)) {
@@ -432,9 +544,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
* install a large page, so remove and free the page
* table page.
*/
- kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa);
+ kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
}
kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
ret = 0;
goto out_unlock;
}
@@ -458,7 +572,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
PTE_BITS_MUST_MATCH);
kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
- 0, pte_val(pte), lgpa, PMD_SHIFT);
+ 0, pte_val(pte), lgpa, PMD_SHIFT);
ret = 0;
goto out_unlock;
}
@@ -472,7 +586,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
goto out_unlock;
}
/* Valid 2MB page here already, remove it */
- kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT);
+ kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
+ lpid);
}
if (level == 1) {
if (!pmd_none(*pmd)) {
@@ -481,9 +596,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
* install a large page, so remove and free the page
* table page.
*/
- kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa);
+ kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
}
kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
ret = 0;
goto out_unlock;
}
@@ -508,6 +625,8 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
goto out_unlock;
}
kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
ret = 0;
out_unlock:
@@ -521,95 +640,49 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
return ret;
}
-int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned long ea, unsigned long dsisr)
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing,
+ unsigned long gpa, unsigned int lpid)
+{
+ unsigned long pgflags;
+ unsigned int shift;
+ pte_t *ptep;
+
+ /*
+ * Need to set an R or C bit in the 2nd-level tables;
+ * since we are just helping out the hardware here,
+ * it is sufficient to do what the hardware does.
+ */
+ pgflags = _PAGE_ACCESSED;
+ if (writing)
+ pgflags |= _PAGE_DIRTY;
+ /*
+ * We are walking the secondary (partition-scoped) page table here.
+ * We can do this without disabling irq because the Linux MM
+ * subsystem doesn't do THP splits and collapses on this tree.
+ */
+ ptep = __find_linux_pte(pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
+ kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
+ return true;
+ }
+ return false;
+}
+
+int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+ unsigned long gpa,
+ struct kvm_memory_slot *memslot,
+ bool writing, bool kvm_ro,
+ pte_t *inserted_pte, unsigned int *levelp)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long mmu_seq;
- unsigned long gpa, gfn, hva;
- struct kvm_memory_slot *memslot;
struct page *page = NULL;
- long ret;
- bool writing;
+ unsigned long mmu_seq;
+ unsigned long hva, gfn = gpa >> PAGE_SHIFT;
bool upgrade_write = false;
bool *upgrade_p = &upgrade_write;
pte_t pte, *ptep;
- unsigned long pgflags;
unsigned int shift, level;
-
- /* Check for unusual errors */
- if (dsisr & DSISR_UNSUPP_MMU) {
- pr_err("KVM: Got unsupported MMU fault\n");
- return -EFAULT;
- }
- if (dsisr & DSISR_BADACCESS) {
- /* Reflect to the guest as DSI */
- pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
- kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
- return RESUME_GUEST;
- }
-
- /* Translate the logical address and get the page */
- gpa = vcpu->arch.fault_gpa & ~0xfffUL;
- gpa &= ~0xF000000000000000ul;
- gfn = gpa >> PAGE_SHIFT;
- if (!(dsisr & DSISR_PRTABLE_FAULT))
- gpa |= ea & 0xfff;
- memslot = gfn_to_memslot(kvm, gfn);
-
- /* No memslot means it's an emulated MMIO region */
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
- if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
- DSISR_SET_RC)) {
- /*
- * Bad address in guest page table tree, or other
- * unusual error - reflect it to the guest as DSI.
- */
- kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
- return RESUME_GUEST;
- }
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
- dsisr & DSISR_ISSTORE);
- }
-
- writing = (dsisr & DSISR_ISSTORE) != 0;
- if (memslot->flags & KVM_MEM_READONLY) {
- if (writing) {
- /* give the guest a DSI */
- dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
- kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
- return RESUME_GUEST;
- }
- upgrade_p = NULL;
- }
-
- if (dsisr & DSISR_SET_RC) {
- /*
- * Need to set an R or C bit in the 2nd-level tables;
- * since we are just helping out the hardware here,
- * it is sufficient to do what the hardware does.
- */
- pgflags = _PAGE_ACCESSED;
- if (writing)
- pgflags |= _PAGE_DIRTY;
- /*
- * We are walking the secondary page table here. We can do this
- * without disabling irq.
- */
- spin_lock(&kvm->mmu_lock);
- ptep = __find_linux_pte(kvm->arch.pgtable,
- gpa, NULL, &shift);
- if (ptep && pte_present(*ptep) &&
- (!writing || pte_write(*ptep))) {
- kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
- gpa, shift);
- dsisr &= ~DSISR_SET_RC;
- }
- spin_unlock(&kvm->mmu_lock);
- if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
- DSISR_PROTFAULT | DSISR_SET_RC)))
- return RESUME_GUEST;
- }
+ int ret;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
@@ -622,7 +695,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* is that the page is writable.
*/
hva = gfn_to_hva_memslot(memslot, gfn);
- if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
upgrade_write = true;
} else {
unsigned long pfn;
@@ -690,7 +763,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/* Allocate space in the tree and write the PTE */
- ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+ ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
+ mmu_seq, kvm->arch.lpid, NULL, NULL);
+ if (inserted_pte)
+ *inserted_pte = pte;
+ if (levelp)
+ *levelp = level;
if (page) {
if (!ret && (pte_val(pte) & _PAGE_WRITE))
@@ -698,6 +776,82 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
put_page(page);
}
+ return ret;
+}
+
+int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long gpa, gfn;
+ struct kvm_memory_slot *memslot;
+ long ret;
+ bool writing = !!(dsisr & DSISR_ISSTORE);
+ bool kvm_ro = false;
+
+ /* Check for unusual errors */
+ if (dsisr & DSISR_UNSUPP_MMU) {
+ pr_err("KVM: Got unsupported MMU fault\n");
+ return -EFAULT;
+ }
+ if (dsisr & DSISR_BADACCESS) {
+ /* Reflect to the guest as DSI */
+ pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
+ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+ return RESUME_GUEST;
+ }
+
+ /* Translate the logical address */
+ gpa = vcpu->arch.fault_gpa & ~0xfffUL;
+ gpa &= ~0xF000000000000000ul;
+ gfn = gpa >> PAGE_SHIFT;
+ if (!(dsisr & DSISR_PRTABLE_FAULT))
+ gpa |= ea & 0xfff;
+
+ /* Get the corresponding memslot */
+ memslot = gfn_to_memslot(kvm, gfn);
+
+ /* No memslot means it's an emulated MMIO region */
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
+ DSISR_SET_RC)) {
+ /*
+ * Bad address in guest page table tree, or other
+ * unusual error - reflect it to the guest as DSI.
+ */
+ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+ return RESUME_GUEST;
+ }
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
+ }
+
+ if (memslot->flags & KVM_MEM_READONLY) {
+ if (writing) {
+ /* give the guest a DSI */
+ kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE |
+ DSISR_PROTFAULT);
+ return RESUME_GUEST;
+ }
+ kvm_ro = true;
+ }
+
+ /* Failed to set the reference/change bits */
+ if (dsisr & DSISR_SET_RC) {
+ spin_lock(&kvm->mmu_lock);
+ if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable,
+ writing, gpa, kvm->arch.lpid))
+ dsisr &= ~DSISR_SET_RC;
+ spin_unlock(&kvm->mmu_lock);
+
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT | DSISR_SET_RC)))
+ return RESUME_GUEST;
+ }
+
+ /* Try to insert a pte */
+ ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
+ kvm_ro, NULL, NULL);
+
if (ret == 0 || ret == -EAGAIN)
ret = RESUME_GUEST;
return ret;
@@ -710,20 +864,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
pte_t *ptep;
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
- unsigned long old;
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
- if (ptep && pte_present(*ptep)) {
- old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
- gpa, shift);
- kvmppc_radix_tlbie_page(kvm, gpa, shift);
- if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
- unsigned long psize = PAGE_SIZE;
- if (shift)
- psize = 1ul << shift;
- kvmppc_update_dirty_map(memslot, gfn, psize);
- }
- }
+ if (ptep && pte_present(*ptep))
+ kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+ kvm->arch.lpid);
return 0;
}
@@ -778,7 +923,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
ret = 1 << (shift - PAGE_SHIFT);
kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
gpa, shift);
- kvmppc_radix_tlbie_page(kvm, gpa, shift);
+ kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
}
return ret;
}
@@ -863,6 +1008,215 @@ static void pmd_ctor(void *addr)
memset(addr, 0, RADIX_PMD_TABLE_SIZE);
}
+struct debugfs_radix_state {
+ struct kvm *kvm;
+ struct mutex mutex;
+ unsigned long gpa;
+ int lpid;
+ int chars_left;
+ int buf_index;
+ char buf[128];
+ u8 hdr;
+};
+
+static int debugfs_radix_open(struct inode *inode, struct file *file)
+{
+ struct kvm *kvm = inode->i_private;
+ struct debugfs_radix_state *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ kvm_get_kvm(kvm);
+ p->kvm = kvm;
+ mutex_init(&p->mutex);
+ file->private_data = p;
+
+ return nonseekable_open(inode, file);
+}
+
+static int debugfs_radix_release(struct inode *inode, struct file *file)
+{
+ struct debugfs_radix_state *p = file->private_data;
+
+ kvm_put_kvm(p->kvm);
+ kfree(p);
+ return 0;
+}
+
+static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct debugfs_radix_state *p = file->private_data;
+ ssize_t ret, r;
+ unsigned long n;
+ struct kvm *kvm;
+ unsigned long gpa;
+ pgd_t *pgt;
+ struct kvm_nested_guest *nested;
+ pgd_t pgd, *pgdp;
+ pud_t pud, *pudp;
+ pmd_t pmd, *pmdp;
+ pte_t *ptep;
+ int shift;
+ unsigned long pte;
+
+ kvm = p->kvm;
+ if (!kvm_is_radix(kvm))
+ return 0;
+
+ ret = mutex_lock_interruptible(&p->mutex);
+ if (ret)
+ return ret;
+
+ if (p->chars_left) {
+ n = p->chars_left;
+ if (n > len)
+ n = len;
+ r = copy_to_user(buf, p->buf + p->buf_index, n);
+ n -= r;
+ p->chars_left -= n;
+ p->buf_index += n;
+ buf += n;
+ len -= n;
+ ret = n;
+ if (r) {
+ if (!n)
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ gpa = p->gpa;
+ nested = NULL;
+ pgt = NULL;
+ while (len != 0 && p->lpid >= 0) {
+ if (gpa >= RADIX_PGTABLE_RANGE) {
+ gpa = 0;
+ pgt = NULL;
+ if (nested) {
+ kvmhv_put_nested(nested);
+ nested = NULL;
+ }
+ p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
+ p->hdr = 0;
+ if (p->lpid < 0)
+ break;
+ }
+ if (!pgt) {
+ if (p->lpid == 0) {
+ pgt = kvm->arch.pgtable;
+ } else {
+ nested = kvmhv_get_nested(kvm, p->lpid, false);
+ if (!nested) {
+ gpa = RADIX_PGTABLE_RANGE;
+ continue;
+ }
+ pgt = nested->shadow_pgtable;
+ }
+ }
+ n = 0;
+ if (!p->hdr) {
+ if (p->lpid > 0)
+ n = scnprintf(p->buf, sizeof(p->buf),
+ "\nNested LPID %d: ", p->lpid);
+ n += scnprintf(p->buf + n, sizeof(p->buf) - n,
+ "pgdir: %lx\n", (unsigned long)pgt);
+ p->hdr = 1;
+ goto copy;
+ }
+
+ pgdp = pgt + pgd_index(gpa);
+ pgd = READ_ONCE(*pgdp);
+ if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
+ gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
+ continue;
+ }
+
+ pudp = pud_offset(&pgd, gpa);
+ pud = READ_ONCE(*pudp);
+ if (!(pud_val(pud) & _PAGE_PRESENT)) {
+ gpa = (gpa & PUD_MASK) + PUD_SIZE;
+ continue;
+ }
+ if (pud_val(pud) & _PAGE_PTE) {
+ pte = pud_val(pud);
+ shift = PUD_SHIFT;
+ goto leaf;
+ }
+
+ pmdp = pmd_offset(&pud, gpa);
+ pmd = READ_ONCE(*pmdp);
+ if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
+ gpa = (gpa & PMD_MASK) + PMD_SIZE;
+ continue;
+ }
+ if (pmd_val(pmd) & _PAGE_PTE) {
+ pte = pmd_val(pmd);
+ shift = PMD_SHIFT;
+ goto leaf;
+ }
+
+ ptep = pte_offset_kernel(&pmd, gpa);
+ pte = pte_val(READ_ONCE(*ptep));
+ if (!(pte & _PAGE_PRESENT)) {
+ gpa += PAGE_SIZE;
+ continue;
+ }
+ shift = PAGE_SHIFT;
+ leaf:
+ n = scnprintf(p->buf, sizeof(p->buf),
+ " %lx: %lx %d\n", gpa, pte, shift);
+ gpa += 1ul << shift;
+ copy:
+ p->chars_left = n;
+ if (n > len)
+ n = len;
+ r = copy_to_user(buf, p->buf, n);
+ n -= r;
+ p->chars_left -= n;
+ p->buf_index = n;
+ buf += n;
+ len -= n;
+ ret += n;
+ if (r) {
+ if (!ret)
+ ret = -EFAULT;
+ break;
+ }
+ }
+ p->gpa = gpa;
+ if (nested)
+ kvmhv_put_nested(nested);
+
+ out:
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -EACCES;
+}
+
+static const struct file_operations debugfs_radix_fops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_radix_open,
+ .release = debugfs_radix_release,
+ .read = debugfs_radix_read,
+ .write = debugfs_radix_write,
+ .llseek = generic_file_llseek,
+};
+
+void kvmhv_radix_debugfs_init(struct kvm *kvm)
+{
+ kvm->arch.radix_dentry = debugfs_create_file("radix", 0400,
+ kvm->arch.debugfs_dir, kvm,
+ &debugfs_radix_fops);
+}
+
int kvmppc_radix_init(void)
{
unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 9a3f2646ecc7..62a8d03ba7e9 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -363,6 +363,40 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return ret;
}
+static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long tce)
+{
+ unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ enum dma_data_direction dir = iommu_tce_direction(tce);
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long ua = 0;
+
+ /* Allow userspace to poison TCE table */
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ if (iommu_tce_check_gpa(stt->page_shift, gpa))
+ return H_TOO_HARD;
+
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ return H_TOO_HARD;
+
+ list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+ unsigned long hpa = 0;
+ struct mm_iommu_table_group_mem_t *mem;
+ long shift = stit->tbl->it_page_shift;
+
+ mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
+ if (!mem)
+ return H_TOO_HARD;
+
+ if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa))
+ return H_TOO_HARD;
+ }
+
+ return H_SUCCESS;
+}
+
static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
{
unsigned long hpa = 0;
@@ -376,11 +410,10 @@ static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
{
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
- /* it_userspace allocation might be delayed */
- return H_TOO_HARD;
+ return H_SUCCESS;
mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
if (!mem)
@@ -401,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
long ret;
if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
- return H_HARDWARE;
+ return H_TOO_HARD;
if (dir == DMA_NONE)
return H_SUCCESS;
@@ -449,15 +482,15 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
return H_TOO_HARD;
if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
- return H_HARDWARE;
+ return H_TOO_HARD;
if (mm_iommu_mapped_inc(mem))
- return H_CLOSED;
+ return H_TOO_HARD;
ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
- return H_HARDWARE;
+ return H_TOO_HARD;
}
if (dir != DMA_NONE)
@@ -517,8 +550,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) {
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -533,14 +565,10 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
entry, ua, dir);
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
-
- WARN_ON_ONCE(1);
- kvmppc_clear_tce(stit->tbl, entry);
+ }
}
kvmppc_tce_put(stt, entry, tce);
@@ -583,7 +611,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -599,10 +627,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ /*
+ * This looks unsafe, because we validate, then regrab
+ * the TCE from userspace which could have been changed by
+ * another thread.
+ *
+ * But it actually is safe, because the relevant checks will be
+ * re-executed in the following code. If userspace tries to
+ * change this dodgily it will result in a messier failure mode
+ * but won't threaten the host.
+ */
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
- if (kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
- &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -610,14 +654,10 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua,
iommu_tce_direction(tce));
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_clear_tce(stit->tbl, entry);
goto unlock_exit;
-
- WARN_ON_ONCE(1);
- kvmppc_clear_tce(stit->tbl, entry);
+ }
}
kvmppc_tce_put(stt, entry + i, tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 6821ead4b4eb..2206bc729b9a 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -87,6 +87,7 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvmppc_find_table);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -94,14 +95,14 @@ EXPORT_SYMBOL_GPL(kvmppc_find_table);
* to the table and user space is supposed to process them), we can skip
* checking other things (such as TCE is a guest RAM address or the page
* was actually allocated).
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
*/
-long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
+static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long tce)
{
unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
enum dma_data_direction dir = iommu_tce_direction(tce);
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long ua = 0;
/* Allow userspace to poison TCE table */
if (dir == DMA_NONE)
@@ -110,9 +111,25 @@ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ return H_TOO_HARD;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long hpa = 0;
+ struct mm_iommu_table_group_mem_t *mem;
+ long shift = stit->tbl->it_page_shift;
+
+ mem = mm_iommu_lookup_rm(stt->kvm->mm, ua, 1ULL << shift);
+ if (!mem)
+ return H_TOO_HARD;
+
+ if (mm_iommu_ua_to_hpa_rm(mem, ua, shift, &hpa))
+ return H_TOO_HARD;
+ }
+
return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_validate);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode,
*
@@ -164,10 +181,10 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
}
EXPORT_SYMBOL_GPL(kvmppc_tce_put);
-long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
+long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
unsigned long *ua, unsigned long **prmap)
{
- unsigned long gfn = gpa >> PAGE_SHIFT;
+ unsigned long gfn = tce >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = search_memslots(kvm_memslots(kvm), gfn);
@@ -175,7 +192,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return -EINVAL;
*ua = __gfn_to_hva_memslot(memslot, gfn) |
- (gpa & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (prmap)
@@ -184,7 +201,7 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
return 0;
}
-EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
+EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
@@ -197,7 +214,7 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL))) {
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
/*
* kvmppc_rm_tce_iommu_do_map() updates the UA cache after
* calling this so we still get here a valid UA.
@@ -223,7 +240,7 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
{
struct mm_iommu_table_group_mem_t *mem = NULL;
const unsigned long pgsize = 1ULL << tbl->it_page_shift;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
if (!pua)
/* it_userspace allocation might be delayed */
@@ -287,7 +304,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
{
long ret;
unsigned long hpa = 0;
- __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
struct mm_iommu_table_group_mem_t *mem;
if (!pua)
@@ -300,10 +317,10 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift,
&hpa)))
- return H_HARDWARE;
+ return H_TOO_HARD;
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
- return H_CLOSED;
+ return H_TOO_HARD;
ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
if (ret) {
@@ -368,13 +385,12 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
- ret = kvmppc_tce_validate(stt, tce);
+ ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -387,14 +403,10 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
stit->tbl, entry, ua, dir);
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
return ret;
-
- WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
}
kvmppc_tce_put(stt, entry, tce);
@@ -480,7 +492,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -496,12 +508,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
if (WARN_ON_ONCE_RM(!rmap))
- return H_HARDWARE;
+ return H_TOO_HARD;
/*
* Synchronize with the MMU notifier callbacks in
@@ -521,14 +533,16 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
for (i = 0; i < npages; ++i) {
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
- ret = kvmppc_tce_validate(stt, tce);
+ ret = kvmppc_rm_tce_validate(stt, tce);
if (ret != H_SUCCESS)
goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_gpa_to_ua(vcpu->kvm,
- tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
- &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -536,14 +550,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
stit->tbl, entry + i, ua,
iommu_tce_direction(tce));
- if (ret == H_SUCCESS)
- continue;
-
- if (ret == H_TOO_HARD)
+ if (ret != H_SUCCESS) {
+ kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
+ entry);
goto unlock_exit;
-
- WARN_ON_ONCE_RM(1);
- kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
+ }
}
kvmppc_tce_put(stt, entry + i, tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 36b11c5a0dbb..8c7e933e942e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -36,7 +36,6 @@
#define OP_31_XOP_MTSR 210
#define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274
-#define OP_31_XOP_TLBIE 306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402
@@ -110,7 +109,7 @@ static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
vcpu->arch.tar_tm = vcpu->arch.tar;
vcpu->arch.lr_tm = vcpu->arch.regs.link;
- vcpu->arch.cr_tm = vcpu->arch.cr;
+ vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
vcpu->arch.xer_tm = vcpu->arch.regs.xer;
vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
}
@@ -129,7 +128,7 @@ static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
vcpu->arch.tar = vcpu->arch.tar_tm;
vcpu->arch.regs.link = vcpu->arch.lr_tm;
- vcpu->arch.cr = vcpu->arch.cr_tm;
+ vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
vcpu->arch.regs.xer = vcpu->arch.xer_tm;
vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
}
@@ -141,7 +140,7 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
uint64_t texasr;
/* CR0 = 0 | MSR[TS] | 0 */
- vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT);
@@ -220,7 +219,7 @@ void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
tm_abort(ra_val);
/* CR0 = 0 | MSR[TS] | 0 */
- vcpu->arch.cr = (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)) |
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
(((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
<< CR0_SHIFT);
@@ -494,8 +493,8 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
preempt_disable();
- vcpu->arch.cr = (CR0_TBEGIN_FAILURE |
- (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)));
+ vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
+ (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
(((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e3a71594e63..bf8def2159c3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,6 +50,7 @@
#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/asm-prototypes.h>
+#include <asm/archrandom.h>
#include <asm/debug.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
@@ -104,6 +105,10 @@ static bool indep_threads_mode = true;
module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
+static bool one_vm_per_core;
+module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+
#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
.set = param_set_int,
@@ -117,6 +122,16 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* If set, guests are allowed to create and control nested guests */
+static bool nested = true;
+module_param(nested, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+ return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix;
@@ -173,6 +188,10 @@ static bool kvmppc_ipi_thread(int cpu)
{
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+ /* If we're a nested hypervisor, fall back to ordinary IPIs for now */
+ if (kvmhv_on_pseries())
+ return false;
+
/* On POWER9 we can use msgsnd to IPI any cpu */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -410,8 +429,8 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
- pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
- vcpu->arch.cr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
+ pr_err("cr = %.8lx xer = %.16lx dsisr = %.8x\n",
+ vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
pr_err("fault dar = %.16lx dsisr = %.8x\n",
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
@@ -730,8 +749,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
- * lwsync in book3s_hv_rmhandlers.S just before the
- * fast_guest_return label.
+ * smb_wmb() in kvmppc_guest_entry_inject().
*/
smp_rmb();
vc = vcpu->arch.vcore;
@@ -912,6 +930,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
}
return RESUME_HOST;
+ case H_SET_DABR:
+ ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_SET_XDABR:
+ ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ break;
+ case H_GET_TCE:
+ ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
case H_PUT_TCE:
ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
@@ -935,6 +966,32 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
+ case H_RANDOM:
+ if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
+ ret = H_HARDWARE;
+ break;
+
+ case H_SET_PARTITION_TABLE:
+ ret = H_FUNCTION;
+ if (nesting_enabled(vcpu->kvm))
+ ret = kvmhv_set_partition_table(vcpu);
+ break;
+ case H_ENTER_NESTED:
+ ret = H_FUNCTION;
+ if (!nesting_enabled(vcpu->kvm))
+ break;
+ ret = kvmhv_enter_nested_guest(vcpu);
+ if (ret == H_INTERRUPT) {
+ kvmppc_set_gpr(vcpu, 3, 0);
+ return -EINTR;
+ }
+ break;
+ case H_TLB_INVALIDATE:
+ ret = H_FUNCTION;
+ if (nesting_enabled(vcpu->kvm))
+ ret = kvmhv_do_nested_tlbie(vcpu);
+ break;
+
default:
return RESUME_HOST;
}
@@ -943,6 +1000,24 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+/*
+ * Handle H_CEDE in the nested virtualization case where we haven't
+ * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * This has to be done early, not in kvmppc_pseries_do_hcall(), so
+ * that the cede logic in kvmppc_run_single_vcpu() works properly.
+ */
+static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.shregs.msr |= MSR_EE;
+ vcpu->arch.ceded = 1;
+ smp_mb();
+ if (vcpu->arch.prodded) {
+ vcpu->arch.prodded = 0;
+ smp_mb();
+ vcpu->arch.ceded = 0;
+ }
+}
+
static int kvmppc_hcall_impl_hv(unsigned long cmd)
{
switch (cmd) {
@@ -1085,7 +1160,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
-/* Called with vcpu->arch.vcore->lock held */
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -1190,7 +1264,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
- vcpu->arch.fault_dsisr = 0;
+ vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
+ DSISR_SRR1_MATCH_64S;
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
r = RESUME_PAGE_FAULT;
break;
/*
@@ -1206,10 +1283,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
- /* Need vcore unlocked to call kvmppc_get_last_inst */
- spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_debug_inst(run, vcpu);
- spin_lock(&vcpu->arch.vcore->lock);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1225,12 +1299,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
r = EMULATE_FAIL;
if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
- cpu_has_feature(CPU_FTR_ARCH_300)) {
- /* Need vcore unlocked to call kvmppc_get_last_inst */
- spin_unlock(&vcpu->arch.vcore->lock);
+ cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmppc_emulate_doorbell_instr(vcpu);
- spin_lock(&vcpu->arch.vcore->lock);
- }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1265,6 +1335,104 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+{
+ int r;
+ int srcu_idx;
+
+ vcpu->stat.sum_exits++;
+
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (vcpu->arch.shregs.msr & MSR_HV) {
+ pr_emerg("KVM trap in HV mode while nested!\n");
+ pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ kvmppc_dump_regs(vcpu);
+ return RESUME_HOST;
+ }
+ switch (vcpu->arch.trap) {
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_HOST;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ case BOOK3S_INTERRUPT_H_VIRT:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+ case BOOK3S_INTERRUPT_HMI:
+ case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ /* Pass the machine check to the L1 guest */
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false);
+ break;
+ /*
+ * We get these next two if the guest accesses a page which it thinks
+ * it has mapped but which is not actually present, either because
+ * it is for an emulated I/O device or because the corresonding
+ * host page has been paged out.
+ */
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmhv_nested_page_fault(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+ vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
+ DSISR_SRR1_MATCH_64S;
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmhv_nested_page_fault(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ break;
+#endif
+
+ case BOOK3S_INTERRUPT_HV_RM_HARD:
+ vcpu->arch.trap = 0;
+ r = RESUME_GUEST;
+ if (!xive_enabled())
+ kvmppc_xics_rm_complete(vcpu, 0);
+ break;
+ default:
+ r = RESUME_HOST;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -1555,6 +1723,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ONLINE:
*val = get_reg_val(id, vcpu->arch.online);
break;
+ case KVM_REG_PPC_PTCR:
+ *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
+ break;
default:
r = -EINVAL;
break;
@@ -1786,6 +1957,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
atomic_dec(&vcpu->arch.vcore->online_count);
vcpu->arch.online = i;
break;
+ case KVM_REG_PPC_PTCR:
+ vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
+ break;
default:
r = -EINVAL;
break;
@@ -2019,15 +2193,18 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
* Set the default HFSCR for the guest from the host value.
* This value is only used on POWER9.
* On POWER9, we want to virtualize the doorbell facility, so we
- * turn off the HFSCR bit, which causes those instructions to trap.
+ * don't set the HFSCR_MSGP bit, and that causes those instructions
+ * to trap and then we emulate them.
*/
- vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
- if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ vcpu->arch.hfscr |= HFSCR_TM;
+ }
+ if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
- else if (!cpu_has_feature(CPU_FTR_TM_COMP))
- vcpu->arch.hfscr &= ~HFSCR_TM;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- vcpu->arch.hfscr &= ~HFSCR_MSGP;
kvmppc_mmu_book3s_hv_init(vcpu);
@@ -2242,10 +2419,18 @@ static void kvmppc_release_hwthread(int cpu)
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ cpumask_t *cpu_in_guest;
int i;
cpu = cpu_first_thread_sibling(cpu);
- cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
+ if (nested) {
+ cpumask_set_cpu(cpu, &nested->need_tlb_flush);
+ cpu_in_guest = &nested->cpu_in_guest;
+ } else {
+ cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
+ cpu_in_guest = &kvm->arch.cpu_in_guest;
+ }
/*
* Make sure setting of bit in need_tlb_flush precedes
* testing of cpu_in_guest bits. The matching barrier on
@@ -2253,13 +2438,23 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
*/
smp_mb();
for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
+ if (cpumask_test_cpu(cpu + i, cpu_in_guest))
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
struct kvm *kvm = vcpu->kvm;
+ int prev_cpu;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
+
+ if (nested)
+ prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
+ else
+ prev_cpu = vcpu->arch.prev_cpu;
/*
* With radix, the guest can do TLB invalidations itself,
@@ -2273,12 +2468,46 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
* ran to flush the TLB. The TLB is shared between threads,
* so we use a single bit in .need_tlb_flush for all 4 threads.
*/
- if (vcpu->arch.prev_cpu != pcpu) {
- if (vcpu->arch.prev_cpu >= 0 &&
- cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+ if (prev_cpu != pcpu) {
+ if (prev_cpu >= 0 &&
+ cpu_first_thread_sibling(prev_cpu) !=
cpu_first_thread_sibling(pcpu))
- radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
- vcpu->arch.prev_cpu = pcpu;
+ radix_flush_cpu(kvm, prev_cpu, vcpu);
+ if (nested)
+ nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
+ else
+ vcpu->arch.prev_cpu = pcpu;
+ }
+}
+
+static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested)
+{
+ cpumask_t *need_tlb_flush;
+ int lpid;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pcpu &= ~0x3UL;
+
+ if (nested) {
+ lpid = nested->shadow_lpid;
+ need_tlb_flush = &nested->need_tlb_flush;
+ } else {
+ lpid = kvm->arch.lpid;
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+ }
+
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ smp_mb();
+
+ if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+ radix__local_flush_tlb_lpid_guest(lpid);
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, need_tlb_flush);
}
}
@@ -2493,6 +2722,10 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return false;
+ /* In one_vm_per_core mode, require all vcores to be from the same vm */
+ if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
+ return false;
+
/* Some POWER9 chips require all threads to be in the same MMU mode */
if (no_mixing_hpt_and_radix &&
kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
@@ -2600,6 +2833,14 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
spin_lock(&vc->lock);
now = get_tb();
for_each_runnable_thread(i, vcpu, vc) {
+ /*
+ * It's safe to unlock the vcore in the loop here, because
+ * for_each_runnable_thread() is safe against removal of
+ * the vcpu, and the vcore state is VCORE_EXITING here,
+ * so any vcpus becoming runnable will have their arch.trap
+ * set to zero and can't actually run in the guest.
+ */
+ spin_unlock(&vc->lock);
/* cancel pending dec exception if dec is positive */
if (now < vcpu->arch.dec_expires &&
kvmppc_core_pending_dec(vcpu))
@@ -2615,6 +2856,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
+ spin_lock(&vc->lock);
if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
if (vcpu->arch.pending_exceptions)
kvmppc_core_prepare_to_enter(vcpu);
@@ -2963,8 +3205,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
spin_unlock(&core_info.vc[sub]->lock);
if (kvm_is_radix(vc->kvm)) {
- int tmp = pcpu;
-
/*
* Do we need to flush the process scoped TLB for the LPAR?
*
@@ -2975,17 +3215,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*
* Hash must be flushed in realmode in order to use tlbiel.
*/
- mtspr(SPRN_LPID, vc->kvm->arch.lpid);
- isync();
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- tmp &= ~0x3UL;
-
- if (cpumask_test_cpu(tmp, &vc->kvm->arch.need_tlb_flush)) {
- radix__local_flush_tlb_lpid_guest(vc->kvm->arch.lpid);
- /* Clear the bit after the TLB flush */
- cpumask_clear_cpu(tmp, &vc->kvm->arch.need_tlb_flush);
- }
+ kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
}
/*
@@ -3080,6 +3310,300 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
/*
+ * Load up hypervisor-mode registers on P9.
+ */
+static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec;
+ u64 tb, purr, spurr;
+ int trap;
+ unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+ unsigned long host_ciabr = mfspr(SPRN_CIABR);
+ unsigned long host_dawr = mfspr(SPRN_DAWR);
+ unsigned long host_dawrx = mfspr(SPRN_DAWRX);
+ unsigned long host_psscr = mfspr(SPRN_PSSCR);
+ unsigned long host_pidr = mfspr(SPRN_PID);
+
+ hdec = time_limit - mftb();
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ mtspr(SPRN_HDEC, hdec);
+
+ if (vc->tb_offset) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr);
+ mtspr(SPRN_DPDES, vc->dpdes);
+ mtspr(SPRN_VTB, vc->vtb);
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ mtspr(SPRN_DAWR, vcpu->arch.dawr);
+ mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
+ }
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+ mtspr(SPRN_IC, vcpu->arch.ic);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+
+ kvmppc_xive_push_vcpu(vcpu);
+
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+ trap = __kvmhv_vcpu_entry_p9(vcpu);
+
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+ purr - vcpu->arch.purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+ spurr - vcpu->arch.spurr);
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ mtspr(SPRN_PSSCR, host_psscr);
+ mtspr(SPRN_HFSCR, host_hfscr);
+ mtspr(SPRN_CIABR, host_ciabr);
+ mtspr(SPRN_DAWR, host_dawr);
+ mtspr(SPRN_DAWRX, host_dawrx);
+ mtspr(SPRN_PID, host_pidr);
+
+ /*
+ * Since this is radix, do a eieio; tlbsync; ptesync sequence in
+ * case we interrupted the guest between a tlbie and a ptesync.
+ */
+ asm volatile("eieio; tlbsync; ptesync");
+
+ mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
+ isync();
+
+ vc->dpdes = mfspr(SPRN_DPDES);
+ vc->vtb = mfspr(SPRN_VTB);
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, 0);
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = 0;
+ }
+
+ mtspr(SPRN_HDEC, 0x7fffffff);
+ mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+
+ return trap;
+}
+
+/*
+ * Virtual-mode guest entry for POWER9 and later when the host and
+ * guest are both using the radix MMU. The LPIDR has already been set.
+ */
+int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ unsigned long host_dscr = mfspr(SPRN_DSCR);
+ unsigned long host_tidr = mfspr(SPRN_TIDR);
+ unsigned long host_iamr = mfspr(SPRN_IAMR);
+ s64 dec;
+ u64 tb;
+ int trap, save_pmu;
+
+ dec = mfspr(SPRN_DEC);
+ tb = mftb();
+ if (dec < 512)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ local_paca->kvm_hstate.dec_expires = dec + tb;
+ if (local_paca->kvm_hstate.dec_expires < time_limit)
+ time_limit = local_paca->kvm_hstate.dec_expires;
+
+ vcpu->arch.ceded = 0;
+
+ kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
+
+ kvmppc_subcore_enter_guest();
+
+ vc->entry_exit_map = 1;
+ vc->in_guest = 1;
+
+ if (vcpu->arch.vpa.pinned_addr) {
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+ lp->yield_count = cpu_to_be32(yield_count);
+ vcpu->arch.vpa.dirty = 1;
+ }
+
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+ kvmhv_load_guest_pmu(vcpu);
+
+ msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+ load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ load_vr_state(&vcpu->arch.vr);
+#endif
+
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+
+ mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+
+ if (kvmhv_on_pseries()) {
+ /* call our hypervisor to load up HV regs and go */
+ struct hv_guest_state hvregs;
+
+ kvmhv_save_hv_regs(vcpu, &hvregs);
+ hvregs.lpcr = lpcr;
+ vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+ hvregs.version = HV_GUEST_STATE_VERSION;
+ if (vcpu->arch.nested) {
+ hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+ hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+ } else {
+ hvregs.lpid = vcpu->kvm->arch.lpid;
+ hvregs.vcpu_token = vcpu->vcpu_id;
+ }
+ hvregs.hdec_expiry = time_limit;
+ trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+ __pa(&vcpu->arch.regs));
+ kvmhv_restore_hv_return_state(vcpu, &hvregs);
+ vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ /* H_CEDE has to be handled now, not later */
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
+ kvmppc_nested_cede(vcpu);
+ trap = 0;
+ }
+ } else {
+ trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ }
+
+ vcpu->arch.slb_max = 0;
+ dec = mfspr(SPRN_DEC);
+ tb = mftb();
+ vcpu->arch.dec_expires = dec + tb;
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_AMR, 0);
+ mtspr(SPRN_UAMOR, 0);
+ mtspr(SPRN_DSCR, host_dscr);
+ mtspr(SPRN_TIDR, host_tidr);
+ mtspr(SPRN_IAMR, host_iamr);
+ mtspr(SPRN_PSPB, 0);
+
+ msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
+ store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ store_vr_state(&vcpu->arch.vr);
+#endif
+
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+
+ save_pmu = 1;
+ if (vcpu->arch.vpa.pinned_addr) {
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+ lp->yield_count = cpu_to_be32(yield_count);
+ vcpu->arch.vpa.dirty = 1;
+ save_pmu = lp->pmcregs_in_use;
+ }
+
+ kvmhv_save_guest_pmu(vcpu, save_pmu);
+
+ vc->entry_exit_map = 0x101;
+ vc->in_guest = 0;
+
+ mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+
+ kvmhv_load_host_pmu();
+
+ kvmppc_subcore_exit_guest();
+
+ return trap;
+}
+
+/*
* Wait for some other vcpu thread to execute us, and
* wake us up when we need to handle something in the host.
*/
@@ -3256,6 +3780,11 @@ out:
trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
}
+/*
+ * This never fails for a radix guest, as none of the operations it does
+ * for a radix guest can fail or have a way to report failure.
+ * kvmhv_run_single_vcpu() relies on this fact.
+ */
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
int r = 0;
@@ -3405,6 +3934,171 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return vcpu->arch.ret;
}
+int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
+ struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr)
+{
+ int trap, r, pcpu;
+ int srcu_idx;
+ struct kvmppc_vcore *vc;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+
+ trace_kvmppc_run_vcpu_enter(vcpu);
+
+ kvm_run->exit_reason = 0;
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+
+ vc = vcpu->arch.vcore;
+ vcpu->arch.ceded = 0;
+ vcpu->arch.run_task = current;
+ vcpu->arch.kvm_run = kvm_run;
+ vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+ vcpu->arch.busy_preempt = TB_NIL;
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+ vc->runnable_threads[0] = vcpu;
+ vc->n_runnable = 1;
+ vc->runner = vcpu;
+
+ /* See if the MMU is ready to go */
+ if (!kvm->arch.mmu_ready)
+ kvmhv_setup_mmu(vcpu);
+
+ if (need_resched())
+ cond_resched();
+
+ kvmppc_update_vpas(vcpu);
+
+ init_vcore_to_run(vc);
+ vc->preempt_tb = TB_NIL;
+
+ preempt_disable();
+ pcpu = smp_processor_id();
+ vc->pcpu = pcpu;
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+
+ local_irq_disable();
+ hard_irq_disable();
+ if (signal_pending(current))
+ goto sigpend;
+ if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+ goto out;
+
+ if (!nested) {
+ kvmppc_core_prepare_to_enter(vcpu);
+ if (vcpu->arch.doorbell_request) {
+ vc->dpdes = 1;
+ smp_wmb();
+ vcpu->arch.doorbell_request = 0;
+ }
+ if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+ &vcpu->arch.pending_exceptions))
+ lpcr |= LPCR_MER;
+ } else if (vcpu->arch.pending_exceptions ||
+ vcpu->arch.doorbell_request ||
+ xive_interrupt_pending(vcpu)) {
+ vcpu->arch.ret = RESUME_HOST;
+ goto out;
+ }
+
+ kvmppc_clear_host_core(pcpu);
+
+ local_paca->kvm_hstate.tid = 0;
+ local_paca->kvm_hstate.napping = 0;
+ local_paca->kvm_hstate.kvm_split_mode = NULL;
+ kvmppc_start_thread(vcpu, vc);
+ kvmppc_create_dtl_entry(vcpu, vc);
+ trace_kvm_guest_enter(vcpu);
+
+ vc->vcore_state = VCORE_RUNNING;
+ trace_kvmppc_run_core(vc, 0);
+
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
+
+ trace_hardirqs_on();
+ guest_enter_irqoff();
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ this_cpu_disable_ftrace();
+
+ trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
+ vcpu->arch.trap = trap;
+
+ this_cpu_enable_ftrace();
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ isync();
+ }
+
+ trace_hardirqs_off();
+ set_irq_happened(trap);
+
+ kvmppc_set_host_core(pcpu);
+
+ local_irq_enable();
+ guest_exit();
+
+ cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
+
+ preempt_enable();
+
+ /* cancel pending decrementer exception if DEC is now positive */
+ if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_dequeue_dec(vcpu);
+
+ trace_kvm_guest_exit(vcpu);
+ r = RESUME_GUEST;
+ if (trap) {
+ if (!nested)
+ r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
+ else
+ r = kvmppc_handle_nested_exit(vcpu);
+ }
+ vcpu->arch.ret = r;
+
+ if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
+ !kvmppc_vcpu_woken(vcpu)) {
+ kvmppc_set_timer(vcpu);
+ while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
+ if (signal_pending(current)) {
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ break;
+ }
+ spin_lock(&vc->lock);
+ kvmppc_vcore_blocked(vc);
+ spin_unlock(&vc->lock);
+ }
+ }
+ vcpu->arch.ceded = 0;
+
+ vc->vcore_state = VCORE_INACTIVE;
+ trace_kvmppc_run_core(vc, 1);
+
+ done:
+ kvmppc_remove_runnable(vc, vcpu);
+ trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
+
+ return vcpu->arch.ret;
+
+ sigpend:
+ vcpu->stat.signal_exits++;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ out:
+ local_irq_enable();
+ preempt_enable();
+ goto done;
+}
+
static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
@@ -3480,7 +4174,20 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
- r = kvmppc_run_vcpu(run, vcpu);
+ /*
+ * The early POWER9 chips that can't mix radix and HPT threads
+ * on the same core also need the workaround for the problem
+ * where the TLB would prefetch entries in the guest exit path
+ * for radix guests using the guest PIDR value and LPID 0.
+ * The workaround is in the old path (kvmppc_run_vcpu())
+ * but not the new path (kvmhv_run_single_vcpu()).
+ */
+ if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
+ !no_mixing_hpt_and_radix)
+ r = kvmhv_run_single_vcpu(run, vcpu, ~(u64)0,
+ vcpu->arch.vcore->lpcr);
+ else
+ r = kvmppc_run_vcpu(run, vcpu);
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
@@ -3559,6 +4266,10 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
+ /* If running as a nested hypervisor, we don't support HPT guests */
+ if (kvmhv_on_pseries())
+ info->flags |= KVM_PPC_NO_HASH;
+
return 0;
}
@@ -3723,8 +4434,7 @@ void kvmppc_setup_partition_table(struct kvm *kvm)
__pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
dw1 = PATB_GR | kvm->arch.process_table;
}
-
- mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
}
/*
@@ -3820,6 +4530,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Must be called with kvm->lock held and mmu_ready = 0 and no vcpus running */
int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
+ if (nesting_enabled(kvm))
+ kvmhv_release_all_nested(kvm);
kvmppc_free_radix(kvm);
kvmppc_update_lpcr(kvm, LPCR_VPM1,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
@@ -3841,6 +4553,7 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+ kvmppc_rmap_reset(kvm);
kvm->arch.radix = 1;
return 0;
}
@@ -3940,6 +4653,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvmppc_alloc_host_rm_ops();
+ kvmhv_vm_nested_init(kvm);
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -3958,9 +4673,13 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
/* Init LPCR for virtual RMA mode */
- kvm->arch.host_lpid = mfspr(SPRN_LPID);
- kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
- lpcr &= LPCR_PECE | LPCR_LPES;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ kvm->arch.host_lpid = mfspr(SPRN_LPID);
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+ lpcr &= LPCR_PECE | LPCR_LPES;
+ } else {
+ lpcr = 0;
+ }
lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
LPCR_VPM0 | LPCR_VPM1;
kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
@@ -4027,8 +4746,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
* On POWER9, we only need to do this if the "indep_threads_mode"
* module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- kvm->arch.threads_indep = indep_threads_mode;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
+ pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
+ kvm->arch.threads_indep = true;
+ } else {
+ kvm->arch.threads_indep = indep_threads_mode;
+ }
+ }
if (!kvm->arch.threads_indep)
kvm_hv_vm_activated();
@@ -4051,6 +4776,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
snprintf(buf, sizeof(buf), "vm%d", current->pid);
kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
kvmppc_mmu_debugfs_init(kvm);
+ if (radix_enabled())
+ kvmhv_radix_debugfs_init(kvm);
return 0;
}
@@ -4073,13 +4800,21 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvmppc_free_vcores(kvm);
- kvmppc_free_lpid(kvm->arch.lpid);
if (kvm_is_radix(kvm))
kvmppc_free_radix(kvm);
else
kvmppc_free_hpt(&kvm->arch.hpt);
+ /* Perform global invalidation and return lpid to the pool */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (nesting_enabled(kvm))
+ kvmhv_release_all_nested(kvm);
+ kvm->arch.process_table = 0;
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
+ }
+ kvmppc_free_lpid(kvm->arch.lpid);
+
kvmppc_free_pimap(kvm);
}
@@ -4104,11 +4839,15 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
static int kvmppc_core_check_processor_compat_hv(void)
{
- if (!cpu_has_feature(CPU_FTR_HVMODE) ||
- !cpu_has_feature(CPU_FTR_ARCH_206))
- return -EIO;
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_206))
+ return 0;
- return 0;
+ /* POWER9 in radix mode is capable of being a nested hypervisor. */
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
+ return 0;
+
+ return -EIO;
}
#ifdef CONFIG_KVM_XICS
@@ -4426,6 +5165,10 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
if (radix && !radix_enabled())
return -EINVAL;
+ /* If we're a nested hypervisor, we currently only support radix */
+ if (kvmhv_on_pseries() && !radix)
+ return -EINVAL;
+
mutex_lock(&kvm->lock);
if (radix != kvm_is_radix(kvm)) {
if (kvm->arch.mmu_ready) {
@@ -4458,6 +5201,19 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
return err;
}
+static int kvmhv_enable_nested(struct kvm *kvm)
+{
+ if (!nested)
+ return -EPERM;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+ return -ENODEV;
+
+ /* kvm == NULL means the caller is testing if the capability exists */
+ if (kvm)
+ kvm->arch.nested_enable = true;
+ return 0;
+}
+
static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -4497,6 +5253,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
+ .enable_nested = kvmhv_enable_nested,
};
static int kvm_init_subcore_bitmap(void)
@@ -4547,6 +5304,10 @@ static int kvmppc_book3s_init_hv(void)
if (r < 0)
return -ENODEV;
+ r = kvmhv_nested_init();
+ if (r)
+ return r;
+
r = kvm_init_subcore_bitmap();
if (r)
return r;
@@ -4557,7 +5318,8 @@ static int kvmppc_book3s_init_hv(void)
* indirectly, via OPAL.
*/
#ifdef CONFIG_SMP
- if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
+ if (!xive_enabled() && !kvmhv_on_pseries() &&
+ !local_paca->kvm_hstate.xics_phys) {
struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
@@ -4605,6 +5367,7 @@ static void kvmppc_book3s_exit_hv(void)
if (kvmppc_radix_possible())
kvmppc_radix_exit();
kvmppc_hv_ops = NULL;
+ kvmhv_nested_exit();
}
module_init(kvmppc_book3s_init_hv);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fc6bb9630a9c..a71e2fc00a4e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -231,6 +231,15 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+ /* For a nested hypervisor, use the XICS via hcall */
+ if (kvmhv_on_pseries()) {
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
+ IPI_PRIORITY);
+ return;
+ }
+
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -460,12 +469,19 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
- xics_phys = local_paca->kvm_hstate.xics_phys;
- rc = 0;
- if (!xics_phys)
- rc = opal_int_get_xirr(&xirr, false);
- else
- xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+ if (kvmhv_on_pseries()) {
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
+ xirr = cpu_to_be32(retbuf[0]);
+ } else {
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ rc = 0;
+ if (!xics_phys)
+ rc = opal_int_get_xirr(&xirr, false);
+ else
+ xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+ }
if (rc < 0)
return 1;
@@ -494,7 +510,13 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
- if (xics_phys) {
+ if (kvmhv_on_pseries()) {
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ plpar_hcall_raw(H_IPI, retbuf,
+ hard_smp_processor_id(), 0xff);
+ plpar_hcall_raw(H_EOI, retbuf, h_xirr);
+ } else if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
@@ -520,7 +542,13 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- if (xics_phys)
+ if (kvmhv_on_pseries()) {
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ plpar_hcall_raw(H_IPI, retbuf,
+ hard_smp_processor_id(),
+ IPI_PRIORITY);
+ } else if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
@@ -729,3 +757,51 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip)
smp_mb();
local_paca->kvm_hstate.kvm_split_mode = NULL;
}
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or a External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+ int ext;
+ unsigned long vec = 0;
+ unsigned long lpcr;
+
+ /* Insert EXTERNAL bit into LPCR at the MER bit position */
+ ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= ext << LPCR_MER_SH;
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+
+ if (vcpu->arch.shregs.msr & MSR_EE) {
+ if (ext) {
+ vec = BOOK3S_INTERRUPT_EXTERNAL;
+ } else {
+ long int dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD))
+ dec = (int) dec;
+ if (dec < 0)
+ vec = BOOK3S_INTERRUPT_DECREMENTER;
+ }
+ }
+ if (vec) {
+ unsigned long msr, old_msr = vcpu->arch.shregs.msr;
+
+ kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
+ kvmppc_set_srr1(vcpu, old_msr);
+ kvmppc_set_pc(vcpu, vec);
+ msr = vcpu->arch.intr_msr;
+ if (MSR_TM_ACTIVE(old_msr))
+ msr |= MSR_TS_S;
+ vcpu->arch.shregs.msr = msr;
+ }
+
+ if (vcpu->arch.doorbell_request) {
+ mtspr(SPRN_DPDES, 1);
+ vcpu->arch.vcore->dpdes = 1;
+ smp_wmb();
+ vcpu->arch.doorbell_request = 0;
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 666b91c79eb4..a6d10010d9e8 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -64,52 +64,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Save host PMU registers */
-BEGIN_FTR_SECTION
- /* Work around P8 PMAE bug */
- li r3, -1
- clrrdi r3, r3, 10
- mfspr r8, SPRN_MMCR2
- mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mfspr r7, SPRN_MMCR0 /* save MMCR0 */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
- mfspr r6, SPRN_MMCRA
- /* Clear MMCRA in order to disable SDAR updates */
- li r5, 0
- mtspr SPRN_MMCRA, r5
- isync
- lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
- cmpwi r5, 0
- beq 31f /* skip if not */
- mfspr r5, SPRN_MMCR1
- mfspr r9, SPRN_SIAR
- mfspr r10, SPRN_SDAR
- std r7, HSTATE_MMCR0(r13)
- std r5, HSTATE_MMCR1(r13)
- std r6, HSTATE_MMCRA(r13)
- std r9, HSTATE_SIAR(r13)
- std r10, HSTATE_SDAR(r13)
-BEGIN_FTR_SECTION
- mfspr r9, SPRN_SIER
- std r8, HSTATE_MMCR2(r13)
- std r9, HSTATE_SIER(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mfspr r3, SPRN_PMC1
- mfspr r5, SPRN_PMC2
- mfspr r6, SPRN_PMC3
- mfspr r7, SPRN_PMC4
- mfspr r8, SPRN_PMC5
- mfspr r9, SPRN_PMC6
- stw r3, HSTATE_PMC1(r13)
- stw r5, HSTATE_PMC2(r13)
- stw r6, HSTATE_PMC3(r13)
- stw r7, HSTATE_PMC4(r13)
- stw r8, HSTATE_PMC5(r13)
- stw r9, HSTATE_PMC6(r13)
-31:
+ bl kvmhv_save_host_pmu
/*
* Put whatever is in the decrementer into the
@@ -161,3 +116,51 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
+
+_GLOBAL(kvmhv_save_host_pmu)
+BEGIN_FTR_SECTION
+ /* Work around P8 PMAE bug */
+ li r3, -1
+ clrrdi r3, r3, 10
+ mfspr r8, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r7, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ mfspr r6, SPRN_MMCRA
+ /* Clear MMCRA in order to disable SDAR updates */
+ li r5, 0
+ mtspr SPRN_MMCRA, r5
+ isync
+ lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+ cmpwi r5, 0
+ beq 31f /* skip if not */
+ mfspr r5, SPRN_MMCR1
+ mfspr r9, SPRN_SIAR
+ mfspr r10, SPRN_SDAR
+ std r7, HSTATE_MMCR0(r13)
+ std r5, HSTATE_MMCR1(r13)
+ std r6, HSTATE_MMCRA(r13)
+ std r9, HSTATE_SIAR(r13)
+ std r10, HSTATE_SDAR(r13)
+BEGIN_FTR_SECTION
+ mfspr r9, SPRN_SIER
+ std r8, HSTATE_MMCR2(r13)
+ std r9, HSTATE_SIER(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mfspr r3, SPRN_PMC1
+ mfspr r5, SPRN_PMC2
+ mfspr r6, SPRN_PMC3
+ mfspr r7, SPRN_PMC4
+ mfspr r8, SPRN_PMC5
+ mfspr r9, SPRN_PMC6
+ stw r3, HSTATE_PMC1(r13)
+ stw r5, HSTATE_PMC2(r13)
+ stw r6, HSTATE_PMC3(r13)
+ stw r7, HSTATE_PMC4(r13)
+ stw r8, HSTATE_PMC5(r13)
+ stw r9, HSTATE_PMC6(r13)
+31: blr
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
new file mode 100644
index 000000000000..401d2ecbebc5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -0,0 +1,1291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2018
+ * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+ * Paul Mackerras <paulus@ozlabs.org>
+ *
+ * Description: KVM functions specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/llist.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+
+static struct patb_entry *pseries_partition_tb;
+
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
+
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ hr->pcr = vc->pcr;
+ hr->dpdes = vc->dpdes;
+ hr->hfscr = vcpu->arch.hfscr;
+ hr->tb_offset = vc->tb_offset;
+ hr->dawr0 = vcpu->arch.dawr;
+ hr->dawrx0 = vcpu->arch.dawrx;
+ hr->ciabr = vcpu->arch.ciabr;
+ hr->purr = vcpu->arch.purr;
+ hr->spurr = vcpu->arch.spurr;
+ hr->ic = vcpu->arch.ic;
+ hr->vtb = vc->vtb;
+ hr->srr0 = vcpu->arch.shregs.srr0;
+ hr->srr1 = vcpu->arch.shregs.srr1;
+ hr->sprg[0] = vcpu->arch.shregs.sprg0;
+ hr->sprg[1] = vcpu->arch.shregs.sprg1;
+ hr->sprg[2] = vcpu->arch.shregs.sprg2;
+ hr->sprg[3] = vcpu->arch.shregs.sprg3;
+ hr->pidr = vcpu->arch.pid;
+ hr->cfar = vcpu->arch.cfar;
+ hr->ppr = vcpu->arch.ppr;
+}
+
+static void byteswap_pt_regs(struct pt_regs *regs)
+{
+ unsigned long *addr = (unsigned long *) regs;
+
+ for (; addr < ((unsigned long *) (regs + 1)); addr++)
+ *addr = swab64(*addr);
+}
+
+static void byteswap_hv_regs(struct hv_guest_state *hr)
+{
+ hr->version = swab64(hr->version);
+ hr->lpid = swab32(hr->lpid);
+ hr->vcpu_token = swab32(hr->vcpu_token);
+ hr->lpcr = swab64(hr->lpcr);
+ hr->pcr = swab64(hr->pcr);
+ hr->amor = swab64(hr->amor);
+ hr->dpdes = swab64(hr->dpdes);
+ hr->hfscr = swab64(hr->hfscr);
+ hr->tb_offset = swab64(hr->tb_offset);
+ hr->dawr0 = swab64(hr->dawr0);
+ hr->dawrx0 = swab64(hr->dawrx0);
+ hr->ciabr = swab64(hr->ciabr);
+ hr->hdec_expiry = swab64(hr->hdec_expiry);
+ hr->purr = swab64(hr->purr);
+ hr->spurr = swab64(hr->spurr);
+ hr->ic = swab64(hr->ic);
+ hr->vtb = swab64(hr->vtb);
+ hr->hdar = swab64(hr->hdar);
+ hr->hdsisr = swab64(hr->hdsisr);
+ hr->heir = swab64(hr->heir);
+ hr->asdr = swab64(hr->asdr);
+ hr->srr0 = swab64(hr->srr0);
+ hr->srr1 = swab64(hr->srr1);
+ hr->sprg[0] = swab64(hr->sprg[0]);
+ hr->sprg[1] = swab64(hr->sprg[1]);
+ hr->sprg[2] = swab64(hr->sprg[2]);
+ hr->sprg[3] = swab64(hr->sprg[3]);
+ hr->pidr = swab64(hr->pidr);
+ hr->cfar = swab64(hr->cfar);
+ hr->ppr = swab64(hr->ppr);
+}
+
+static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+ struct hv_guest_state *hr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ hr->dpdes = vc->dpdes;
+ hr->hfscr = vcpu->arch.hfscr;
+ hr->purr = vcpu->arch.purr;
+ hr->spurr = vcpu->arch.spurr;
+ hr->ic = vcpu->arch.ic;
+ hr->vtb = vc->vtb;
+ hr->srr0 = vcpu->arch.shregs.srr0;
+ hr->srr1 = vcpu->arch.shregs.srr1;
+ hr->sprg[0] = vcpu->arch.shregs.sprg0;
+ hr->sprg[1] = vcpu->arch.shregs.sprg1;
+ hr->sprg[2] = vcpu->arch.shregs.sprg2;
+ hr->sprg[3] = vcpu->arch.shregs.sprg3;
+ hr->pidr = vcpu->arch.pid;
+ hr->cfar = vcpu->arch.cfar;
+ hr->ppr = vcpu->arch.ppr;
+ switch (trap) {
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ hr->hdar = vcpu->arch.fault_dar;
+ hr->hdsisr = vcpu->arch.fault_dsisr;
+ hr->asdr = vcpu->arch.fault_gpa;
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ hr->asdr = vcpu->arch.fault_gpa;
+ break;
+ case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ hr->heir = vcpu->arch.emul_inst;
+ break;
+ }
+}
+
+static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+ /*
+ * Don't let L1 enable features for L2 which we've disabled for L1,
+ * but preserve the interrupt cause field.
+ */
+ hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
+
+ /* Don't let data address watchpoint match in hypervisor state */
+ hr->dawrx0 &= ~DAWRX_HYP;
+
+ /* Don't let completed instruction address breakpt match in HV state */
+ if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ hr->ciabr &= ~CIABR_PRIV;
+}
+
+static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ vc->pcr = hr->pcr;
+ vc->dpdes = hr->dpdes;
+ vcpu->arch.hfscr = hr->hfscr;
+ vcpu->arch.dawr = hr->dawr0;
+ vcpu->arch.dawrx = hr->dawrx0;
+ vcpu->arch.ciabr = hr->ciabr;
+ vcpu->arch.purr = hr->purr;
+ vcpu->arch.spurr = hr->spurr;
+ vcpu->arch.ic = hr->ic;
+ vc->vtb = hr->vtb;
+ vcpu->arch.shregs.srr0 = hr->srr0;
+ vcpu->arch.shregs.srr1 = hr->srr1;
+ vcpu->arch.shregs.sprg0 = hr->sprg[0];
+ vcpu->arch.shregs.sprg1 = hr->sprg[1];
+ vcpu->arch.shregs.sprg2 = hr->sprg[2];
+ vcpu->arch.shregs.sprg3 = hr->sprg[3];
+ vcpu->arch.pid = hr->pidr;
+ vcpu->arch.cfar = hr->cfar;
+ vcpu->arch.ppr = hr->ppr;
+}
+
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+ struct hv_guest_state *hr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ vc->dpdes = hr->dpdes;
+ vcpu->arch.hfscr = hr->hfscr;
+ vcpu->arch.purr = hr->purr;
+ vcpu->arch.spurr = hr->spurr;
+ vcpu->arch.ic = hr->ic;
+ vc->vtb = hr->vtb;
+ vcpu->arch.fault_dar = hr->hdar;
+ vcpu->arch.fault_dsisr = hr->hdsisr;
+ vcpu->arch.fault_gpa = hr->asdr;
+ vcpu->arch.emul_inst = hr->heir;
+ vcpu->arch.shregs.srr0 = hr->srr0;
+ vcpu->arch.shregs.srr1 = hr->srr1;
+ vcpu->arch.shregs.sprg0 = hr->sprg[0];
+ vcpu->arch.shregs.sprg1 = hr->sprg[1];
+ vcpu->arch.shregs.sprg2 = hr->sprg[2];
+ vcpu->arch.shregs.sprg3 = hr->sprg[3];
+ vcpu->arch.pid = hr->pidr;
+ vcpu->arch.cfar = hr->cfar;
+ vcpu->arch.ppr = hr->ppr;
+}
+
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
+{
+ long int err, r;
+ struct kvm_nested_guest *l2;
+ struct pt_regs l2_regs, saved_l1_regs;
+ struct hv_guest_state l2_hv, saved_l1_hv;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 hv_ptr, regs_ptr;
+ u64 hdec_exp;
+ s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
+ u64 mask;
+ unsigned long lpcr;
+
+ if (vcpu->kvm->arch.l1_ptcr == 0)
+ return H_NOT_AVAILABLE;
+
+ /* copy parameters in */
+ hv_ptr = kvmppc_get_gpr(vcpu, 4);
+ err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
+ sizeof(struct hv_guest_state));
+ if (err)
+ return H_PARAMETER;
+ if (kvmppc_need_byteswap(vcpu))
+ byteswap_hv_regs(&l2_hv);
+ if (l2_hv.version != HV_GUEST_STATE_VERSION)
+ return H_P2;
+
+ regs_ptr = kvmppc_get_gpr(vcpu, 5);
+ err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct pt_regs));
+ if (err)
+ return H_PARAMETER;
+ if (kvmppc_need_byteswap(vcpu))
+ byteswap_pt_regs(&l2_regs);
+ if (l2_hv.vcpu_token >= NR_CPUS)
+ return H_PARAMETER;
+
+ /* translate lpid */
+ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
+ if (!l2)
+ return H_PARAMETER;
+ if (!l2->l1_gr_to_hr) {
+ mutex_lock(&l2->tlb_lock);
+ kvmhv_update_ptbl_cache(l2);
+ mutex_unlock(&l2->tlb_lock);
+ }
+
+ /* save l1 values of things */
+ vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+ saved_l1_regs = vcpu->arch.regs;
+ kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
+
+ /* convert TB values/offsets to host (L0) values */
+ hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
+ vc->tb_offset += l2_hv.tb_offset;
+
+ /* set L1 state to L2 state */
+ vcpu->arch.nested = l2;
+ vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+ vcpu->arch.regs = l2_regs;
+ vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+ LPCR_LPES | LPCR_MER;
+ lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
+ sanitise_hv_regs(vcpu, &l2_hv);
+ restore_hv_regs(vcpu, &l2_hv);
+
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+ do {
+ if (mftb() >= hdec_exp) {
+ vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+ r = RESUME_HOST;
+ break;
+ }
+ r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
+ lpcr);
+ } while (is_kvmppc_resume_guest(r));
+
+ /* save L2 state for return */
+ l2_regs = vcpu->arch.regs;
+ l2_regs.msr = vcpu->arch.shregs.msr;
+ delta_purr = vcpu->arch.purr - l2_hv.purr;
+ delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
+ delta_ic = vcpu->arch.ic - l2_hv.ic;
+ delta_vtb = vc->vtb - l2_hv.vtb;
+ save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+
+ /* restore L1 state */
+ vcpu->arch.nested = NULL;
+ vcpu->arch.regs = saved_l1_regs;
+ vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
+ /* set L1 MSR TS field according to L2 transaction state */
+ if (l2_regs.msr & MSR_TS_MASK)
+ vcpu->arch.shregs.msr |= MSR_TS_S;
+ vc->tb_offset = saved_l1_hv.tb_offset;
+ restore_hv_regs(vcpu, &saved_l1_hv);
+ vcpu->arch.purr += delta_purr;
+ vcpu->arch.spurr += delta_spurr;
+ vcpu->arch.ic += delta_ic;
+ vc->vtb += delta_vtb;
+
+ kvmhv_put_nested(l2);
+
+ /* copy l2_hv_state and regs back to guest */
+ if (kvmppc_need_byteswap(vcpu)) {
+ byteswap_hv_regs(&l2_hv);
+ byteswap_pt_regs(&l2_regs);
+ }
+ err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
+ sizeof(struct hv_guest_state));
+ if (err)
+ return H_AUTHORITY;
+ err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct pt_regs));
+ if (err)
+ return H_AUTHORITY;
+
+ if (r == -EINTR)
+ return H_INTERRUPT;
+
+ return vcpu->arch.trap;
+}
+
+long kvmhv_nested_init(void)
+{
+ long int ptb_order;
+ unsigned long ptcr;
+ long rc;
+
+ if (!kvmhv_on_pseries())
+ return 0;
+ if (!radix_enabled())
+ return -ENODEV;
+
+ /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
+ ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
+ if (ptb_order < 8)
+ ptb_order = 8;
+ pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
+ GFP_KERNEL);
+ if (!pseries_partition_tb) {
+ pr_err("kvm-hv: failed to allocated nested partition table\n");
+ return -ENOMEM;
+ }
+
+ ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
+ rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
+ if (rc != H_SUCCESS) {
+ pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
+ rc);
+ kfree(pseries_partition_tb);
+ pseries_partition_tb = NULL;
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+void kvmhv_nested_exit(void)
+{
+ /*
+ * N.B. the kvmhv_on_pseries() test is there because it enables
+ * the compiler to remove the call to plpar_hcall_norets()
+ * when CONFIG_PPC_PSERIES=n.
+ */
+ if (kvmhv_on_pseries() && pseries_partition_tb) {
+ plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
+ kfree(pseries_partition_tb);
+ pseries_partition_tb = NULL;
+ }
+}
+
+static void kvmhv_flush_lpid(unsigned int lpid)
+{
+ long rc;
+
+ if (!kvmhv_on_pseries()) {
+ radix__flush_tlb_lpid(lpid);
+ return;
+ }
+
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ if (rc)
+ pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
+}
+
+void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
+{
+ if (!kvmhv_on_pseries()) {
+ mmu_partition_table_set_entry(lpid, dw0, dw1);
+ return;
+ }
+
+ pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+ /* L0 will do the necessary barriers */
+ kvmhv_flush_lpid(lpid);
+}
+
+static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
+{
+ unsigned long dw0;
+
+ dw0 = PATB_HR | radix__get_tree_size() |
+ __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
+ kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
+}
+
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+ kvm->arch.max_nested_lpid = -1;
+}
+
+/*
+ * Handle the H_SET_PARTITION_TABLE hcall.
+ * r4 = guest real address of partition table + log_2(size) - 12
+ * (formatted as for the PTCR).
+ */
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
+ int srcu_idx;
+ long ret = H_SUCCESS;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ /*
+ * Limit the partition table to 4096 entries (because that's what
+ * hardware supports), and check the base address.
+ */
+ if ((ptcr & PRTS_MASK) > 12 - 8 ||
+ !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
+ ret = H_PARAMETER;
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ if (ret == H_SUCCESS)
+ kvm->arch.l1_ptcr = ptcr;
+ return ret;
+}
+
+/*
+ * Reload the partition table entry for a guest.
+ * Caller must hold gp->tlb_lock.
+ */
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
+{
+ int ret;
+ struct patb_entry ptbl_entry;
+ unsigned long ptbl_addr;
+ struct kvm *kvm = gp->l1_host;
+
+ ret = -EFAULT;
+ ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
+ if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+ ret = kvm_read_guest(kvm, ptbl_addr,
+ &ptbl_entry, sizeof(ptbl_entry));
+ if (ret) {
+ gp->l1_gr_to_hr = 0;
+ gp->process_table = 0;
+ } else {
+ gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
+ gp->process_table = be64_to_cpu(ptbl_entry.patb1);
+ }
+ kvmhv_set_nested_ptbl(gp);
+}
+
+struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+{
+ struct kvm_nested_guest *gp;
+ long shadow_lpid;
+
+ gp = kzalloc(sizeof(*gp), GFP_KERNEL);
+ if (!gp)
+ return NULL;
+ gp->l1_host = kvm;
+ gp->l1_lpid = lpid;
+ mutex_init(&gp->tlb_lock);
+ gp->shadow_pgtable = pgd_alloc(kvm->mm);
+ if (!gp->shadow_pgtable)
+ goto out_free;
+ shadow_lpid = kvmppc_alloc_lpid();
+ if (shadow_lpid < 0)
+ goto out_free2;
+ gp->shadow_lpid = shadow_lpid;
+
+ memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
+
+ return gp;
+
+ out_free2:
+ pgd_free(kvm->mm, gp->shadow_pgtable);
+ out_free:
+ kfree(gp);
+ return NULL;
+}
+
+/*
+ * Free up any resources allocated for a nested guest.
+ */
+static void kvmhv_release_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+
+ if (gp->shadow_pgtable) {
+ /*
+ * No vcpu is using this struct and no call to
+ * kvmhv_get_nested can find this struct,
+ * so we don't need to hold kvm->mmu_lock.
+ */
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+ gp->shadow_lpid);
+ pgd_free(kvm->mm, gp->shadow_pgtable);
+ }
+ kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
+ kvmppc_free_lpid(gp->shadow_lpid);
+ kfree(gp);
+}
+
+static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+ int lpid = gp->l1_lpid;
+ long ref;
+
+ spin_lock(&kvm->mmu_lock);
+ if (gp == kvm->arch.nested_guests[lpid]) {
+ kvm->arch.nested_guests[lpid] = NULL;
+ if (lpid == kvm->arch.max_nested_lpid) {
+ while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
+ ;
+ kvm->arch.max_nested_lpid = lpid;
+ }
+ --gp->refcnt;
+ }
+ ref = gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+ if (ref == 0)
+ kvmhv_release_nested(gp);
+}
+
+/*
+ * Free up all nested resources allocated for this guest.
+ * This is called with no vcpus of the guest running, when
+ * switching the guest to HPT mode or when destroying the
+ * guest.
+ */
+void kvmhv_release_all_nested(struct kvm *kvm)
+{
+ int i;
+ struct kvm_nested_guest *gp;
+ struct kvm_nested_guest *freelist = NULL;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+
+ spin_lock(&kvm->mmu_lock);
+ for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+ gp = kvm->arch.nested_guests[i];
+ if (!gp)
+ continue;
+ kvm->arch.nested_guests[i] = NULL;
+ if (--gp->refcnt == 0) {
+ gp->next = freelist;
+ freelist = gp;
+ }
+ }
+ kvm->arch.max_nested_lpid = -1;
+ spin_unlock(&kvm->mmu_lock);
+ while ((gp = freelist) != NULL) {
+ freelist = gp->next;
+ kvmhv_release_nested(gp);
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+ kvmhv_free_memslot_nest_rmap(memslot);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/* caller must hold gp->tlb_lock */
+static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+
+ spin_lock(&kvm->mmu_lock);
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
+ spin_unlock(&kvm->mmu_lock);
+ kvmhv_flush_lpid(gp->shadow_lpid);
+ kvmhv_update_ptbl_cache(gp);
+ if (gp->l1_gr_to_hr == 0)
+ kvmhv_remove_nested(gp);
+}
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+ bool create)
+{
+ struct kvm_nested_guest *gp, *newgp;
+
+ if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
+ l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+ return NULL;
+
+ spin_lock(&kvm->mmu_lock);
+ gp = kvm->arch.nested_guests[l1_lpid];
+ if (gp)
+ ++gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+
+ if (gp || !create)
+ return gp;
+
+ newgp = kvmhv_alloc_nested(kvm, l1_lpid);
+ if (!newgp)
+ return NULL;
+ spin_lock(&kvm->mmu_lock);
+ if (kvm->arch.nested_guests[l1_lpid]) {
+ /* someone else beat us to it */
+ gp = kvm->arch.nested_guests[l1_lpid];
+ } else {
+ kvm->arch.nested_guests[l1_lpid] = newgp;
+ ++newgp->refcnt;
+ gp = newgp;
+ newgp = NULL;
+ if (l1_lpid > kvm->arch.max_nested_lpid)
+ kvm->arch.max_nested_lpid = l1_lpid;
+ }
+ ++gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+
+ if (newgp)
+ kvmhv_release_nested(newgp);
+
+ return gp;
+}
+
+void kvmhv_put_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+ long ref;
+
+ spin_lock(&kvm->mmu_lock);
+ ref = --gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+ if (ref == 0)
+ kvmhv_release_nested(gp);
+}
+
+static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
+{
+ if (lpid > kvm->arch.max_nested_lpid)
+ return NULL;
+ return kvm->arch.nested_guests[lpid];
+}
+
+static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
+{
+ return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
+ RMAP_NESTED_GPA_MASK));
+}
+
+void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+ struct rmap_nested **n_rmap)
+{
+ struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+ struct rmap_nested *cursor;
+ u64 rmap, new_rmap = (*n_rmap)->rmap;
+
+ /* Are there any existing entries? */
+ if (!(*rmapp)) {
+ /* No -> use the rmap as a single entry */
+ *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
+ return;
+ }
+
+ /* Do any entries match what we're trying to insert? */
+ for_each_nest_rmap_safe(cursor, entry, &rmap) {
+ if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
+ return;
+ }
+
+ /* Do we need to create a list or just add the new entry? */
+ rmap = *rmapp;
+ if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+ *rmapp = 0UL;
+ llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
+ if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+ (*n_rmap)->list.next = (struct llist_node *) rmap;
+
+ /* Set NULL so not freed by caller */
+ *n_rmap = NULL;
+}
+
+static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
+ unsigned long hpa, unsigned long mask)
+{
+ struct kvm_nested_guest *gp;
+ unsigned long gpa;
+ unsigned int shift, lpid;
+ pte_t *ptep;
+
+ gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+ lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+ gp = kvmhv_find_nested(kvm, lpid);
+ if (!gp)
+ return;
+
+ /* Find and invalidate the pte */
+ ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ /* Don't spuriously invalidate ptes if the pfn has changed */
+ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
+ kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+}
+
+static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long hpa, unsigned long mask)
+{
+ struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
+ struct rmap_nested *cursor;
+ unsigned long rmap;
+
+ for_each_nest_rmap_safe(cursor, entry, &rmap) {
+ kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
+ kfree(cursor);
+ }
+}
+
+/* called with kvm->mmu_lock held */
+void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ unsigned long gpa, unsigned long hpa,
+ unsigned long nbytes)
+{
+ unsigned long gfn, end_gfn;
+ unsigned long addr_mask;
+
+ if (!memslot)
+ return;
+ gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
+ end_gfn = gfn + (nbytes >> PAGE_SHIFT);
+
+ addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
+ hpa &= addr_mask;
+
+ for (; gfn < end_gfn; gfn++) {
+ unsigned long *rmap = &memslot->arch.rmap[gfn];
+ kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
+ }
+}
+
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
+{
+ unsigned long page;
+
+ for (page = 0; page < free->npages; page++) {
+ unsigned long rmap, *rmapp = &free->arch.rmap[page];
+ struct rmap_nested *cursor;
+ struct llist_node *entry;
+
+ entry = llist_del_all((struct llist_head *) rmapp);
+ for_each_nest_rmap_safe(cursor, entry, &rmap)
+ kfree(cursor);
+ }
+}
+
+static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp,
+ long gpa, int *shift_ret)
+{
+ struct kvm *kvm = vcpu->kvm;
+ bool ret = false;
+ pte_t *ptep;
+ int shift;
+
+ spin_lock(&kvm->mmu_lock);
+ ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ if (!shift)
+ shift = PAGE_SHIFT;
+ if (ptep && pte_present(*ptep)) {
+ kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+ ret = true;
+ }
+ spin_unlock(&kvm->mmu_lock);
+
+ if (shift_ret)
+ *shift_ret = shift;
+ return ret;
+}
+
+static inline int get_ric(unsigned int instr)
+{
+ return (instr >> 18) & 0x3;
+}
+
+static inline int get_prs(unsigned int instr)
+{
+ return (instr >> 17) & 0x1;
+}
+
+static inline int get_r(unsigned int instr)
+{
+ return (instr >> 16) & 0x1;
+}
+
+static inline int get_lpid(unsigned long r_val)
+{
+ return r_val & 0xffffffff;
+}
+
+static inline int get_is(unsigned long r_val)
+{
+ return (r_val >> 10) & 0x3;
+}
+
+static inline int get_ap(unsigned long r_val)
+{
+ return (r_val >> 5) & 0x7;
+}
+
+static inline long get_epn(unsigned long r_val)
+{
+ return r_val >> 12;
+}
+
+static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
+ int ap, long epn)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+ long npages;
+ int shift, shadow_shift;
+ unsigned long addr;
+
+ shift = ap_to_shift(ap);
+ addr = epn << 12;
+ if (shift < 0)
+ /* Invalid ap encoding */
+ return -EINVAL;
+
+ addr &= ~((1UL << shift) - 1);
+ npages = 1UL << (shift - PAGE_SHIFT);
+
+ gp = kvmhv_get_nested(kvm, lpid, false);
+ if (!gp) /* No such guest -> nothing to do */
+ return 0;
+ mutex_lock(&gp->tlb_lock);
+
+ /* There may be more than one host page backing this single guest pte */
+ do {
+ kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
+
+ npages -= 1UL << (shadow_shift - PAGE_SHIFT);
+ addr += 1UL << shadow_shift;
+ } while (npages > 0);
+
+ mutex_unlock(&gp->tlb_lock);
+ kvmhv_put_nested(gp);
+ return 0;
+}
+
+static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp, int ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&gp->tlb_lock);
+ switch (ric) {
+ case 0:
+ /* Invalidate TLB */
+ spin_lock(&kvm->mmu_lock);
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+ gp->shadow_lpid);
+ kvmhv_flush_lpid(gp->shadow_lpid);
+ spin_unlock(&kvm->mmu_lock);
+ break;
+ case 1:
+ /*
+ * Invalidate PWC
+ * We don't cache this -> nothing to do
+ */
+ break;
+ case 2:
+ /* Invalidate TLB, PWC and caching of partition table entries */
+ kvmhv_flush_nested(gp);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&gp->tlb_lock);
+}
+
+static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+ int i;
+
+ spin_lock(&kvm->mmu_lock);
+ for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
+ gp = kvm->arch.nested_guests[i];
+ if (gp) {
+ spin_unlock(&kvm->mmu_lock);
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ spin_lock(&kvm->mmu_lock);
+ }
+ }
+ spin_unlock(&kvm->mmu_lock);
+}
+
+static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
+ unsigned long rsval, unsigned long rbval)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+ int r, ric, prs, is, ap;
+ int lpid;
+ long epn;
+ int ret = 0;
+
+ ric = get_ric(instr);
+ prs = get_prs(instr);
+ r = get_r(instr);
+ lpid = get_lpid(rsval);
+ is = get_is(rbval);
+
+ /*
+ * These cases are invalid and are not handled:
+ * r != 1 -> Only radix supported
+ * prs == 1 -> Not HV privileged
+ * ric == 3 -> No cluster bombs for radix
+ * is == 1 -> Partition scoped translations not associated with pid
+ * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
+ */
+ if ((!r) || (prs) || (ric == 3) || (is == 1) ||
+ ((!is) && (ric == 1 || ric == 2)))
+ return -EINVAL;
+
+ switch (is) {
+ case 0:
+ /*
+ * We know ric == 0
+ * Invalidate TLB for a given target address
+ */
+ epn = get_epn(rbval);
+ ap = get_ap(rbval);
+ ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
+ break;
+ case 2:
+ /* Invalidate matching LPID */
+ gp = kvmhv_get_nested(kvm, lpid, false);
+ if (gp) {
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ kvmhv_put_nested(gp);
+ }
+ break;
+ case 3:
+ /* Invalidate ALL LPIDs */
+ kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * This handles the H_TLB_INVALIDATE hcall.
+ * Parameters are (r4) tlbie instruction code, (r5) rS contents,
+ * (r6) rB contents.
+ */
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
+ if (ret)
+ return H_PARAMETER;
+ return H_SUCCESS;
+}
+
+/* Used to convert a nested guest real address to a L1 guest real address */
+static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp,
+ unsigned long n_gpa, unsigned long dsisr,
+ struct kvmppc_pte *gpte_p)
+{
+ u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
+ int ret;
+
+ ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
+ &fault_addr);
+
+ if (ret) {
+ /* We didn't find a pte */
+ if (ret == -EINVAL) {
+ /* Unsupported mmu config */
+ flags |= DSISR_UNSUPP_MMU;
+ } else if (ret == -ENOENT) {
+ /* No translation found */
+ flags |= DSISR_NOHPTE;
+ } else if (ret == -EFAULT) {
+ /* Couldn't access L1 real address */
+ flags |= DSISR_PRTABLE_FAULT;
+ vcpu->arch.fault_gpa = fault_addr;
+ } else {
+ /* Unknown error */
+ return ret;
+ }
+ goto forward_to_l1;
+ } else {
+ /* We found a pte -> check permissions */
+ if (dsisr & DSISR_ISSTORE) {
+ /* Can we write? */
+ if (!gpte_p->may_write) {
+ flags |= DSISR_PROTFAULT;
+ goto forward_to_l1;
+ }
+ } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ /* Can we execute? */
+ if (!gpte_p->may_execute) {
+ flags |= SRR1_ISI_N_OR_G;
+ goto forward_to_l1;
+ }
+ } else {
+ /* Can we read? */
+ if (!gpte_p->may_read && !gpte_p->may_write) {
+ flags |= DSISR_PROTFAULT;
+ goto forward_to_l1;
+ }
+ }
+ }
+
+ return 0;
+
+forward_to_l1:
+ vcpu->arch.fault_dsisr = flags;
+ if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.shregs.msr &= ~0x783f0000ul;
+ vcpu->arch.shregs.msr |= flags;
+ }
+ return RESUME_HOST;
+}
+
+static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp,
+ unsigned long n_gpa,
+ struct kvmppc_pte gpte,
+ unsigned long dsisr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ bool writing = !!(dsisr & DSISR_ISSTORE);
+ u64 pgflags;
+ bool ret;
+
+ /* Are the rc bits set in the L1 partition scoped pte? */
+ pgflags = _PAGE_ACCESSED;
+ if (writing)
+ pgflags |= _PAGE_DIRTY;
+ if (pgflags & ~gpte.rc)
+ return RESUME_HOST;
+
+ spin_lock(&kvm->mmu_lock);
+ /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
+ ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
+ gpte.raddr, kvm->arch.lpid);
+ spin_unlock(&kvm->mmu_lock);
+ if (!ret)
+ return -EINVAL;
+
+ /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
+ ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
+ gp->shadow_lpid);
+ if (!ret)
+ return -EINVAL;
+ return 0;
+}
+
+static inline int kvmppc_radix_level_to_shift(int level)
+{
+ switch (level) {
+ case 2:
+ return PUD_SHIFT;
+ case 1:
+ return PMD_SHIFT;
+ default:
+ return PAGE_SHIFT;
+ }
+}
+
+static inline int kvmppc_radix_shift_to_level(int shift)
+{
+ if (shift == PUD_SHIFT)
+ return 2;
+ if (shift == PMD_SHIFT)
+ return 1;
+ if (shift == PAGE_SHIFT)
+ return 0;
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+/* called with gp->tlb_lock held */
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ struct rmap_nested *n_rmap;
+ struct kvmppc_pte gpte;
+ pte_t pte, *pte_p;
+ unsigned long mmu_seq;
+ unsigned long dsisr = vcpu->arch.fault_dsisr;
+ unsigned long ea = vcpu->arch.fault_dar;
+ unsigned long *rmapp;
+ unsigned long n_gpa, gpa, gfn, perm = 0UL;
+ unsigned int shift, l1_shift, level;
+ bool writing = !!(dsisr & DSISR_ISSTORE);
+ bool kvm_ro = false;
+ long int ret;
+
+ if (!gp->l1_gr_to_hr) {
+ kvmhv_update_ptbl_cache(gp);
+ if (!gp->l1_gr_to_hr)
+ return RESUME_HOST;
+ }
+
+ /* Convert the nested guest real address into a L1 guest real address */
+
+ n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
+ if (!(dsisr & DSISR_PRTABLE_FAULT))
+ n_gpa |= ea & 0xFFF;
+ ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
+
+ /*
+ * If the hardware found a translation but we don't now have a usable
+ * translation in the l1 partition-scoped tree, remove the shadow pte
+ * and let the guest retry.
+ */
+ if (ret == RESUME_HOST &&
+ (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
+ DSISR_BAD_COPYPASTE)))
+ goto inval;
+ if (ret)
+ return ret;
+
+ /* Failed to set the reference/change bits */
+ if (dsisr & DSISR_SET_RC) {
+ ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
+ if (ret == RESUME_HOST)
+ return ret;
+ if (ret)
+ goto inval;
+ dsisr &= ~DSISR_SET_RC;
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT)))
+ return RESUME_GUEST;
+ }
+
+ /*
+ * We took an HISI or HDSI while we were running a nested guest which
+ * means we have no partition scoped translation for that. This means
+ * we need to insert a pte for the mapping into our shadow_pgtable.
+ */
+
+ l1_shift = gpte.page_shift;
+ if (l1_shift < PAGE_SHIFT) {
+ /* We don't support l1 using a page size smaller than our own */
+ pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
+ l1_shift, PAGE_SHIFT);
+ return -EINVAL;
+ }
+ gpa = gpte.raddr;
+ gfn = gpa >> PAGE_SHIFT;
+
+ /* 1. Get the corresponding host memslot */
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
+ /* unusual error -> reflect to the guest as a DSI */
+ kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+ return RESUME_GUEST;
+ }
+ /* passthrough of emulated MMIO case... */
+ pr_err("emulated MMIO passthrough?\n");
+ return -EINVAL;
+ }
+ if (memslot->flags & KVM_MEM_READONLY) {
+ if (writing) {
+ /* Give the guest a DSI */
+ kvmppc_core_queue_data_storage(vcpu, ea,
+ DSISR_ISSTORE | DSISR_PROTFAULT);
+ return RESUME_GUEST;
+ }
+ kvm_ro = true;
+ }
+
+ /* 2. Find the host pte for this L1 guest real address */
+
+ /* Used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ /* See if can find translation in our partition scoped tables for L1 */
+ pte = __pte(0);
+ spin_lock(&kvm->mmu_lock);
+ pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (!shift)
+ shift = PAGE_SHIFT;
+ if (pte_p)
+ pte = *pte_p;
+ spin_unlock(&kvm->mmu_lock);
+
+ if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
+ /* No suitable pte found -> try to insert a mapping */
+ ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
+ writing, kvm_ro, &pte, &level);
+ if (ret == -EAGAIN)
+ return RESUME_GUEST;
+ else if (ret)
+ return ret;
+ shift = kvmppc_radix_level_to_shift(level);
+ }
+
+ /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
+
+ /* The permissions is the combination of the host and l1 guest ptes */
+ perm |= gpte.may_read ? 0UL : _PAGE_READ;
+ perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
+ perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
+ pte = __pte(pte_val(pte) & ~perm);
+
+ /* What size pte can we insert? */
+ if (shift > l1_shift) {
+ u64 mask;
+ unsigned int actual_shift = PAGE_SHIFT;
+ if (PMD_SHIFT < l1_shift)
+ actual_shift = PMD_SHIFT;
+ mask = (1UL << shift) - (1UL << actual_shift);
+ pte = __pte(pte_val(pte) | (gpa & mask));
+ shift = actual_shift;
+ }
+ level = kvmppc_radix_shift_to_level(shift);
+ n_gpa &= ~((1UL << shift) - 1);
+
+ /* 4. Insert the pte into our shadow_pgtable */
+
+ n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
+ if (!n_rmap)
+ return RESUME_GUEST; /* Let the guest try again */
+ n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
+ (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
+ mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
+ if (n_rmap)
+ kfree(n_rmap);
+ if (ret == -EAGAIN)
+ ret = RESUME_GUEST; /* Let the guest try again */
+
+ return ret;
+
+ inval:
+ kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
+ return RESUME_GUEST;
+}
+
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
+{
+ struct kvm_nested_guest *gp = vcpu->arch.nested;
+ long int ret;
+
+ mutex_lock(&gp->tlb_lock);
+ ret = __kvmhv_nested_page_fault(vcpu, gp);
+ mutex_unlock(&gp->tlb_lock);
+ return ret;
+}
+
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
+{
+ int ret = -1;
+
+ spin_lock(&kvm->mmu_lock);
+ while (++lpid <= kvm->arch.max_nested_lpid) {
+ if (kvm->arch.nested_guests[lpid]) {
+ ret = lpid;
+ break;
+ }
+ }
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index b11043b23c18..0787f12c1a1b 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void)
{
@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void)
{
@@ -331,5 +333,13 @@ long kvmppc_realmode_hmi_handler(void)
} else {
wait_for_tb_resync();
}
+
+ /*
+ * Reset tb_offset_applied so the guest exit code won't try
+ * to subtract the previous timebase offset from the timebase.
+ */
+ if (local_paca->kvm_hstate.kvm_vcore)
+ local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
+
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 758d1d23215e..b3f5786b20dc 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -136,7 +136,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
- set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+ set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) {
@@ -170,8 +170,7 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{
/* Note: Only called on self ! */
- clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
- &vcpu->arch.pending_exceptions);
+ clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
}
@@ -768,6 +767,14 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
+ if (kvmhv_on_pseries()) {
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+ iosync();
+ plpar_hcall_raw(H_EOI, retbuf, hwirq);
+ return;
+ }
+
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 1d14046124a0..9b8d50a7cbaf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -28,6 +28,7 @@
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/book3s/64/mmu-hash.h>
+#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
#include <asm/xive-regs.h>
@@ -46,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 160
+#define SFS 208
#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
@@ -56,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
+/* the following is used by the P9 short path */
+#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
/*
* Call kvmppc_hv_entry in real mode.
@@ -113,45 +117,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_SPRG_VDSO_WRITE,r3
/* Reload the host's PMU registers */
- lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
- cmpwi r4, 0
- beq 23f /* skip if not */
-BEGIN_FTR_SECTION
- ld r3, HSTATE_MMCR0(r13)
- andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
- cmpwi r4, MMCR0_PMAO
- beql kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
- lwz r3, HSTATE_PMC1(r13)
- lwz r4, HSTATE_PMC2(r13)
- lwz r5, HSTATE_PMC3(r13)
- lwz r6, HSTATE_PMC4(r13)
- lwz r8, HSTATE_PMC5(r13)
- lwz r9, HSTATE_PMC6(r13)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r4
- mtspr SPRN_PMC3, r5
- mtspr SPRN_PMC4, r6
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
- ld r3, HSTATE_MMCR0(r13)
- ld r4, HSTATE_MMCR1(r13)
- ld r5, HSTATE_MMCRA(r13)
- ld r6, HSTATE_SIAR(r13)
- ld r7, HSTATE_SDAR(r13)
- mtspr SPRN_MMCR1, r4
- mtspr SPRN_MMCRA, r5
- mtspr SPRN_SIAR, r6
- mtspr SPRN_SDAR, r7
-BEGIN_FTR_SECTION
- ld r8, HSTATE_MMCR2(r13)
- ld r9, HSTATE_SIER(r13)
- mtspr SPRN_MMCR2, r8
- mtspr SPRN_SIER, r9
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mtspr SPRN_MMCR0, r3
- isync
-23:
+ bl kvmhv_load_host_pmu
/*
* Reload DEC. HDEC interrupts were disabled when
@@ -796,66 +762,23 @@ BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
mr r3, r4
ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
bl kvmppc_restore_tm_hv
+ nop
ld r4, HSTATE_KVM_VCPU(r13)
91:
#endif
- /* Load guest PMU registers */
- /* R4 is live here (vcpu pointer) */
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- isync
-BEGIN_FTR_SECTION
- ld r3, VCPU_MMCR(r4)
- andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
- cmpwi r5, MMCR0_PMAO
- beql kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
- lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
- lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
- lwz r6, VCPU_PMC + 8(r4)
- lwz r7, VCPU_PMC + 12(r4)
- lwz r8, VCPU_PMC + 16(r4)
- lwz r9, VCPU_PMC + 20(r4)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r5
- mtspr SPRN_PMC3, r6
- mtspr SPRN_PMC4, r7
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
- ld r3, VCPU_MMCR(r4)
- ld r5, VCPU_MMCR + 8(r4)
- ld r6, VCPU_MMCR + 16(r4)
- ld r7, VCPU_SIAR(r4)
- ld r8, VCPU_SDAR(r4)
- mtspr SPRN_MMCR1, r5
- mtspr SPRN_MMCRA, r6
- mtspr SPRN_SIAR, r7
- mtspr SPRN_SDAR, r8
-BEGIN_FTR_SECTION
- ld r5, VCPU_MMCR + 24(r4)
- ld r6, VCPU_SIER(r4)
- mtspr SPRN_MMCR2, r5
- mtspr SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
- lwz r7, VCPU_PMC + 24(r4)
- lwz r8, VCPU_PMC + 28(r4)
- ld r9, VCPU_MMCR + 32(r4)
- mtspr SPRN_SPMC1, r7
- mtspr SPRN_SPMC2, r8
- mtspr SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mtspr SPRN_MMCR0, r3
- isync
+ /* Load guest PMU registers; r4 = vcpu pointer here */
+ mr r3, r4
+ bl kvmhv_load_guest_pmu
/* Load up FP, VMX and VSX registers */
+ ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_load_fp
ld r14, VCPU_GPR(R14)(r4)
@@ -1100,73 +1023,40 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
no_xive:
#endif /* CONFIG_KVM_XICS */
-deliver_guest_interrupt:
- ld r6, VCPU_CTR(r4)
- ld r7, VCPU_XER(r4)
-
- mtctr r6
- mtxer r7
+ li r0, 0
+ stw r0, STACK_SLOT_SHORT_PATH(r1)
-kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
- ld r10, VCPU_PC(r4)
- ld r11, VCPU_MSR(r4)
+deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
+ /* Check if we can deliver an external or decrementer interrupt now */
+ ld r0, VCPU_PENDING_EXC(r4)
+BEGIN_FTR_SECTION
+ /* On POWER9, also check for emulated doorbell interrupt */
+ lbz r3, VCPU_DBELL_REQ(r4)
+ or r0, r0, r3
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ cmpdi r0, 0
+ beq 71f
+ mr r3, r4
+ bl kvmppc_guest_entry_inject_int
+ ld r4, HSTATE_KVM_VCPU(r13)
+71:
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
+fast_guest_entry_c:
+ ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
- /* Check if we can deliver an external or decrementer interrupt now */
- ld r0, VCPU_PENDING_EXC(r4)
- rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
- cmpdi cr1, r0, 0
- andi. r8, r11, MSR_EE
- mfspr r8, SPRN_LPCR
- /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
- rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
- mtspr SPRN_LPCR, r8
- isync
- beq 5f
- li r0, BOOK3S_INTERRUPT_EXTERNAL
- bne cr1, 12f
- mfspr r0, SPRN_DEC
-BEGIN_FTR_SECTION
- /* On POWER9 check whether the guest has large decrementer enabled */
- andis. r8, r8, LPCR_LD@h
- bne 15f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- extsw r0, r0
-15: cmpdi r0, 0
- li r0, BOOK3S_INTERRUPT_DECREMENTER
- bge 5f
-
-12: mtspr SPRN_SRR0, r10
- mr r10,r0
- mtspr SPRN_SRR1, r11
- mr r9, r4
- bl kvmppc_msr_interrupt
-5:
-BEGIN_FTR_SECTION
- b fast_guest_return
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- /* On POWER9, check for pending doorbell requests */
- lbz r0, VCPU_DBELL_REQ(r4)
- cmpwi r0, 0
- beq fast_guest_return
- ld r5, HSTATE_KVM_VCORE(r13)
- /* Set DPDES register so the CPU will take a doorbell interrupt */
- li r0, 1
- mtspr SPRN_DPDES, r0
- std r0, VCORE_DPDES(r5)
- /* Make sure other cpus see vcore->dpdes set before dbell req clear */
- lwsync
- /* Clear the pending doorbell request */
- li r0, 0
- stb r0, VCPU_DBELL_REQ(r4)
+ ld r6, VCPU_CTR(r4)
+ ld r7, VCPU_XER(r4)
+ mtctr r6
+ mtxer r7
/*
* Required state:
@@ -1202,7 +1092,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
- lwz r6, VCPU_CR(r4)
+ ld r6, VCPU_CR(r4)
mtlr r5
mtcr r6
@@ -1234,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
HRFI_TO_GUEST
b .
+/*
+ * Enter the guest on a P9 or later system where we have exactly
+ * one vcpu per vcore and we don't need to go to real mode
+ * (which implies that host and guest are both using radix MMU mode).
+ * r3 = vcpu pointer
+ * Most SPRs and all the VSRs have been loaded already.
+ */
+_GLOBAL(__kvmhv_vcpu_entry_p9)
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -SFS(r1)
+
+ li r0, 1
+ stw r0, STACK_SLOT_SHORT_PATH(r1)
+
+ std r3, HSTATE_KVM_VCPU(r13)
+ mfcr r4
+ stw r4, SFS+8(r1)
+
+ std r1, HSTATE_HOST_R1(r13)
+
+ reg = 14
+ .rept 18
+ std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ reg = 14
+ .rept 18
+ ld reg, __VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
+
+ mr r4, r3
+ b fast_guest_entry_c
+guest_exit_short_path:
+
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
+
+ reg = 14
+ .rept 18
+ std reg, __VCPU_GPR(reg)(r9)
+ reg = reg + 1
+ .endr
+
+ reg = 14
+ .rept 18
+ ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4, SFS+8(r1)
+ mtcr r4
+
+ mr r3, r12 /* trap number */
+
+ addi r1, r1, SFS
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+
+ /* If we are in real mode, do a rfid to get back to the caller */
+ mfmsr r4
+ andi. r5, r4, MSR_IR
+ bnelr
+ rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
+ mtspr SPRN_SRR0, r0
+ ld r10, HSTATE_HOST_MSR(r13)
+ rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ mtspr SPRN_SRR1, r10
+ RFI_TO_KERNEL
+ b .
+
secondary_too_late:
li r12, 0
stw r12, STACK_SLOT_TRAP(r1)
@@ -1313,7 +1280,7 @@ kvmppc_interrupt_hv:
std r3, VCPU_GPR(R12)(r9)
/* CR is in the high half of r12 */
srdi r4, r12, 32
- stw r4, VCPU_CR(r9)
+ std r4, VCPU_CR(r9)
BEGIN_FTR_SECTION
ld r3, HSTATE_CFAR(r13)
std r3, VCPU_CFAR(r9)
@@ -1387,18 +1354,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r3, VCPU_CTR(r9)
std r4, VCPU_XER(r9)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* For softpatch interrupt, go off and do TM instruction emulation */
- cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
- beq kvmppc_tm_emul
-#endif
+ /* Save more register state */
+ mfdar r3
+ mfdsisr r4
+ std r3, VCPU_DAR(r9)
+ stw r4, VCPU_DSISR(r9)
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
+ std r3, VCPU_FAULT_DAR(r9)
+ stw r4, VCPU_FAULT_DSISR(r9)
cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* For softpatch interrupt, go off and do TM instruction emulation */
+ cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+ beq kvmppc_tm_emul
+#endif
+
/* See if this is a leftover HDEC interrupt */
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
@@ -1418,10 +1393,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
PPC_MSGSYNC
lwsync
+ /* always exit if we're running a nested guest */
+ ld r0, VCPU_NESTED(r9)
+ cmpdi r0, 0
+ bne guest_exit_cont
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
- beq 4f
+ beq maybe_reenter_guest
b guest_exit_cont
3:
/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
@@ -1433,82 +1412,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- bne+ guest_exit_cont
-
- /* External interrupt, first check for host_ipi. If this is
- * set, we know the host wants us out so let's do it now
- */
- bl kvmppc_read_intr
-
- /*
- * Restore the active volatile registers after returning from
- * a C function.
- */
- ld r9, HSTATE_KVM_VCPU(r13)
- li r12, BOOK3S_INTERRUPT_EXTERNAL
-
- /*
- * kvmppc_read_intr return codes:
- *
- * Exit to host (r3 > 0)
- * 1 An interrupt is pending that needs to be handled by the host
- * Exit guest and return to host by branching to guest_exit_cont
- *
- * 2 Passthrough that needs completion in the host
- * Exit guest and return to host by branching to guest_exit_cont
- * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
- * to indicate to the host to complete handling the interrupt
- *
- * Before returning to guest, we check if any CPU is heading out
- * to the host and if so, we head out also. If no CPUs are heading
- * check return values <= 0.
- *
- * Return to guest (r3 <= 0)
- * 0 No external interrupt is pending
- * -1 A guest wakeup IPI (which has now been cleared)
- * In either case, we return to guest to deliver any pending
- * guest interrupts.
- *
- * -2 A PCI passthrough external interrupt was handled
- * (interrupt was delivered directly to guest)
- * Return to guest to deliver any pending guest interrupts.
- */
-
- cmpdi r3, 1
- ble 1f
-
- /* Return code = 2 */
- li r12, BOOK3S_INTERRUPT_HV_RM_HARD
- stw r12, VCPU_TRAP(r9)
- b guest_exit_cont
-
-1: /* Return code <= 1 */
- cmpdi r3, 0
- bgt guest_exit_cont
-
- /* Return code <= 0 */
-4: ld r5, HSTATE_KVM_VCORE(r13)
- lwz r0, VCORE_ENTRY_EXIT(r5)
- cmpwi r0, 0x100
- mr r4, r9
- blt deliver_guest_interrupt
-
-guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
- /* Save more register state */
- mfdar r6
- mfdsisr r7
- std r6, VCPU_DAR(r9)
- stw r7, VCPU_DSISR(r9)
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
- beq mc_cont
- std r6, VCPU_FAULT_DAR(r9)
- stw r7, VCPU_FAULT_DSISR(r9)
-
+ beq kvmppc_guest_external
/* See if it is a machine check */
cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
beq machine_check_realmode
-mc_cont:
+ /* Or a hypervisor maintenance interrupt */
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
+ beq hmi_realmode
+
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMEXIT
mr r4, r9
@@ -1552,6 +1465,11 @@ mc_cont:
1:
#endif /* CONFIG_KVM_XICS */
+ /* If we came in through the P9 short path, go back out to C now */
+ lwz r0, STACK_SLOT_SHORT_PATH(r1)
+ cmpwi r0, 0
+ bne guest_exit_short_path
+
/* For hash guest, read the guest SLB and save it away */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
@@ -1780,11 +1698,13 @@ BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
mr r3, r9
ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
bl kvmppc_save_tm_hv
+ nop
ld r9, HSTATE_KVM_VCPU(r13)
91:
#endif
@@ -1802,83 +1722,12 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
25:
/* Save PMU registers if requested */
/* r8 and cr0.eq are live here */
-BEGIN_FTR_SECTION
- /*
- * POWER8 seems to have a hardware bug where setting
- * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
- * when some counters are already negative doesn't seem
- * to cause a performance monitor alert (and hence interrupt).
- * The effect of this is that when saving the PMU state,
- * if there is no PMU alert pending when we read MMCR0
- * before freezing the counters, but one becomes pending
- * before we read the counters, we lose it.
- * To work around this, we need a way to freeze the counters
- * before reading MMCR0. Normally, freezing the counters
- * is done by writing MMCR0 (to set MMCR0[FC]) which
- * unavoidably writes MMCR0[PMA0] as well. On POWER8,
- * we can also freeze the counters using MMCR2, by writing
- * 1s to all the counter freeze condition bits (there are
- * 9 bits each for 6 counters).
- */
- li r3, -1 /* set all freeze bits */
- clrrdi r3, r3, 10
- mfspr r10, SPRN_MMCR2
- mtspr SPRN_MMCR2, r3
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mfspr r4, SPRN_MMCR0 /* save MMCR0 */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- mfspr r6, SPRN_MMCRA
- /* Clear MMCRA in order to disable SDAR updates */
- li r7, 0
- mtspr SPRN_MMCRA, r7
- isync
+ mr r3, r9
+ li r4, 1
beq 21f /* if no VPA, save PMU stuff anyway */
- lbz r7, LPPACA_PMCINUSE(r8)
- cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
- bne 21f
- std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
- b 22f
-21: mfspr r5, SPRN_MMCR1
- mfspr r7, SPRN_SIAR
- mfspr r8, SPRN_SDAR
- std r4, VCPU_MMCR(r9)
- std r5, VCPU_MMCR + 8(r9)
- std r6, VCPU_MMCR + 16(r9)
-BEGIN_FTR_SECTION
- std r10, VCPU_MMCR + 24(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- std r7, VCPU_SIAR(r9)
- std r8, VCPU_SDAR(r9)
- mfspr r3, SPRN_PMC1
- mfspr r4, SPRN_PMC2
- mfspr r5, SPRN_PMC3
- mfspr r6, SPRN_PMC4
- mfspr r7, SPRN_PMC5
- mfspr r8, SPRN_PMC6
- stw r3, VCPU_PMC(r9)
- stw r4, VCPU_PMC + 4(r9)
- stw r5, VCPU_PMC + 8(r9)
- stw r6, VCPU_PMC + 12(r9)
- stw r7, VCPU_PMC + 16(r9)
- stw r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_SIER
- std r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
- mfspr r6, SPRN_SPMC1
- mfspr r7, SPRN_SPMC2
- mfspr r8, SPRN_MMCRS
- stw r6, VCPU_PMC + 24(r9)
- stw r7, VCPU_PMC + 28(r9)
- std r8, VCPU_MMCR + 32(r9)
- lis r4, 0x8000
- mtspr SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-22:
+ lbz r4, LPPACA_PMCINUSE(r8)
+21: bl kvmhv_save_guest_pmu
+ ld r9, HSTATE_KVM_VCPU(r13)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
@@ -2010,24 +1859,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DPDES, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- /* If HMI, call kvmppc_realmode_hmi_handler() */
- lwz r12, STACK_SLOT_TRAP(r1)
- cmpwi r12, BOOK3S_INTERRUPT_HMI
- bne 27f
- bl kvmppc_realmode_hmi_handler
- nop
- cmpdi r3, 0
- /*
- * At this point kvmppc_realmode_hmi_handler may have resync-ed
- * the TB, and if it has, we must not subtract the guest timebase
- * offset from the timebase. So, skip it.
- *
- * Also, do not call kvmppc_subcore_exit_guest() because it has
- * been invoked as part of kvmppc_realmode_hmi_handler().
- */
- beq 30f
-
-27:
/* Subtract timebase offset from timebase */
ld r8, VCORE_TB_OFFSET_APPL(r5)
cmpdi r8,0
@@ -2045,7 +1876,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
addis r8,r8,0x100 /* if so, increment upper 40 bits */
mtspr SPRN_TBU40,r8
-17: bl kvmppc_subcore_exit_guest
+17:
+ /*
+ * If this is an HMI, we called kvmppc_realmode_hmi_handler
+ * above, which may or may not have already called
+ * kvmppc_subcore_exit_guest. Fortunately, all that
+ * kvmppc_subcore_exit_guest does is clear a flag, so calling
+ * it again here is benign even if kvmppc_realmode_hmi_handler
+ * has already called it.
+ */
+ bl kvmppc_subcore_exit_guest
nop
30: ld r5,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
@@ -2099,6 +1939,67 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtlr r0
blr
+kvmppc_guest_external:
+ /* External interrupt, first check for host_ipi. If this is
+ * set, we know the host wants us out so let's do it now
+ */
+ bl kvmppc_read_intr
+
+ /*
+ * Restore the active volatile registers after returning from
+ * a C function.
+ */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+
+ /*
+ * kvmppc_read_intr return codes:
+ *
+ * Exit to host (r3 > 0)
+ * 1 An interrupt is pending that needs to be handled by the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ *
+ * 2 Passthrough that needs completion in the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+ * to indicate to the host to complete handling the interrupt
+ *
+ * Before returning to guest, we check if any CPU is heading out
+ * to the host and if so, we head out also. If no CPUs are heading
+ * check return values <= 0.
+ *
+ * Return to guest (r3 <= 0)
+ * 0 No external interrupt is pending
+ * -1 A guest wakeup IPI (which has now been cleared)
+ * In either case, we return to guest to deliver any pending
+ * guest interrupts.
+ *
+ * -2 A PCI passthrough external interrupt was handled
+ * (interrupt was delivered directly to guest)
+ * Return to guest to deliver any pending guest interrupts.
+ */
+
+ cmpdi r3, 1
+ ble 1f
+
+ /* Return code = 2 */
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
+ stw r12, VCPU_TRAP(r9)
+ b guest_exit_cont
+
+1: /* Return code <= 1 */
+ cmpdi r3, 0
+ bgt guest_exit_cont
+
+ /* Return code <= 0 */
+maybe_reenter_guest:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ mr r4, r9
+ blt deliver_guest_interrupt
+ b guest_exit_cont
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Softpatch interrupt for transactional memory emulation cases
@@ -2302,6 +2203,10 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
+ /* sc 1 from nested guest - give it to L1 to handle */
+ ld r0, VCPU_NESTED(r9)
+ cmpdi r0, 0
+ bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -2561,6 +2466,7 @@ hcall_real_table:
hcall_real_table_end:
_GLOBAL(kvmppc_h_set_xdabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
andi. r0, r5, DABRX_USER | DABRX_KERNEL
beq 6f
li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
@@ -2570,6 +2476,7 @@ _GLOBAL(kvmppc_h_set_xdabr)
blr
_GLOBAL(kvmppc_h_set_dabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
li r5, DABRX_USER | DABRX_KERNEL
3:
BEGIN_FTR_SECTION
@@ -2682,11 +2589,13 @@ BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
ld r3, HSTATE_KVM_VCPU(r13)
ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
bl kvmppc_save_tm_hv
+ nop
91:
#endif
@@ -2802,11 +2711,13 @@ BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
/*
- * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
mr r3, r4
ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
bl kvmppc_restore_tm_hv
+ nop
ld r4, HSTATE_KVM_VCPU(r13)
91:
#endif
@@ -2874,13 +2785,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mr r9, r4
cmpdi r3, 0
bgt guest_exit_cont
-
- /* see if any other thread is already exiting */
- lwz r0,VCORE_ENTRY_EXIT(r5)
- cmpwi r0,0x100
- bge guest_exit_cont
-
- b kvmppc_cede_reentry /* if not go back to guest */
+ b maybe_reenter_guest
/* cede when already previously prodded case */
kvm_cede_prodded:
@@ -2947,12 +2852,12 @@ machine_check_realmode:
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
- bne mc_cont /* if so, exit to host */
+ bne guest_exit_cont /* if so, exit to host */
/* Check if guest is capable of handling NMI exit */
ld r10, VCPU_KVM(r9)
lbz r10, KVM_FWNMI(r10)
cmpdi r10, 1 /* FWNMI capable? */
- beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
+ beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */
/* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
@@ -2966,6 +2871,21 @@ machine_check_realmode:
2: b fast_interrupt_c_return
/*
+ * Call C code to handle a HMI in real mode.
+ * Only the primary thread does the call, secondary threads are handled
+ * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
+ * r9 points to the vcpu on entry
+ */
+hmi_realmode:
+ lbz r0, HSTATE_PTID(r13)
+ cmpwi r0, 0
+ bne guest_exit_cont
+ bl kvmppc_realmode_hmi_handler
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_HMI
+ b guest_exit_cont
+
+/*
* Check the reason we woke from nap, and take appropriate action.
* Returns (in r3):
* 0 if nothing needs to be done
@@ -3130,10 +3050,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* Save transactional state and TM-related registers.
* Called with r3 pointing to the vcpu struct and r4 containing
* the guest MSR value.
- * This can modify all checkpointed registers, but
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * If r5 == 0, this can modify all checkpointed registers, but
* restores r1 and r2 before exit.
*/
-kvmppc_save_tm_hv:
+_GLOBAL_TOC(kvmppc_save_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
/* See if we need to handle fake suspend mode */
BEGIN_FTR_SECTION
b __kvmppc_save_tm
@@ -3161,12 +3083,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
nop
- std r1, HSTATE_HOST_R1(r13)
-
- /* Clear the MSR RI since r1, r13 may be foobar. */
- li r5, 0
- mtmsrd r5, 1
-
/* We have to treclaim here because that's the only way to do S->N */
li r3, TM_CAUSE_KVM_RESCHED
TRECLAIM(R3)
@@ -3175,22 +3091,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
* We were in fake suspend, so we are not going to save the
* register state as the guest checkpointed state (since
* we already have it), therefore we can now use any volatile GPR.
+ * In fact treclaim in fake suspend state doesn't modify
+ * any registers.
*/
- /* Reload PACA pointer, stack pointer and TOC. */
- GET_PACA(r13)
- ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
- /* Set MSR RI now we have r1 and r13 back. */
- li r5, MSR_RI
- mtmsrd r5, 1
-
- HMT_MEDIUM
- ld r6, HSTATE_DSCR(r13)
- mtspr SPRN_DSCR, r6
-BEGIN_FTR_SECTION_NESTED(96)
+BEGIN_FTR_SECTION
bl pnv_power9_force_smt4_release
-END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
nop
4:
@@ -3216,10 +3123,12 @@ END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
* Restore transactional state and TM-related registers.
* Called with r3 pointing to the vcpu struct
* and r4 containing the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
* This potentially modifies all checkpointed registers.
* It restores r1 and r2 from the PACA.
*/
-kvmppc_restore_tm_hv:
+_GLOBAL_TOC(kvmppc_restore_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
/*
* If we are doing TM emulation for the guest on a POWER9 DD2,
* then we don't actually do a trechkpt -- we either set up
@@ -3424,6 +3333,194 @@ kvmppc_msr_interrupt:
blr
/*
+ * Load up guest PMU state. R3 points to the vcpu struct.
+ */
+_GLOBAL(kvmhv_load_guest_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+ mr r4, r3
+ mflr r0
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+BEGIN_FTR_SECTION
+ ld r3, VCPU_MMCR(r4)
+ andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r5, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCR + 16(r4)
+ ld r7, VCPU_SIAR(r4)
+ ld r8, VCPU_SDAR(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_SIAR, r7
+ mtspr SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+ ld r5, VCPU_MMCR + 24(r4)
+ ld r6, VCPU_SIER(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+BEGIN_FTR_SECTION_NESTED(96)
+ lwz r7, VCPU_PMC + 24(r4)
+ lwz r8, VCPU_PMC + 28(r4)
+ ld r9, VCPU_MMCR + 32(r4)
+ mtspr SPRN_SPMC1, r7
+ mtspr SPRN_SPMC2, r8
+ mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+ mtlr r0
+ blr
+
+/*
+ * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
+ */
+_GLOBAL(kvmhv_load_host_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+ mflr r0
+ lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+BEGIN_FTR_SECTION
+ ld r3, HSTATE_MMCR0(r13)
+ andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r4, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, HSTATE_PMC1(r13)
+ lwz r4, HSTATE_PMC2(r13)
+ lwz r5, HSTATE_PMC3(r13)
+ lwz r6, HSTATE_PMC4(r13)
+ lwz r8, HSTATE_PMC5(r13)
+ lwz r9, HSTATE_PMC6(r13)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+ ld r3, HSTATE_MMCR0(r13)
+ ld r4, HSTATE_MMCR1(r13)
+ ld r5, HSTATE_MMCRA(r13)
+ ld r6, HSTATE_SIAR(r13)
+ ld r7, HSTATE_SDAR(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_SIAR, r6
+ mtspr SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+ ld r8, HSTATE_MMCR2(r13)
+ ld r9, HSTATE_SIER(r13)
+ mtspr SPRN_MMCR2, r8
+ mtspr SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+ mtlr r0
+23: blr
+
+/*
+ * Save guest PMU state into the vcpu struct.
+ * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
+ */
+_GLOBAL(kvmhv_save_guest_pmu)
+EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+ mr r9, r3
+ mr r8, r4
+BEGIN_FTR_SECTION
+ /*
+ * POWER8 seems to have a hardware bug where setting
+ * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+ * when some counters are already negative doesn't seem
+ * to cause a performance monitor alert (and hence interrupt).
+ * The effect of this is that when saving the PMU state,
+ * if there is no PMU alert pending when we read MMCR0
+ * before freezing the counters, but one becomes pending
+ * before we read the counters, we lose it.
+ * To work around this, we need a way to freeze the counters
+ * before reading MMCR0. Normally, freezing the counters
+ * is done by writing MMCR0 (to set MMCR0[FC]) which
+ * unavoidably writes MMCR0[PMA0] as well. On POWER8,
+ * we can also freeze the counters using MMCR2, by writing
+ * 1s to all the counter freeze condition bits (there are
+ * 9 bits each for 6 counters).
+ */
+ li r3, -1 /* set all freeze bits */
+ clrrdi r3, r3, 10
+ mfspr r10, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+ /* Clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+ isync
+ cmpwi r8, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r7, SPRN_SIAR
+ mfspr r8, SPRN_SDAR
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCR + 16(r9)
+BEGIN_FTR_SECTION
+ std r10, VCPU_MMCR + 24(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ std r7, VCPU_SIAR(r9)
+ std r8, VCPU_SDAR(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_SIER
+ std r5, VCPU_SIER(r9)
+BEGIN_FTR_SECTION_NESTED(96)
+ mfspr r6, SPRN_SPMC1
+ mfspr r7, SPRN_SPMC2
+ mfspr r8, SPRN_MMCRS
+ stw r6, VCPU_PMC + 24(r9)
+ stw r7, VCPU_PMC + 28(r9)
+ std r8, VCPU_MMCR + 32(r9)
+ lis r4, 0x8000
+ mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22: blr
+
+/*
* This works around a hardware bug on POWER8E processors, where
* writing a 1 to the MMCR0[PMAO] bit doesn't generate a
* performance monitor interrupt. Instead, when we need to have
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index 008285058f9b..888e2609e3f1 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -130,7 +130,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
/* Set CR0 to indicate previous transactional state */
- vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
/* L=1 => tresume, L=0 => tsuspend */
if (instr & (1 << 21)) {
@@ -174,7 +174,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_from_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */
- vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
return RESUME_GUEST;
@@ -204,7 +204,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
copy_to_checkpoint(vcpu);
/* Set CR0 to indicate previous transactional state */
- vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
vcpu->arch.shregs.msr = msr | MSR_TS_S;
return RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
index b2c7c6fca4f9..3cf5863bc06e 100644
--- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -89,7 +89,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
if (instr & (1 << 21))
vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
/* Set CR0 to 0b0010 */
- vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+ 0x20000000;
return 1;
}
@@ -105,5 +106,5 @@ void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
vcpu->arch.regs.nip = vcpu->arch.tfhar;
copy_from_checkpoint(vcpu);
- vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 614ebb4261f7..4efd65d9e828 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -167,7 +167,7 @@ void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
- svcpu->cr = vcpu->arch.cr;
+ svcpu->cr = vcpu->arch.regs.ccr;
svcpu->xer = vcpu->arch.regs.xer;
svcpu->ctr = vcpu->arch.regs.ctr;
svcpu->lr = vcpu->arch.regs.link;
@@ -249,7 +249,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
- vcpu->arch.cr = svcpu->cr;
+ vcpu->arch.regs.ccr = svcpu->cr;
vcpu->arch.regs.xer = svcpu->xer;
vcpu->arch.regs.ctr = svcpu->ctr;
vcpu->arch.regs.link = svcpu->lr;
@@ -1246,7 +1246,6 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
- case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
case BOOK3S_INTERRUPT_EXTERNAL_HV:
case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index b8356cdc0c04..b0b2bfc2ff51 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -310,7 +310,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
*/
if (new.out_ee) {
kvmppc_book3s_queue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ BOOK3S_INTERRUPT_EXTERNAL);
if (!change_self)
kvmppc_fast_vcpu_kick(icp->vcpu);
}
@@ -593,8 +593,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
u32 xirr;
/* First, remove EE from the processor */
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* ICP State: Accept_Interrupt
@@ -754,8 +753,7 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* We can remove EE from the current processor, the update
* transaction will set it again if needed
*/
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
do {
old_state = new_state = READ_ONCE(icp->state);
@@ -1167,8 +1165,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* Deassert the CPU interrupt request.
* icp_try_update will reassert it if necessary.
*/
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* Note that if we displace an interrupt from old_state.xisr,
@@ -1393,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ cpu_has_feature(CPU_FTR_HVMODE)) {
/* Enable real mode support */
xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE;
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 30c2eb766954..ad4a370703d3 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -62,6 +62,69 @@
#define XIVE_Q_GAP 2
/*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+ u64 pq;
+
+ if (!tima)
+ return;
+ eieio();
+ __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+ __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+ vcpu->arch.xive_pushed = 1;
+ eieio();
+
+ /*
+ * We clear the irq_pending flag. There is a small chance of a
+ * race vs. the escalation interrupt happening on another
+ * processor setting it again, but the only consequence is to
+ * cause a spurious wakeup on the next H_CEDE, which is not an
+ * issue.
+ */
+ vcpu->arch.irq_pending = 0;
+
+ /*
+ * In single escalation mode, if the escalation interrupt is
+ * on, we mask it.
+ */
+ if (vcpu->arch.xive_esc_on) {
+ pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+ XIVE_ESB_SET_PQ_01));
+ mb();
+
+ /*
+ * We have a possible subtle race here: The escalation
+ * interrupt might have fired and be on its way to the
+ * host queue while we mask it, and if we unmask it
+ * early enough (re-cede right away), there is a
+ * theorical possibility that it fires again, thus
+ * landing in the target queue more than once which is
+ * a big no-no.
+ *
+ * Fortunately, solving this is rather easy. If the
+ * above load setting PQ to 01 returns a previous
+ * value where P is set, then we know the escalation
+ * interrupt is somewhere on its way to the host. In
+ * that case we simply don't clear the xive_esc_on
+ * flag below. It will be eventually cleared by the
+ * handler for the escalation interrupt.
+ *
+ * Then, when doing a cede, we check that flag again
+ * before re-enabling the escalation interrupt, and if
+ * set, we abort the cede.
+ */
+ if (!(pq & XIVE_ESB_VAL_P))
+ /* Now P is 0, we can clear the flag */
+ vcpu->arch.xive_esc_on = 0;
+ }
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
*/
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4171ede8722b..033363d6e764 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -280,14 +280,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
/* First collect pending bits from HW */
GLUE(X_PFX,ack_pending)(xc);
- /*
- * Cleanup the old-style bits if needed (they may have been
- * set by pull or an escalation interrupts).
- */
- if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
- clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
- &vcpu->arch.pending_exceptions);
-
pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
xc->pending, xc->hw_cppr, xc->cppr);
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 81bd8a07aa51..051af7d97327 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -182,7 +182,7 @@
*/
PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
- stw r10, VCPU_CR(r4)
+ PPC_STL r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4)
PPC_STL r6, VCPU_GPR(R6)(r4)
@@ -292,7 +292,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, THREAD_NORMSAVE(0)(r10)
PPC_STL r5, VCPU_GPR(R5)(r11)
- stw r13, VCPU_CR(r11)
+ PPC_STL r13, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R10)(r11)
PPC_LL r3, THREAD_NORMSAVE(2)(r10)
@@ -319,7 +319,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, GPR9(r8)
PPC_STL r5, VCPU_GPR(R5)(r11)
- stw r9, VCPU_CR(r11)
+ PPC_STL r9, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R8)(r11)
PPC_LL r3, GPR10(r8)
@@ -643,7 +643,7 @@ lightweight_exit:
PPC_LL r3, VCPU_LR(r4)
PPC_LL r5, VCPU_XER(r4)
PPC_LL r6, VCPU_CTR(r4)
- lwz r7, VCPU_CR(r4)
+ PPC_LL r7, VCPU_CR(r4)
PPC_LL r8, VCPU_PC(r4)
PPC_LD(r9, VCPU_SHARED_MSR, r11)
PPC_LL r0, VCPU_GPR(R0)(r4)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 75dce1ef3bc8..f91b1309a0a8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -117,7 +117,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = EMULATE_FAIL;
vcpu->arch.regs.msr = vcpu->arch.shared->msr;
- vcpu->arch.regs.ccr = vcpu->arch.cr;
if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
int type = op.type & INSTR_TYPE_MASK;
int size = GETSIZE(op.type);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index eba5756d5b41..2869a299c4ed 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -594,7 +594,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && radix_enabled());
break;
case KVM_CAP_PPC_MMU_HASH_V3:
- r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300));
+ r = !!(hv_enabled && cpu_has_feature(CPU_FTR_ARCH_300) &&
+ cpu_has_feature(CPU_FTR_HVMODE));
+ break;
+ case KVM_CAP_PPC_NESTED_HV:
+ r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
+ !kvmppc_hv_ops->enable_nested(NULL));
break;
#endif
case KVM_CAP_SYNC_MMU:
@@ -2114,6 +2119,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
break;
}
+
+ case KVM_CAP_PPC_NESTED_HV:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) ||
+ !kvm->arch.kvm_ops->enable_nested)
+ break;
+ r = kvm->arch.kvm_ops->enable_nested(kvm);
+ break;
#endif
default:
r = -EINVAL;
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 90e330f21356..0531a1492fdf 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -28,17 +28,25 @@
* Save transactional state and TM-related registers.
* Called with:
* - r3 pointing to the vcpu struct
- * - r4 points to the MSR with current TS bits:
+ * - r4 containing the MSR with current TS bits:
* (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
- * This can modify all checkpointed registers, but
- * restores r1, r2 before exit.
+ * - r5 containing a flag indicating that non-volatile registers
+ * must be preserved.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1, r2 before exit. If r5 != 0, this restores the
+ * MSR TM/FP/VEC/VSX bits to their state on entry.
*/
_GLOBAL(__kvmppc_save_tm)
mflr r0
std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ mr r9, r3
+ cmpdi cr7, r5, 0
/* Turn on TM. */
mfmsr r8
+ mr r10, r8
li r0, 1
rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
ori r8, r8, MSR_FP
@@ -51,6 +59,27 @@ _GLOBAL(__kvmppc_save_tm)
std r1, HSTATE_SCRATCH2(r13)
std r3, HSTATE_SCRATCH1(r13)
+ /* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
+ mfcr r6
+ SAVE_GPR(6, r1)
+
+ /* Save DSCR so we can restore it to avoid running with user value */
+ mfspr r7, SPRN_DSCR
+ SAVE_GPR(7, r1)
+
+ /*
+ * We are going to do treclaim., which will modify all checkpointed
+ * registers. Save the non-volatile registers on the stack if
+ * preservation of non-volatile state has been requested.
+ */
+ beq cr7, 3f
+ SAVE_NVGPRS(r1)
+
+ /* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
+ li r0, 0
+ rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ SAVE_GPR(10, r1) /* final MSR value */
+3:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
BEGIN_FTR_SECTION
/* Emulation of the treclaim instruction needs TEXASR before treclaim */
@@ -74,22 +103,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
std r9, PACATMSCRATCH(r13)
ld r9, HSTATE_SCRATCH1(r13)
- /* Get a few more GPRs free. */
- std r29, VCPU_GPRS_TM(29)(r9)
- std r30, VCPU_GPRS_TM(30)(r9)
- std r31, VCPU_GPRS_TM(31)(r9)
-
- /* Save away PPR and DSCR soon so don't run with user values. */
- mfspr r31, SPRN_PPR
+ /* Save away PPR soon so we don't run with user value. */
+ std r0, VCPU_GPRS_TM(0)(r9)
+ mfspr r0, SPRN_PPR
HMT_MEDIUM
- mfspr r30, SPRN_DSCR
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- ld r29, HSTATE_DSCR(r13)
- mtspr SPRN_DSCR, r29
-#endif
- /* Save all but r9, r13 & r29-r31 */
- reg = 0
+ /* Reload stack pointer. */
+ std r1, VCPU_GPRS_TM(1)(r9)
+ ld r1, HSTATE_SCRATCH2(r13)
+
+ /* Set MSR RI now we have r1 and r13 back. */
+ std r2, VCPU_GPRS_TM(2)(r9)
+ li r2, MSR_RI
+ mtmsrd r2, 1
+
+ /* Reload TOC pointer. */
+ ld r2, PACATOC(r13)
+
+ /* Save all but r0-r2, r9 & r13 */
+ reg = 3
.rept 29
.if (reg != 9) && (reg != 13)
std reg, VCPU_GPRS_TM(reg)(r9)
@@ -103,33 +135,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
ld r4, PACATMSCRATCH(r13)
std r4, VCPU_GPRS_TM(9)(r9)
- /* Reload stack pointer and TOC. */
- ld r1, HSTATE_SCRATCH2(r13)
- ld r2, PACATOC(r13)
-
- /* Set MSR RI now we have r1 and r13 back. */
- li r5, MSR_RI
- mtmsrd r5, 1
+ /* Restore host DSCR and CR values, after saving guest values */
+ mfcr r6
+ mfspr r7, SPRN_DSCR
+ stw r6, VCPU_CR_TM(r9)
+ std r7, VCPU_DSCR_TM(r9)
+ REST_GPR(6, r1)
+ REST_GPR(7, r1)
+ mtcr r6
+ mtspr SPRN_DSCR, r7
- /* Save away checkpinted SPRs. */
- std r31, VCPU_PPR_TM(r9)
- std r30, VCPU_DSCR_TM(r9)
+ /* Save away checkpointed SPRs. */
+ std r0, VCPU_PPR_TM(r9)
mflr r5
- mfcr r6
mfctr r7
mfspr r8, SPRN_AMR
mfspr r10, SPRN_TAR
mfxer r11
std r5, VCPU_LR_TM(r9)
- stw r6, VCPU_CR_TM(r9)
std r7, VCPU_CTR_TM(r9)
std r8, VCPU_AMR_TM(r9)
std r10, VCPU_TAR_TM(r9)
std r11, VCPU_XER_TM(r9)
- /* Restore r12 as trap number. */
- lwz r12, VCPU_TRAP(r9)
-
/* Save FP/VSX. */
addi r3, r9, VCPU_FPRS_TM
bl store_fp_state
@@ -137,6 +165,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
bl store_vr_state
mfspr r6, SPRN_VRSAVE
stw r6, VCPU_VRSAVE_TM(r9)
+
+ /* Restore non-volatile registers if requested to */
+ beq cr7, 1f
+ REST_NVGPRS(r1)
+ REST_GPR(10, r1)
1:
/*
* We need to save these SPRs after the treclaim so that the software
@@ -146,12 +179,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
*/
mfspr r7, SPRN_TEXASR
std r7, VCPU_TEXASR(r9)
-11:
mfspr r5, SPRN_TFHAR
mfspr r6, SPRN_TFIAR
std r5, VCPU_TFHAR(r9)
std r6, VCPU_TFIAR(r9)
+ /* Restore MSR state if requested */
+ beq cr7, 2f
+ mtmsrd r10, 0
+2:
+ addi r1, r1, SWITCH_FRAME_SIZE
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
@@ -161,49 +198,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
* be invoked from C function by PR KVM only.
*/
_GLOBAL(_kvmppc_save_tm_pr)
- mflr r5
- std r5, PPC_LR_STKOFF(r1)
- stdu r1, -SWITCH_FRAME_SIZE(r1)
- SAVE_NVGPRS(r1)
-
- /* save MSR since TM/math bits might be impacted
- * by __kvmppc_save_tm().
- */
- mfmsr r5
- SAVE_GPR(5, r1)
-
- /* also save DSCR/CR/TAR so that it can be recovered later */
- mfspr r6, SPRN_DSCR
- SAVE_GPR(6, r1)
-
- mfcr r7
- stw r7, _CCR(r1)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
mfspr r8, SPRN_TAR
- SAVE_GPR(8, r1)
+ std r8, PPC_MIN_STKFRM-8(r1)
+ li r5, 1 /* preserve non-volatile registers */
bl __kvmppc_save_tm
- REST_GPR(8, r1)
+ ld r8, PPC_MIN_STKFRM-8(r1)
mtspr SPRN_TAR, r8
- ld r7, _CCR(r1)
- mtcr r7
-
- REST_GPR(6, r1)
- mtspr SPRN_DSCR, r6
-
- /* need preserve current MSR's MSR_TS bits */
- REST_GPR(5, r1)
- mfmsr r6
- rldicl r6, r6, 64 - MSR_TS_S_LG, 62
- rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtmsrd r5
-
- REST_NVGPRS(r1)
- addi r1, r1, SWITCH_FRAME_SIZE
- ld r5, PPC_LR_STKOFF(r1)
- mtlr r5
+ addi r1, r1, PPC_MIN_STKFRM
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
blr
EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
@@ -215,15 +225,21 @@ EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
* - r4 is the guest MSR with desired TS bits:
* For HV KVM, it is VCPU_MSR
* For PR KVM, it is provided by caller
- * This potentially modifies all checkpointed registers.
- * It restores r1, r2 from the PACA.
+ * - r5 containing a flag indicating that non-volatile registers
+ * must be preserved.
+ * If r5 == 0, this potentially modifies all checkpointed registers, but
+ * restores r1, r2 from the PACA before exit.
+ * If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
*/
_GLOBAL(__kvmppc_restore_tm)
mflr r0
std r0, PPC_LR_STKOFF(r1)
+ cmpdi cr7, r5, 0
+
/* Turn on TM/FP/VSX/VMX so we can restore them. */
mfmsr r5
+ mr r10, r5
li r6, MSR_TM >> 32
sldi r6, r6, 32
or r5, r5, r6
@@ -244,8 +260,7 @@ _GLOBAL(__kvmppc_restore_tm)
mr r5, r4
rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
- beqlr /* TM not active in guest */
- std r1, HSTATE_SCRATCH2(r13)
+ beq 9f /* TM not active in guest */
/* Make sure the failure summary is set, otherwise we'll program check
* when we trechkpt. It's possible that this might have been not set
@@ -256,6 +271,26 @@ _GLOBAL(__kvmppc_restore_tm)
mtspr SPRN_TEXASR, r7
/*
+ * Make a stack frame and save non-volatile registers if requested.
+ */
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+ std r1, HSTATE_SCRATCH2(r13)
+
+ mfcr r6
+ mfspr r7, SPRN_DSCR
+ SAVE_GPR(2, r1)
+ SAVE_GPR(6, r1)
+ SAVE_GPR(7, r1)
+
+ beq cr7, 4f
+ SAVE_NVGPRS(r1)
+
+ /* MSR[TS] will be 1 (suspended) once we do trechkpt */
+ li r0, 1
+ rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ SAVE_GPR(10, r1) /* final MSR value */
+4:
+ /*
* We need to load up the checkpointed state for the guest.
* We need to do this early as it will blow away any GPRs, VSRs and
* some SPRs.
@@ -291,8 +326,6 @@ _GLOBAL(__kvmppc_restore_tm)
ld r29, VCPU_DSCR_TM(r3)
ld r30, VCPU_PPR_TM(r3)
- std r2, PACATMSCRATCH(r13) /* Save TOC */
-
/* Clear the MSR RI since r1, r13 are all going to be foobar. */
li r5, 0
mtmsrd r5, 1
@@ -318,18 +351,31 @@ _GLOBAL(__kvmppc_restore_tm)
/* Now let's get back the state we need. */
HMT_MEDIUM
GET_PACA(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- ld r29, HSTATE_DSCR(r13)
- mtspr SPRN_DSCR, r29
-#endif
ld r1, HSTATE_SCRATCH2(r13)
- ld r2, PACATMSCRATCH(r13)
+ REST_GPR(7, r1)
+ mtspr SPRN_DSCR, r7
/* Set the MSR RI since we have our registers back. */
li r5, MSR_RI
mtmsrd r5, 1
+
+ /* Restore TOC pointer and CR */
+ REST_GPR(2, r1)
+ REST_GPR(6, r1)
+ mtcr r6
+
+ /* Restore non-volatile registers if requested to. */
+ beq cr7, 5f
+ REST_GPR(10, r1)
+ REST_NVGPRS(r1)
+
+5: addi r1, r1, SWITCH_FRAME_SIZE
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
+
+9: /* Restore MSR bits if requested */
+ beqlr cr7
+ mtmsrd r10, 0
blr
/*
@@ -337,47 +383,23 @@ _GLOBAL(__kvmppc_restore_tm)
* can be invoked from C function by PR KVM only.
*/
_GLOBAL(_kvmppc_restore_tm_pr)
- mflr r5
- std r5, PPC_LR_STKOFF(r1)
- stdu r1, -SWITCH_FRAME_SIZE(r1)
- SAVE_NVGPRS(r1)
-
- /* save MSR to avoid TM/math bits change */
- mfmsr r5
- SAVE_GPR(5, r1)
-
- /* also save DSCR/CR/TAR so that it can be recovered later */
- mfspr r6, SPRN_DSCR
- SAVE_GPR(6, r1)
-
- mfcr r7
- stw r7, _CCR(r1)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+ /* save TAR so that it can be recovered later */
mfspr r8, SPRN_TAR
- SAVE_GPR(8, r1)
+ std r8, PPC_MIN_STKFRM-8(r1)
+ li r5, 1
bl __kvmppc_restore_tm
- REST_GPR(8, r1)
+ ld r8, PPC_MIN_STKFRM-8(r1)
mtspr SPRN_TAR, r8
- ld r7, _CCR(r1)
- mtcr r7
-
- REST_GPR(6, r1)
- mtspr SPRN_DSCR, r6
-
- /* need preserve current MSR's MSR_TS bits */
- REST_GPR(5, r1)
- mfmsr r6
- rldicl r6, r6, 64 - MSR_TS_S_LG, 62
- rldimi r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtmsrd r5
-
- REST_NVGPRS(r1)
- addi r1, r1, SWITCH_FRAME_SIZE
- ld r5, PPC_LR_STKOFF(r1)
- mtlr r5
+ addi r1, r1, PPC_MIN_STKFRM
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
blr
EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
index f3b23759e017..372a82fa2de3 100644
--- a/arch/powerpc/kvm/trace_book3s.h
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -14,7 +14,6 @@
{0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \
- {0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index fef3e1eb3a19..4c4dfc473800 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -833,6 +833,15 @@ EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
/*
* Flush partition scoped translations from LPID (=LPIDR)
*/
+void radix__flush_tlb_lpid(unsigned int lpid)
+{
+ _tlbie_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
void radix__local_flush_tlb_lpid(unsigned int lpid)
{
_tlbiel_lpid(lpid, RIC_FLUSH_ALL);
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ad054dd0d666..5df6290d1ccc 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,7 +7,7 @@
* 2 of the License, or (at your option) any later version.
**/
-#include <linux/compat_time.h>
+#include <linux/time.h>
#include <linux/oprofile.h>
#include <linux/sched.h>
#include <asm/processor.h>
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..fe451348ae57 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -31,6 +31,7 @@ config RISCV
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_CONTIGUOUS
+ select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GENERIC_DMA_COHERENT
select HAVE_PERF_EVENTS
select IRQ_DOMAIN
@@ -108,10 +109,12 @@ config ARCH_RV32I
select GENERIC_LIB_ASHRDI3
select GENERIC_LIB_LSHRDI3
select GENERIC_LIB_UCMPDI2
+ select GENERIC_LIB_UMODDI3
config ARCH_RV64I
bool "RV64I"
select 64BIT
+ select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
@@ -208,14 +211,61 @@ config RISCV_BASE_PMU
endmenu
+config FPU
+ bool "FPU support"
+ default y
+ help
+ Say N here if you want to disable all floating-point related procedure
+ in the kernel.
+
+ If you don't know what to do here, say Y.
+
endmenu
-menu "Kernel type"
+menu "Kernel features"
source "kernel/Kconfig.hz"
endmenu
+menu "Boot options"
+
+config CMDLINE_BOOL
+ bool "Built-in kernel command line"
+ help
+ For most platforms, it is firmware or second stage bootloader
+ that by default specifies the kernel command line options.
+ However, it might be necessary or advantageous to either override
+ the default kernel command line or add a few extra options to it.
+ For such cases, this option allows hardcoding command line options
+ directly into the kernel.
+
+ For that, choose 'Y' here and fill in the extra boot parameters
+ in CONFIG_CMDLINE.
+
+ The built-in options will be concatenated to the default command
+ line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
+ command line will be ignored and replaced by the built-in string.
+
+config CMDLINE
+ string "Built-in kernel command string"
+ depends on CMDLINE_BOOL
+ default ""
+ help
+ Supply command-line options at build time by entering them here.
+
+config CMDLINE_FORCE
+ bool "Built-in command line overrides bootloader arguments"
+ depends on CMDLINE_BOOL
+ help
+ Set this option to 'Y' to have the kernel ignore the bootloader
+ or firmware command line. Instead, the built-in command line
+ will be used exclusively.
+
+ If you don't know what to do here, say N.
+
+endmenu
+
menu "Bus support"
config PCI
diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug
index 3224ff6ecf6e..c5a72f17c469 100644
--- a/arch/riscv/Kconfig.debug
+++ b/arch/riscv/Kconfig.debug
@@ -1,37 +1,2 @@
-
-config CMDLINE_BOOL
- bool "Built-in kernel command line"
- help
- For most platforms, it is firmware or second stage bootloader
- that by default specifies the kernel command line options.
- However, it might be necessary or advantageous to either override
- the default kernel command line or add a few extra options to it.
- For such cases, this option allows hardcoding command line options
- directly into the kernel.
-
- For that, choose 'Y' here and fill in the extra boot parameters
- in CONFIG_CMDLINE.
-
- The built-in options will be concatenated to the default command
- line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
- command line will be ignored and replaced by the built-in string.
-
-config CMDLINE
- string "Built-in kernel command string"
- depends on CMDLINE_BOOL
- default ""
- help
- Supply command-line options at build time by entering them here.
-
-config CMDLINE_FORCE
- bool "Built-in command line overrides bootloader arguments"
- depends on CMDLINE_BOOL
- help
- Set this option to 'Y' to have the kernel ignore the bootloader
- or firmware command line. Instead, the built-in command line
- will be used exclusively.
-
- If you don't know what to do here, say N.
-
config EARLY_PRINTK
def_bool y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 61ec42405ec9..d10146197533 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -25,10 +25,7 @@ ifeq ($(CONFIG_ARCH_RV64I),y)
KBUILD_CFLAGS += -mabi=lp64
KBUILD_AFLAGS += -mabi=lp64
-
- KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0500, -DCONFIG_ARCH_SUPPORTS_INT128)
- KBUILD_MARCH = rv64im
KBUILD_LDFLAGS += -melf64lriscv
else
BITS := 32
@@ -36,22 +33,20 @@ else
KBUILD_CFLAGS += -mabi=ilp32
KBUILD_AFLAGS += -mabi=ilp32
- KBUILD_MARCH = rv32im
KBUILD_LDFLAGS += -melf32lriscv
endif
KBUILD_CFLAGS += -Wall
-ifeq ($(CONFIG_RISCV_ISA_A),y)
- KBUILD_ARCH_A = a
-endif
-ifeq ($(CONFIG_RISCV_ISA_C),y)
- KBUILD_ARCH_C = c
-endif
-
-KBUILD_AFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)fd$(KBUILD_ARCH_C)
+# ISA string setting
+riscv-march-$(CONFIG_ARCH_RV32I) := rv32im
+riscv-march-$(CONFIG_ARCH_RV64I) := rv64im
+riscv-march-$(CONFIG_RISCV_ISA_A) := $(riscv-march-y)a
+riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
+riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
+KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
+KBUILD_AFLAGS += -march=$(riscv-march-y)
-KBUILD_CFLAGS += -march=$(KBUILD_MARCH)$(KBUILD_ARCH_A)$(KBUILD_ARCH_C)
KBUILD_CFLAGS += -mno-save-restore
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index efdbe311e936..6a646d9ea780 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -13,7 +13,6 @@ generic-y += errno.h
generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
-generic-y += futex.h
generic-y += hardirq.h
generic-y += hash.h
generic-y += hw_irq.h
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
new file mode 100644
index 000000000000..3b19eba1bc8e
--- /dev/null
+++ b/arch/riscv/include/asm/futex.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2006 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (c) 2018 Jim Wilson (jimw@sifive.com)
+ */
+
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#ifndef CONFIG_RISCV_ISA_A
+/*
+ * Use the generic interrupt disabling versions if the A extension
+ * is not supported.
+ */
+#ifdef CONFIG_SMP
+#error "Can't support generic futex calls without A extension on SMP"
+#endif
+#include <asm-generic/futex.h>
+
+#else /* CONFIG_RISCV_ISA_A */
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+#include <asm/asm.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+{ \
+ uintptr_t tmp; \
+ __enable_user_access(); \
+ __asm__ __volatile__ ( \
+ "1: " insn " \n" \
+ "2: \n" \
+ " .section .fixup,\"ax\" \n" \
+ " .balign 4 \n" \
+ "3: li %[r],%[e] \n" \
+ " jump 2b,%[t] \n" \
+ " .previous \n" \
+ " .section __ex_table,\"a\" \n" \
+ " .balign " RISCV_SZPTR " \n" \
+ " " RISCV_PTR " 1b, 3b \n" \
+ " .previous \n" \
+ : [r] "+r" (ret), [ov] "=&r" (oldval), \
+ [u] "+m" (*uaddr), [t] "=&r" (tmp) \
+ : [op] "Jr" (oparg), [e] "i" (-EFAULT) \
+ : "memory"); \
+ __disable_user_access(); \
+}
+
+static inline int
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+{
+ int oldval = 0, ret = 0;
+
+ pagefault_disable();
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("amoswap.w.aqrl %[ov],%z[op],%[u]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("amoadd.w.aqrl %[ov],%z[op],%[u]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("amoor.w.aqrl %[ov],%z[op],%[u]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("amoand.w.aqrl %[ov],%z[op],%[u]",
+ ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("amoxor.w.aqrl %[ov],%z[op],%[u]",
+ ret, oldval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ pagefault_enable();
+
+ if (!ret)
+ *oval = oldval;
+
+ return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int ret = 0;
+ u32 val;
+ uintptr_t tmp;
+
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ __enable_user_access();
+ __asm__ __volatile__ (
+ "1: lr.w.aqrl %[v],%[u] \n"
+ " bne %[v],%z[ov],3f \n"
+ "2: sc.w.aqrl %[t],%z[nv],%[u] \n"
+ " bnez %[t],1b \n"
+ "3: \n"
+ " .section .fixup,\"ax\" \n"
+ " .balign 4 \n"
+ "4: li %[r],%[e] \n"
+ " jump 3b,%[t] \n"
+ " .previous \n"
+ " .section __ex_table,\"a\" \n"
+ " .balign " RISCV_SZPTR " \n"
+ " " RISCV_PTR " 1b, 4b \n"
+ " " RISCV_PTR " 2b, 4b \n"
+ " .previous \n"
+ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
+ : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+ : "memory");
+ __disable_user_access();
+
+ *uval = val;
+ return ret;
+}
+
+#endif /* CONFIG_RISCV_ISA_A */
+#endif /* _ASM_FUTEX_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 3fe4af8147d2..50de774d827a 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -88,7 +88,7 @@ static inline void wait_for_interrupt(void)
}
struct device_node;
-extern int riscv_of_processor_hart(struct device_node *node);
+int riscv_of_processor_hartid(struct device_node *node);
extern void riscv_fill_hwcap(void);
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 36016845461d..41aa73b476f4 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -14,16 +14,24 @@
#ifndef _ASM_RISCV_SMP_H
#define _ASM_RISCV_SMP_H
-/* This both needs asm-offsets.h and is used when generating it. */
-#ifndef GENERATING_ASM_OFFSETS
-#include <asm/asm-offsets.h>
-#endif
-
#include <linux/cpumask.h>
#include <linux/irqreturn.h>
+#include <linux/thread_info.h>
+
+#define INVALID_HARTID ULONG_MAX
+/*
+ * Mapping between linux logical cpu index and hartid.
+ */
+extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
+#define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
+
+struct seq_file;
#ifdef CONFIG_SMP
+/* print IPI stats */
+void show_ipi_stats(struct seq_file *p, int prec);
+
/* SMP initialization hook for setup_arch */
void __init setup_smp(void);
@@ -33,14 +41,31 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
/* Hook for the generic smp_call_function_single() routine. */
void arch_send_call_function_single_ipi(int cpu);
+int riscv_hartid_to_cpuid(int hartid);
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
+
/*
- * This is particularly ugly: it appears we can't actually get the definition
- * of task_struct here, but we need access to the CPU this task is running on.
- * Instead of using C we're using asm-offsets.h to get the current processor
- * ID.
+ * Obtains the hart ID of the currently executing task. This relies on
+ * THREAD_INFO_IN_TASK, but we define that unconditionally.
*/
-#define raw_smp_processor_id() (*((int*)((char*)get_current() + TASK_TI_CPU)))
+#define raw_smp_processor_id() (current_thread_info()->cpu)
-#endif /* CONFIG_SMP */
+#else
+
+static inline void show_ipi_stats(struct seq_file *p, int prec)
+{
+}
+static inline int riscv_hartid_to_cpuid(int hartid)
+{
+ return 0;
+}
+
+static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
+ struct cpumask *out)
+{
+ cpumask_set_cpu(cpuid_to_hartid_map(0), out);
+}
+
+#endif /* CONFIG_SMP */
#endif /* _ASM_RISCV_SMP_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index dd6b05bff75b..733559083f24 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -18,6 +18,7 @@
#include <asm/ptrace.h>
#include <asm/csr.h>
+#ifdef CONFIG_FPU
extern void __fstate_save(struct task_struct *save_to);
extern void __fstate_restore(struct task_struct *restore_from);
@@ -55,6 +56,14 @@ static inline void __switch_to_aux(struct task_struct *prev,
fstate_restore(next, task_pt_regs(next));
}
+extern bool has_fpu;
+#else
+#define has_fpu false
+#define fstate_save(task, regs) do { } while (0)
+#define fstate_restore(task, regs) do { } while (0)
+#define __switch_to_aux(__prev, __next) do { } while (0)
+#endif
+
extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
@@ -62,7 +71,8 @@ extern struct task_struct *__switch_to(struct task_struct *,
do { \
struct task_struct *__prev = (prev); \
struct task_struct *__next = (next); \
- __switch_to_aux(__prev, __next); \
+ if (has_fpu) \
+ __switch_to_aux(__prev, __next); \
((last) = __switch_to(__prev, __next)); \
} while (0)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 85c2d8bae957..54fee0cadb1e 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
#define _ASM_RISCV_TLBFLUSH_H
#include <linux/mm_types.h>
+#include <asm/smp.h>
/*
* Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
@@ -49,13 +50,22 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
#include <asm/sbi.h>
+static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
+ unsigned long size)
+{
+ struct cpumask hmask;
+
+ cpumask_clear(&hmask);
+ riscv_cpuid_to_hartid_mask(cmask, &hmask);
+ sbi_remote_sfence_vma(hmask.bits, start, size);
+}
+
#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
#define flush_tlb_range(vma, start, end) \
- sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
- start, (end) - (start))
+ remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
#define flush_tlb_mm(mm) \
- sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1)
+ remote_sfence_vma(mm_cpumask(mm), 0, -1)
#endif /* CONFIG_SMP */
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 0caea01d5cca..eff7aa9aa163 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -16,6 +16,7 @@
* be included multiple times. See uapi/asm/syscalls.h for more info.
*/
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index 1e0dfc36aab9..644a00ce6e2e 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -19,7 +19,10 @@ typedef unsigned long elf_greg_t;
typedef struct user_regs_struct elf_gregset_t;
#define ELF_NGREG (sizeof(elf_gregset_t) / sizeof(elf_greg_t))
+/* We don't support f without d, or q. */
+typedef __u64 elf_fpreg_t;
typedef union __riscv_fp_state elf_fpregset_t;
+#define ELF_NFPREG (sizeof(struct __riscv_d_ext_state) / sizeof(elf_fpreg_t))
#if __riscv_xlen == 64
#define ELF_RISCV_R_SYM(r_info) ELF64_R_SYM(r_info)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index e1274fc03af4..f13f7f276639 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -31,6 +31,7 @@ obj-y += vdso/
CFLAGS_setup.o := -mcmodel=medany
+obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index 0bc86e5f8f3f..cb35ffd8ec6b 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -22,13 +22,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
{
this_leaf->level = level;
this_leaf->type = type;
- /* not a sector cache */
- this_leaf->physical_line_partition = 1;
- /* TODO: Add to DTS */
- this_leaf->attributes =
- CACHE_WRITE_BACK
- | CACHE_READ_ALLOCATE
- | CACHE_WRITE_ALLOCATE;
}
static int __init_cache_level(unsigned int cpu)
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index ca6c81e54e37..3a5a2ee31547 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -14,9 +14,13 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/smp.h>
-/* Return -1 if not a valid hart */
-int riscv_of_processor_hart(struct device_node *node)
+/*
+ * Returns the hart ID of the given device tree node, or -1 if the device tree
+ * node isn't a RISC-V hart.
+ */
+int riscv_of_processor_hartid(struct device_node *node)
{
const char *isa, *status;
u32 hart;
@@ -58,6 +62,64 @@ int riscv_of_processor_hart(struct device_node *node)
#ifdef CONFIG_PROC_FS
+static void print_isa(struct seq_file *f, const char *orig_isa)
+{
+ static const char *ext = "mafdc";
+ const char *isa = orig_isa;
+ const char *e;
+
+ /*
+ * Linux doesn't support rv32e or rv128i, and we only support booting
+ * kernels on harts with the same ISA that the kernel is compiled for.
+ */
+#if defined(CONFIG_32BIT)
+ if (strncmp(isa, "rv32i", 5) != 0)
+ return;
+#elif defined(CONFIG_64BIT)
+ if (strncmp(isa, "rv64i", 5) != 0)
+ return;
+#endif
+
+ /* Print the base ISA, as we already know it's legal. */
+ seq_puts(f, "isa\t\t: ");
+ seq_write(f, isa, 5);
+ isa += 5;
+
+ /*
+ * Check the rest of the ISA string for valid extensions, printing those
+ * we find. RISC-V ISA strings define an order, so we only print the
+ * extension bits when they're in order.
+ */
+ for (e = ext; *e != '\0'; ++e) {
+ if (isa[0] == e[0]) {
+ seq_write(f, isa, 1);
+ isa++;
+ }
+ }
+ seq_puts(f, "\n");
+
+ /*
+ * If we were given an unsupported ISA in the device tree then print
+ * a bit of info describing what went wrong.
+ */
+ if (isa[0] != '\0')
+ pr_info("unsupported ISA \"%s\" in device tree", orig_isa);
+}
+
+static void print_mmu(struct seq_file *f, const char *mmu_type)
+{
+#if defined(CONFIG_32BIT)
+ if (strcmp(mmu_type, "riscv,sv32") != 0)
+ return;
+#elif defined(CONFIG_64BIT)
+ if (strcmp(mmu_type, "riscv,sv39") != 0 &&
+ strcmp(mmu_type, "riscv,sv48") != 0)
+ return;
+#endif
+
+ seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+}
+
static void *c_start(struct seq_file *m, loff_t *pos)
{
*pos = cpumask_next(*pos - 1, cpu_online_mask);
@@ -78,21 +140,20 @@ static void c_stop(struct seq_file *m, void *v)
static int c_show(struct seq_file *m, void *v)
{
- unsigned long hart_id = (unsigned long)v - 1;
- struct device_node *node = of_get_cpu_node(hart_id, NULL);
+ unsigned long cpu_id = (unsigned long)v - 1;
+ struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id),
+ NULL);
const char *compat, *isa, *mmu;
- seq_printf(m, "hart\t: %lu\n", hart_id);
- if (!of_property_read_string(node, "riscv,isa", &isa)
- && isa[0] == 'r'
- && isa[1] == 'v')
- seq_printf(m, "isa\t: %s\n", isa);
- if (!of_property_read_string(node, "mmu-type", &mmu)
- && !strncmp(mmu, "riscv,", 6))
- seq_printf(m, "mmu\t: %s\n", mmu+6);
+ seq_printf(m, "processor\t: %lu\n", cpu_id);
+ seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
+ if (!of_property_read_string(node, "riscv,isa", &isa))
+ print_isa(m, isa);
+ if (!of_property_read_string(node, "mmu-type", &mmu))
+ print_mmu(m, mmu);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
- seq_printf(m, "uarch\t: %s\n", compat);
+ seq_printf(m, "uarch\t\t: %s\n", compat);
seq_puts(m, "\n");
return 0;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 17011a870044..5493f3228704 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -22,6 +22,9 @@
#include <asm/hwcap.h>
unsigned long elf_hwcap __read_mostly;
+#ifdef CONFIG_FPU
+bool has_fpu __read_mostly;
+#endif
void riscv_fill_hwcap(void)
{
@@ -57,5 +60,17 @@ void riscv_fill_hwcap(void)
for (i = 0; i < strlen(isa); ++i)
elf_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+ /* We don't support systems with F but without D, so mask those out
+ * here. */
+ if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
+ pr_info("This kernel does not support systems with F but not D");
+ elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
+ }
+
pr_info("elf_hwcap is 0x%lx", elf_hwcap);
+
+#ifdef CONFIG_FPU
+ if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
+ has_fpu = true;
+#endif
}
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index fa2c08e3c05e..13d4826ab2a1 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -168,7 +168,6 @@ ENTRY(handle_exception)
/* Handle interrupts */
move a0, sp /* pt_regs */
- move a1, s4 /* scause */
tail do_IRQ
1:
/* Exceptions run with interrupts enabled */
@@ -357,93 +356,6 @@ ENTRY(__switch_to)
ret
ENDPROC(__switch_to)
-ENTRY(__fstate_save)
- li a2, TASK_THREAD_F0
- add a0, a0, a2
- li t1, SR_FS
- csrs sstatus, t1
- frcsr t0
- fsd f0, TASK_THREAD_F0_F0(a0)
- fsd f1, TASK_THREAD_F1_F0(a0)
- fsd f2, TASK_THREAD_F2_F0(a0)
- fsd f3, TASK_THREAD_F3_F0(a0)
- fsd f4, TASK_THREAD_F4_F0(a0)
- fsd f5, TASK_THREAD_F5_F0(a0)
- fsd f6, TASK_THREAD_F6_F0(a0)
- fsd f7, TASK_THREAD_F7_F0(a0)
- fsd f8, TASK_THREAD_F8_F0(a0)
- fsd f9, TASK_THREAD_F9_F0(a0)
- fsd f10, TASK_THREAD_F10_F0(a0)
- fsd f11, TASK_THREAD_F11_F0(a0)
- fsd f12, TASK_THREAD_F12_F0(a0)
- fsd f13, TASK_THREAD_F13_F0(a0)
- fsd f14, TASK_THREAD_F14_F0(a0)
- fsd f15, TASK_THREAD_F15_F0(a0)
- fsd f16, TASK_THREAD_F16_F0(a0)
- fsd f17, TASK_THREAD_F17_F0(a0)
- fsd f18, TASK_THREAD_F18_F0(a0)
- fsd f19, TASK_THREAD_F19_F0(a0)
- fsd f20, TASK_THREAD_F20_F0(a0)
- fsd f21, TASK_THREAD_F21_F0(a0)
- fsd f22, TASK_THREAD_F22_F0(a0)
- fsd f23, TASK_THREAD_F23_F0(a0)
- fsd f24, TASK_THREAD_F24_F0(a0)
- fsd f25, TASK_THREAD_F25_F0(a0)
- fsd f26, TASK_THREAD_F26_F0(a0)
- fsd f27, TASK_THREAD_F27_F0(a0)
- fsd f28, TASK_THREAD_F28_F0(a0)
- fsd f29, TASK_THREAD_F29_F0(a0)
- fsd f30, TASK_THREAD_F30_F0(a0)
- fsd f31, TASK_THREAD_F31_F0(a0)
- sw t0, TASK_THREAD_FCSR_F0(a0)
- csrc sstatus, t1
- ret
-ENDPROC(__fstate_save)
-
-ENTRY(__fstate_restore)
- li a2, TASK_THREAD_F0
- add a0, a0, a2
- li t1, SR_FS
- lw t0, TASK_THREAD_FCSR_F0(a0)
- csrs sstatus, t1
- fld f0, TASK_THREAD_F0_F0(a0)
- fld f1, TASK_THREAD_F1_F0(a0)
- fld f2, TASK_THREAD_F2_F0(a0)
- fld f3, TASK_THREAD_F3_F0(a0)
- fld f4, TASK_THREAD_F4_F0(a0)
- fld f5, TASK_THREAD_F5_F0(a0)
- fld f6, TASK_THREAD_F6_F0(a0)
- fld f7, TASK_THREAD_F7_F0(a0)
- fld f8, TASK_THREAD_F8_F0(a0)
- fld f9, TASK_THREAD_F9_F0(a0)
- fld f10, TASK_THREAD_F10_F0(a0)
- fld f11, TASK_THREAD_F11_F0(a0)
- fld f12, TASK_THREAD_F12_F0(a0)
- fld f13, TASK_THREAD_F13_F0(a0)
- fld f14, TASK_THREAD_F14_F0(a0)
- fld f15, TASK_THREAD_F15_F0(a0)
- fld f16, TASK_THREAD_F16_F0(a0)
- fld f17, TASK_THREAD_F17_F0(a0)
- fld f18, TASK_THREAD_F18_F0(a0)
- fld f19, TASK_THREAD_F19_F0(a0)
- fld f20, TASK_THREAD_F20_F0(a0)
- fld f21, TASK_THREAD_F21_F0(a0)
- fld f22, TASK_THREAD_F22_F0(a0)
- fld f23, TASK_THREAD_F23_F0(a0)
- fld f24, TASK_THREAD_F24_F0(a0)
- fld f25, TASK_THREAD_F25_F0(a0)
- fld f26, TASK_THREAD_F26_F0(a0)
- fld f27, TASK_THREAD_F27_F0(a0)
- fld f28, TASK_THREAD_F28_F0(a0)
- fld f29, TASK_THREAD_F29_F0(a0)
- fld f30, TASK_THREAD_F30_F0(a0)
- fld f31, TASK_THREAD_F31_F0(a0)
- fscsr t0
- csrc sstatus, t1
- ret
-ENDPROC(__fstate_restore)
-
-
.section ".rodata"
/* Exception vector table */
ENTRY(excp_vect_table)
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
new file mode 100644
index 000000000000..1defb0618aff
--- /dev/null
+++ b/arch/riscv/kernel/fpu.S
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/asm-offsets.h>
+
+ENTRY(__fstate_save)
+ li a2, TASK_THREAD_F0
+ add a0, a0, a2
+ li t1, SR_FS
+ csrs sstatus, t1
+ frcsr t0
+ fsd f0, TASK_THREAD_F0_F0(a0)
+ fsd f1, TASK_THREAD_F1_F0(a0)
+ fsd f2, TASK_THREAD_F2_F0(a0)
+ fsd f3, TASK_THREAD_F3_F0(a0)
+ fsd f4, TASK_THREAD_F4_F0(a0)
+ fsd f5, TASK_THREAD_F5_F0(a0)
+ fsd f6, TASK_THREAD_F6_F0(a0)
+ fsd f7, TASK_THREAD_F7_F0(a0)
+ fsd f8, TASK_THREAD_F8_F0(a0)
+ fsd f9, TASK_THREAD_F9_F0(a0)
+ fsd f10, TASK_THREAD_F10_F0(a0)
+ fsd f11, TASK_THREAD_F11_F0(a0)
+ fsd f12, TASK_THREAD_F12_F0(a0)
+ fsd f13, TASK_THREAD_F13_F0(a0)
+ fsd f14, TASK_THREAD_F14_F0(a0)
+ fsd f15, TASK_THREAD_F15_F0(a0)
+ fsd f16, TASK_THREAD_F16_F0(a0)
+ fsd f17, TASK_THREAD_F17_F0(a0)
+ fsd f18, TASK_THREAD_F18_F0(a0)
+ fsd f19, TASK_THREAD_F19_F0(a0)
+ fsd f20, TASK_THREAD_F20_F0(a0)
+ fsd f21, TASK_THREAD_F21_F0(a0)
+ fsd f22, TASK_THREAD_F22_F0(a0)
+ fsd f23, TASK_THREAD_F23_F0(a0)
+ fsd f24, TASK_THREAD_F24_F0(a0)
+ fsd f25, TASK_THREAD_F25_F0(a0)
+ fsd f26, TASK_THREAD_F26_F0(a0)
+ fsd f27, TASK_THREAD_F27_F0(a0)
+ fsd f28, TASK_THREAD_F28_F0(a0)
+ fsd f29, TASK_THREAD_F29_F0(a0)
+ fsd f30, TASK_THREAD_F30_F0(a0)
+ fsd f31, TASK_THREAD_F31_F0(a0)
+ sw t0, TASK_THREAD_FCSR_F0(a0)
+ csrc sstatus, t1
+ ret
+ENDPROC(__fstate_save)
+
+ENTRY(__fstate_restore)
+ li a2, TASK_THREAD_F0
+ add a0, a0, a2
+ li t1, SR_FS
+ lw t0, TASK_THREAD_FCSR_F0(a0)
+ csrs sstatus, t1
+ fld f0, TASK_THREAD_F0_F0(a0)
+ fld f1, TASK_THREAD_F1_F0(a0)
+ fld f2, TASK_THREAD_F2_F0(a0)
+ fld f3, TASK_THREAD_F3_F0(a0)
+ fld f4, TASK_THREAD_F4_F0(a0)
+ fld f5, TASK_THREAD_F5_F0(a0)
+ fld f6, TASK_THREAD_F6_F0(a0)
+ fld f7, TASK_THREAD_F7_F0(a0)
+ fld f8, TASK_THREAD_F8_F0(a0)
+ fld f9, TASK_THREAD_F9_F0(a0)
+ fld f10, TASK_THREAD_F10_F0(a0)
+ fld f11, TASK_THREAD_F11_F0(a0)
+ fld f12, TASK_THREAD_F12_F0(a0)
+ fld f13, TASK_THREAD_F13_F0(a0)
+ fld f14, TASK_THREAD_F14_F0(a0)
+ fld f15, TASK_THREAD_F15_F0(a0)
+ fld f16, TASK_THREAD_F16_F0(a0)
+ fld f17, TASK_THREAD_F17_F0(a0)
+ fld f18, TASK_THREAD_F18_F0(a0)
+ fld f19, TASK_THREAD_F19_F0(a0)
+ fld f20, TASK_THREAD_F20_F0(a0)
+ fld f21, TASK_THREAD_F21_F0(a0)
+ fld f22, TASK_THREAD_F22_F0(a0)
+ fld f23, TASK_THREAD_F23_F0(a0)
+ fld f24, TASK_THREAD_F24_F0(a0)
+ fld f25, TASK_THREAD_F25_F0(a0)
+ fld f26, TASK_THREAD_F26_F0(a0)
+ fld f27, TASK_THREAD_F27_F0(a0)
+ fld f28, TASK_THREAD_F28_F0(a0)
+ fld f29, TASK_THREAD_F29_F0(a0)
+ fld f30, TASK_THREAD_F30_F0(a0)
+ fld f31, TASK_THREAD_F31_F0(a0)
+ fscsr t0
+ csrc sstatus, t1
+ ret
+ENDPROC(__fstate_restore)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index c4d2c63f9a29..711190d473d4 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -47,6 +47,8 @@ ENTRY(_start)
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
+ la a2, boot_cpu_hartid
+ REG_S a0, (a2)
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
@@ -55,7 +57,7 @@ ENTRY(_start)
/* Restore C environment */
la tp, init_task
- sw s0, TASK_TI_CPU(tp)
+ sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union
li a0, ASM_THREAD_SIZE
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 0cfac48a1272..48e6b7db83a1 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -8,6 +8,8 @@
#include <linux/interrupt.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <asm/smp.h>
/*
* Possible interrupt causes:
@@ -24,12 +26,18 @@
*/
#define INTERRUPT_CAUSE_FLAG (1UL << (__riscv_xlen - 1))
-asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ show_ipi_stats(p, prec);
+ return 0;
+}
+
+asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
- switch (cause & ~INTERRUPT_CAUSE_FLAG) {
+ switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) {
case INTERRUPT_CAUSE_TIMER:
riscv_timer_interrupt();
break;
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 5721624886a1..8a5593ff9ff3 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -75,7 +75,6 @@ ENTRY(return_to_handler)
RESTORE_RET_ABI_STATE
jalr a1
ENDPROC(return_to_handler)
-EXPORT_SYMBOL(return_to_handler)
#endif
#ifndef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index d7c6ca7c95ae..bef19993ea92 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -76,7 +76,9 @@ void show_regs(struct pt_regs *regs)
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
+ regs->sstatus = SR_SPIE;
+ if (has_fpu)
+ regs->sstatus |= SR_FS_INITIAL;
regs->sepc = pc;
regs->sp = sp;
set_fs(USER_DS);
@@ -84,12 +86,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
void flush_thread(void)
{
+#ifdef CONFIG_FPU
/*
* Reset FPU context
* frm: round to nearest, ties to even (IEEE default)
* fflags: accrued exceptions cleared
*/
memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
+#endif
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 9f82a7e34c64..60f1e02eed36 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -28,6 +28,9 @@
enum riscv_regset {
REGSET_X,
+#ifdef CONFIG_FPU
+ REGSET_F,
+#endif
};
static int riscv_gpr_get(struct task_struct *target,
@@ -54,6 +57,45 @@ static int riscv_gpr_set(struct task_struct *target,
return ret;
}
+#ifdef CONFIG_FPU
+static int riscv_fpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+ struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+ offsetof(struct __riscv_d_ext_state, fcsr));
+ if (!ret) {
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, fstate, 0,
+ offsetof(struct __riscv_d_ext_state, fcsr) +
+ sizeof(fstate->fcsr));
+ }
+
+ return ret;
+}
+
+static int riscv_fpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct __riscv_d_ext_state *fstate = &target->thread.fstate;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+ offsetof(struct __riscv_d_ext_state, fcsr));
+ if (!ret) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0,
+ offsetof(struct __riscv_d_ext_state, fcsr) +
+ sizeof(fstate->fcsr));
+ }
+
+ return ret;
+}
+#endif
static const struct user_regset riscv_user_regset[] = {
[REGSET_X] = {
@@ -64,6 +106,16 @@ static const struct user_regset riscv_user_regset[] = {
.get = &riscv_gpr_get,
.set = &riscv_gpr_set,
},
+#ifdef CONFIG_FPU
+ [REGSET_F] = {
+ .core_note_type = NT_PRFPREG,
+ .n = ELF_NFPREG,
+ .size = sizeof(elf_fpreg_t),
+ .align = sizeof(elf_fpreg_t),
+ .get = &riscv_fpr_get,
+ .set = &riscv_fpr_set,
+ },
+#endif
};
static const struct user_regset_view riscv_user_native_view = {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b2d26d9d8489..2c290e6aaa6e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -81,6 +81,16 @@ EXPORT_SYMBOL(empty_zero_page);
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
+unsigned long boot_cpu_hartid;
+
+unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
+ [0 ... NR_CPUS-1] = INVALID_HARTID
+};
+
+void __init smp_setup_processor_id(void)
+{
+ cpuid_to_hartid_map(0) = boot_cpu_hartid;
+}
#ifdef CONFIG_BLK_DEV_INITRD
static void __init setup_initrd(void)
@@ -227,7 +237,10 @@ void __init setup_arch(char **cmdline_p)
setup_bootmem();
paging_init();
unflatten_device_tree();
+
+#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
+#endif
#ifdef CONFIG_SMP
setup_smp();
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 718d0c984ef0..f9b5e7e352ef 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -37,45 +37,69 @@ struct rt_sigframe {
struct ucontext uc;
};
-static long restore_d_state(struct pt_regs *regs,
- struct __riscv_d_ext_state __user *state)
+#ifdef CONFIG_FPU
+static long restore_fp_state(struct pt_regs *regs,
+ union __riscv_fp_state *sc_fpregs)
{
long err;
+ struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+ size_t i;
+
err = __copy_from_user(&current->thread.fstate, state, sizeof(*state));
- if (likely(!err))
- fstate_restore(current, regs);
+ if (unlikely(err))
+ return err;
+
+ fstate_restore(current, regs);
+
+ /* We support no other extension state at this time. */
+ for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+ u32 value;
+
+ err = __get_user(value, &sc_fpregs->q.reserved[i]);
+ if (unlikely(err))
+ break;
+ if (value != 0)
+ return -EINVAL;
+ }
+
return err;
}
-static long save_d_state(struct pt_regs *regs,
- struct __riscv_d_ext_state __user *state)
+static long save_fp_state(struct pt_regs *regs,
+ union __riscv_fp_state *sc_fpregs)
{
+ long err;
+ struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
+ size_t i;
+
fstate_save(current, regs);
- return __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+ err = __copy_to_user(state, &current->thread.fstate, sizeof(*state));
+ if (unlikely(err))
+ return err;
+
+ /* We support no other extension state at this time. */
+ for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) {
+ err = __put_user(0, &sc_fpregs->q.reserved[i]);
+ if (unlikely(err))
+ break;
+ }
+
+ return err;
}
+#else
+#define save_fp_state(task, regs) (0)
+#define restore_fp_state(task, regs) (0)
+#endif
static long restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc)
{
long err;
- size_t i;
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
- if (unlikely(err))
- return err;
/* Restore the floating-point state. */
- err = restore_d_state(regs, &sc->sc_fpregs.d);
- if (unlikely(err))
- return err;
- /* We support no other extension state at this time. */
- for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++) {
- u32 value;
- err = __get_user(value, &sc->sc_fpregs.q.reserved[i]);
- if (unlikely(err))
- break;
- if (value != 0)
- return -EINVAL;
- }
+ if (has_fpu)
+ err |= restore_fp_state(regs, &sc->sc_fpregs);
return err;
}
@@ -124,14 +148,11 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
{
struct sigcontext __user *sc = &frame->uc.uc_mcontext;
long err;
- size_t i;
/* sc_regs is structured the same as the start of pt_regs */
err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
/* Save the floating-point state. */
- err |= save_d_state(regs, &sc->sc_fpregs.d);
- /* We support no other extension state at this time. */
- for (i = 0; i < ARRAY_SIZE(sc->sc_fpregs.q.reserved); i++)
- err |= __put_user(0, &sc->sc_fpregs.q.reserved[i]);
+ if (has_fpu)
+ err |= save_fp_state(regs, &sc->sc_fpregs);
return err;
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 906fe21ea21b..57b1383e5ef7 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -22,23 +22,44 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
-/* A collection of single bit ipi messages. */
-static struct {
- unsigned long bits ____cacheline_aligned;
-} ipi_data[NR_CPUS] __cacheline_aligned;
-
enum ipi_message_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_MAX
};
+/* A collection of single bit ipi messages. */
+static struct {
+ unsigned long stats[IPI_MAX] ____cacheline_aligned;
+ unsigned long bits ____cacheline_aligned;
+} ipi_data[NR_CPUS] __cacheline_aligned;
+
+int riscv_hartid_to_cpuid(int hartid)
+{
+ int i = -1;
+
+ for (i = 0; i < NR_CPUS; i++)
+ if (cpuid_to_hartid_map(i) == hartid)
+ return i;
+ pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
+ BUG();
+ return i;
+}
+
+void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
+{
+ int cpu;
+
+ for_each_cpu(cpu, in)
+ cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
+}
/* Unsupported */
int setup_profiling_timer(unsigned int multiplier)
{
@@ -48,6 +69,7 @@ int setup_profiling_timer(unsigned int multiplier)
void riscv_software_interrupt(void)
{
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
+ unsigned long *stats = ipi_data[smp_processor_id()].stats;
/* Clear pending IPI */
csr_clear(sip, SIE_SSIE);
@@ -62,11 +84,15 @@ void riscv_software_interrupt(void)
if (ops == 0)
return;
- if (ops & (1 << IPI_RESCHEDULE))
+ if (ops & (1 << IPI_RESCHEDULE)) {
+ stats[IPI_RESCHEDULE]++;
scheduler_ipi();
+ }
- if (ops & (1 << IPI_CALL_FUNC))
+ if (ops & (1 << IPI_CALL_FUNC)) {
+ stats[IPI_CALL_FUNC]++;
generic_smp_call_function_interrupt();
+ }
BUG_ON((ops >> IPI_MAX) != 0);
@@ -78,14 +104,36 @@ void riscv_software_interrupt(void)
static void
send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
{
- int i;
+ int cpuid, hartid;
+ struct cpumask hartid_mask;
+ cpumask_clear(&hartid_mask);
mb();
- for_each_cpu(i, to_whom)
- set_bit(operation, &ipi_data[i].bits);
-
+ for_each_cpu(cpuid, to_whom) {
+ set_bit(operation, &ipi_data[cpuid].bits);
+ hartid = cpuid_to_hartid_map(cpuid);
+ cpumask_set_cpu(hartid, &hartid_mask);
+ }
mb();
- sbi_send_ipi(cpumask_bits(to_whom));
+ sbi_send_ipi(cpumask_bits(&hartid_mask));
+}
+
+static const char * const ipi_names[] = {
+ [IPI_RESCHEDULE] = "Rescheduling interrupts",
+ [IPI_CALL_FUNC] = "Function call interrupts",
+};
+
+void show_ipi_stats(struct seq_file *p, int prec)
+{
+ unsigned int cpu, i;
+
+ for (i = 0; i < IPI_MAX; i++) {
+ seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i,
+ prec >= 4 ? " " : "");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]);
+ seq_printf(p, " %s\n", ipi_names[i]);
+ }
}
void arch_send_call_function_ipi_mask(struct cpumask *mask)
@@ -127,7 +175,7 @@ void smp_send_reschedule(int cpu)
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
- cpumask_t others, *mask;
+ cpumask_t others, hmask, *mask;
preempt_disable();
@@ -145,9 +193,11 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
- if (mm != current->active_mm || !local)
- sbi_remote_fence_i(others.bits);
- else {
+ if (mm != current->active_mm || !local) {
+ cpumask_clear(&hmask);
+ riscv_cpuid_to_hartid_mask(&others, &hmask);
+ sbi_remote_fence_i(hmask.bits);
+ } else {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 56abab6a9812..18cda0e8cf94 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -30,6 +30,7 @@
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/sched/task_stack.h>
+#include <linux/sched/mm.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -50,25 +51,33 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __init setup_smp(void)
{
struct device_node *dn = NULL;
- int hart, im_okay_therefore_i_am = 0;
+ int hart;
+ bool found_boot_cpu = false;
+ int cpuid = 1;
while ((dn = of_find_node_by_type(dn, "cpu"))) {
- hart = riscv_of_processor_hart(dn);
- if (hart >= 0) {
- set_cpu_possible(hart, true);
- set_cpu_present(hart, true);
- if (hart == smp_processor_id()) {
- BUG_ON(im_okay_therefore_i_am);
- im_okay_therefore_i_am = 1;
- }
+ hart = riscv_of_processor_hartid(dn);
+ if (hart < 0)
+ continue;
+
+ if (hart == cpuid_to_hartid_map(0)) {
+ BUG_ON(found_boot_cpu);
+ found_boot_cpu = 1;
+ continue;
}
+
+ cpuid_to_hartid_map(cpuid) = hart;
+ set_cpu_possible(cpuid, true);
+ set_cpu_present(cpuid, true);
+ cpuid++;
}
- BUG_ON(!im_okay_therefore_i_am);
+ BUG_ON(!found_boot_cpu);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
+ int hartid = cpuid_to_hartid_map(cpu);
tidle->thread_info.cpu = cpu;
/*
@@ -79,8 +88,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
* the spinning harts that they can continue the boot process.
*/
smp_mb();
- __cpu_up_stack_pointer[cpu] = task_stack_page(tidle) + THREAD_SIZE;
- __cpu_up_task_pointer[cpu] = tidle;
+ WRITE_ONCE(__cpu_up_stack_pointer[hartid],
+ task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
while (!cpu_online(cpu))
cpu_relax();
@@ -100,14 +110,22 @@ asmlinkage void __init smp_callin(void)
struct mm_struct *mm = &init_mm;
/* All kernel threads share the same mm context. */
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
current->active_mm = mm;
trap_init();
notify_cpu_starting(smp_processor_id());
set_cpu_online(smp_processor_id(), 1);
+ /*
+ * Remote TLB flushes are ignored while the CPU is offline, so emit
+ * a local TLB flush right now just in case.
+ */
local_flush_tlb_all();
- local_irq_enable();
+ /*
+ * Disable preemption before enabling interrupts, so we don't try to
+ * schedule a CPU that hasn't actually started yet.
+ */
preempt_disable();
+ local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 445ec84f9a47..5739bd05d289 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -2,6 +2,7 @@ lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
lib-y += uaccess.o
-lib-y += tishift.o
+
+lib-(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_32BIT) += udivdi3.o
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
index 70ef2724cdf6..bd2f2db557cc 100644
--- a/arch/riscv/mm/ioremap.c
+++ b/arch/riscv/mm/ioremap.c
@@ -42,7 +42,7 @@ static void __iomem *__ioremap_caller(phys_addr_t addr, size_t size,
/* Page-align mappings */
offset = addr & (~PAGE_MASK);
- addr &= PAGE_MASK;
+ addr -= offset;
size = PAGE_ALIGN(size + offset);
area = get_vm_area_caller(size, VM_IOREMAP, caller);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 039a3417dfc4..8b25e1f45b27 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -783,6 +783,17 @@ config VFIO_CCW
To compile this driver as a module, choose M here: the
module will be called vfio_ccw.
+config VFIO_AP
+ def_tristate n
+ prompt "VFIO support for AP devices"
+ depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+ help
+ This driver grants access to Adjunct Processor (AP) devices
+ via the VFIO mediated device interface.
+
+ To compile this driver as a module, choose M here: the module
+ will be called vfio_ap.
+
endmenu
menu "Dump support"
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 941d8cc6c9f5..259d1698ac50 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index eb6f75f24208..37fd60c20e22 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_MCRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index c54cb26eb7f5..812d9498d97b 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
int key_len;
unsigned long fc;
union {
- struct crypto_skcipher *blk;
+ struct crypto_sync_skcipher *blk;
struct crypto_cipher *cip;
} fallback;
};
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
u8 pcc_key[32];
int key_len;
unsigned long fc;
- struct crypto_skcipher *fallback;
+ struct crypto_sync_skcipher *fallback;
};
struct gcm_sg_walk {
@@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
unsigned int ret;
- crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
+ crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
- ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
+ ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
+ tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
CRYPTO_TFM_RES_MASK;
return ret;
@@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
unsigned int ret;
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
- skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
unsigned int ret;
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
- skcipher_request_set_tfm(req, sctx->fallback.blk);
+ skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
const char *name = tfm->__crt_alg->cra_name;
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
- CRYPTO_ALG_ASYNC |
+ sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(sctx->fallback.blk)) {
@@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
{
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- crypto_free_skcipher(sctx->fallback.blk);
+ crypto_free_sync_skcipher(sctx->fallback.blk);
}
static struct crypto_alg ecb_aes_alg = {
@@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
unsigned int ret;
- crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
+ crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
- ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+ ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
+ tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
CRYPTO_TFM_RES_MASK;
return ret;
@@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
{
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
unsigned int ret;
- skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
{
struct crypto_blkcipher *tfm = desc->tfm;
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
unsigned int ret;
- skcipher_request_set_tfm(req, xts_ctx->fallback);
+ skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
@@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
const char *name = tfm->__crt_alg->cra_name;
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
- CRYPTO_ALG_ASYNC |
+ xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(xts_ctx->fallback)) {
@@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
{
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- crypto_free_skcipher(xts_ctx->fallback);
+ crypto_free_sync_skcipher(xts_ctx->fallback);
}
static struct crypto_alg xts_aes_alg = {
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 20add000dd6d..7cb6a52f727d 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
-CONFIG_CRYPTO_SPECK=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 97db2fba546a..63b46e30b2c3 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -9,6 +9,8 @@
#include <linux/sched/task_stack.h>
#include <linux/thread_info.h>
+#include <asm-generic/compat.h>
+
#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
typeof(0?(__force t)0:0ULL), u64))
@@ -51,34 +53,18 @@
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "s390\0\0\0\0"
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u16 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
typedef struct {
u32 mask;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 29c940bf8506..d5d24889c3bc 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2)
#define KVM_REQ_START_MIGRATION KVM_ARCH_REQ(3)
#define KVM_REQ_STOP_MIGRATION KVM_ARCH_REQ(4)
+#define KVM_REQ_VSIE_RESTART KVM_ARCH_REQ(5)
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
@@ -186,6 +187,7 @@ struct kvm_s390_sie_block {
#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
+#define ECA_APIE 0x00000008
#define ECA_SII 0x00000001
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
@@ -237,7 +239,11 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
- __u8 reservedb0[20]; /* 0x00b0 */
+ __u8 reservedb0[8]; /* 0x00b0 */
+#define HPID_KVM 0x4
+#define HPID_VSIE 0x5
+ __u8 hpid; /* 0x00b8 */
+ __u8 reservedb9[11]; /* 0x00b9 */
__u16 extcpuaddr; /* 0x00c4 */
__u16 eic; /* 0x00c6 */
__u32 reservedc8; /* 0x00c8 */
@@ -255,6 +261,8 @@ struct kvm_s390_sie_block {
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[12]; /* 0x00f0 */
+#define CRYCB_FORMAT_MASK 0x00000003
+#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
@@ -715,6 +723,7 @@ struct kvm_s390_crypto {
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
+ __u8 apie;
};
#define APCB0_MASK_SIZE 1
@@ -855,6 +864,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+void kvm_arch_crypto_clear_masks(struct kvm *kvm);
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+ unsigned long *aqm, unsigned long *adm);
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index fd79c0d35dc4..a1fbf15d53aa 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -15,6 +15,7 @@
#define __IGNORE_pkey_alloc
#define __IGNORE_pkey_free
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
@@ -25,7 +26,6 @@
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_SYS_OLD_MMAP
@@ -34,6 +34,7 @@
#define __ARCH_WANT_SYS_SIGPROCMASK
# ifdef CONFIG_COMPAT
# define __ARCH_WANT_COMPAT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME32
# endif
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9a50f02b9894..16511d97e8dc 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5
/* kvm attributes for migration mode */
#define KVM_S390_VM_MIGRATION_STOP 0
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ac5da6b0b862..fe24150ff666 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
+#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -844,20 +845,24 @@ void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
kvm_s390_vcpu_block_all(kvm);
- kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_vcpu_crypto_setup(vcpu);
+ /* recreate the shadow crycb by leaving the VSIE handler */
+ kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
+ }
kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
- if (!test_kvm_facility(kvm, 76))
- return -EINVAL;
-
mutex_lock(&kvm->lock);
switch (attr->attr) {
case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+ if (!test_kvm_facility(kvm, 76)) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
get_random_bytes(
kvm->arch.crypto.crycb->aes_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
@@ -865,6 +870,10 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+ if (!test_kvm_facility(kvm, 76)) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
get_random_bytes(
kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
@@ -872,17 +881,39 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+ if (!test_kvm_facility(kvm, 76)) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
kvm->arch.crypto.aes_kw = 0;
memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
break;
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+ if (!test_kvm_facility(kvm, 76)) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
kvm->arch.crypto.dea_kw = 0;
memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
break;
+ case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+ if (!ap_instructions_available()) {
+ mutex_unlock(&kvm->lock);
+ return -EOPNOTSUPP;
+ }
+ kvm->arch.crypto.apie = 1;
+ break;
+ case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+ if (!ap_instructions_available()) {
+ mutex_unlock(&kvm->lock);
+ return -EOPNOTSUPP;
+ }
+ kvm->arch.crypto.apie = 0;
+ break;
default:
mutex_unlock(&kvm->lock);
return -ENXIO;
@@ -1491,6 +1522,10 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
ret = 0;
break;
+ case KVM_S390_VM_CRYPTO_ENABLE_APIE:
+ case KVM_S390_VM_CRYPTO_DISABLE_APIE:
+ ret = ap_instructions_available() ? 0 : -ENXIO;
+ break;
default:
ret = -ENXIO;
break;
@@ -1992,55 +2027,101 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
-static int kvm_s390_query_ap_config(u8 *config)
-{
- u32 fcn_code = 0x04000000UL;
- u32 cc = 0;
-
- memset(config, 0, 128);
- asm volatile(
- "lgr 0,%1\n"
- "lgr 2,%2\n"
- ".long 0xb2af0000\n" /* PQAP(QCI) */
- "0: ipm %0\n"
- "srl %0,28\n"
- "1:\n"
- EX_TABLE(0b, 1b)
- : "+r" (cc)
- : "r" (fcn_code), "r" (config)
- : "cc", "0", "2", "memory"
- );
-
- return cc;
-}
-
static int kvm_s390_apxa_installed(void)
{
- u8 config[128];
- int cc;
+ struct ap_config_info info;
- if (test_facility(12)) {
- cc = kvm_s390_query_ap_config(config);
-
- if (cc)
- pr_err("PQAP(QCI) failed with cc=%d", cc);
- else
- return config[0] & 0x40;
+ if (ap_instructions_available()) {
+ if (ap_qci(&info) == 0)
+ return info.apxa;
}
return 0;
}
+/*
+ * The format of the crypto control block (CRYCB) is specified in the 3 low
+ * order bits of the CRYCB designation (CRYCBD) field as follows:
+ * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
+ * AP extended addressing (APXA) facility are installed.
+ * Format 1: The APXA facility is not installed but the MSAX3 facility is.
+ * Format 2: Both the APXA and MSAX3 facilities are installed
+ */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+ /* Clear the CRYCB format bits - i.e., set format 0 by default */
+ kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
+
+ /* Check whether MSAX3 is installed */
+ if (!test_kvm_facility(kvm, 76))
+ return;
+
if (kvm_s390_apxa_installed())
kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
else
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
+void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
+ unsigned long *aqm, unsigned long *adm)
+{
+ struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
+
+ mutex_lock(&kvm->lock);
+ kvm_s390_vcpu_block_all(kvm);
+
+ switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
+ case CRYCB_FORMAT2: /* APCB1 use 256 bits */
+ memcpy(crycb->apcb1.apm, apm, 32);
+ VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
+ apm[0], apm[1], apm[2], apm[3]);
+ memcpy(crycb->apcb1.aqm, aqm, 32);
+ VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
+ aqm[0], aqm[1], aqm[2], aqm[3]);
+ memcpy(crycb->apcb1.adm, adm, 32);
+ VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
+ adm[0], adm[1], adm[2], adm[3]);
+ break;
+ case CRYCB_FORMAT1:
+ case CRYCB_FORMAT0: /* Fall through both use APCB0 */
+ memcpy(crycb->apcb0.apm, apm, 8);
+ memcpy(crycb->apcb0.aqm, aqm, 2);
+ memcpy(crycb->apcb0.adm, adm, 2);
+ VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
+ apm[0], *((unsigned short *)aqm),
+ *((unsigned short *)adm));
+ break;
+ default: /* Can not happen */
+ break;
+ }
+
+ /* recreate the shadow crycb for each vcpu */
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+ kvm_s390_vcpu_unblock_all(kvm);
+ mutex_unlock(&kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
+
+void kvm_arch_crypto_clear_masks(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_s390_vcpu_block_all(kvm);
+
+ memset(&kvm->arch.crypto.crycb->apcb0, 0,
+ sizeof(kvm->arch.crypto.crycb->apcb0));
+ memset(&kvm->arch.crypto.crycb->apcb1, 0,
+ sizeof(kvm->arch.crypto.crycb->apcb1));
+
+ VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
+ /* recreate the shadow crycb for each vcpu */
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
+ kvm_s390_vcpu_unblock_all(kvm);
+ mutex_unlock(&kvm->lock);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
+
static u64 kvm_s390_get_initial_cpuid(void)
{
struct cpuid cpuid;
@@ -2052,12 +2133,12 @@ static u64 kvm_s390_get_initial_cpuid(void)
static void kvm_s390_crypto_init(struct kvm *kvm)
{
- if (!test_kvm_facility(kvm, 76))
- return;
-
kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
+ if (!test_kvm_facility(kvm, 76))
+ return;
+
/* Enable AES/DEA protected key functions by default */
kvm->arch.crypto.aes_kw = 1;
kvm->arch.crypto.dea_kw = 1;
@@ -2583,17 +2664,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
- if (!test_kvm_facility(vcpu->kvm, 76))
+ /*
+ * If the AP instructions are not being interpreted and the MSAX3
+ * facility is not configured for the guest, there is nothing to set up.
+ */
+ if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
return;
+ vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+ vcpu->arch.sie_block->eca &= ~ECA_APIE;
+
+ if (vcpu->kvm->arch.crypto.apie)
+ vcpu->arch.sie_block->eca |= ECA_APIE;
+ /* Set up protected key support */
if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
-
- vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
@@ -2685,6 +2774,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+ vcpu->arch.sie_block->hpid = HPID_KVM;
+
kvm_s390_vcpu_crypto_setup(vcpu);
return rc;
@@ -2768,18 +2859,25 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
exit_sie(vcpu);
}
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->prog20) &
+ (PROG_BLOCK_SIE | PROG_REQUEST);
+}
+
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
- * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
* If the CPU is not running (e.g. waiting as idle) the function will
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_vsie_kick(vcpu);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -3196,6 +3294,8 @@ retry:
/* nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ /* we left the vsie handler, nothing to do, just clear the request */
+ kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 981e3ba97461..1f6e36cdce0d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -290,6 +290,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a2b28cd1e3fe..a153257bf7d9 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -135,14 +135,148 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
atomic_set(&scb_s->cpuflags, newflags);
return 0;
}
+/* Copy to APCB FORMAT1 from APCB FORMAT0 */
+static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
+ unsigned long apcb_o, struct kvm_s390_apcb1 *apcb_h)
+{
+ struct kvm_s390_apcb0 tmp;
-/*
+ if (read_guest_real(vcpu, apcb_o, &tmp, sizeof(struct kvm_s390_apcb0)))
+ return -EFAULT;
+
+ apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
+ apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
+ apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;
+
+ return 0;
+
+}
+
+/**
+ * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original apcb in the guest2
+ * @apcb_h: pointer to start of apcb in the guest1
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ unsigned long apcb_o, unsigned long *apcb_h)
+{
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
+ sizeof(struct kvm_s390_apcb0)))
+ return -EFAULT;
+
+ bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0));
+
+ return 0;
+}
+
+/**
+ * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
+ * @vcpu: pointer to the virtual CPU
+ * @apcb_s: pointer to start of apcb in the shadow crycb
+ * @apcb_o: pointer to start of original guest apcb
+ * @apcb_h: pointer to start of apcb in the host
+ *
+ * Returns 0 and -EFAULT on error reading guest apcb
+ */
+static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
+ unsigned long apcb_o,
+ unsigned long *apcb_h)
+{
+ if (read_guest_real(vcpu, apcb_o, apcb_s,
+ sizeof(struct kvm_s390_apcb1)))
+ return -EFAULT;
+
+ bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1));
+
+ return 0;
+}
+
+/**
+ * setup_apcb - Create a shadow copy of the apcb.
+ * @vcpu: pointer to the virtual CPU
+ * @crycb_s: pointer to shadow crycb
+ * @crycb_o: pointer to original guest crycb
+ * @crycb_h: pointer to the host crycb
+ * @fmt_o: format of the original guest crycb.
+ * @fmt_h: format of the host crycb.
+ *
+ * Checks the compatibility between the guest and host crycb and calls the
+ * appropriate copy function.
+ *
+ * Return 0 or an error number if the guest and host crycb are incompatible.
+ */
+static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
+ const u32 crycb_o,
+ struct kvm_s390_crypto_cb *crycb_h,
+ int fmt_o, int fmt_h)
+{
+ struct kvm_s390_crypto_cb *crycb;
+
+ crycb = (struct kvm_s390_crypto_cb *) (unsigned long)crycb_o;
+
+ switch (fmt_o) {
+ case CRYCB_FORMAT2:
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 256) & PAGE_MASK))
+ return -EACCES;
+ if (fmt_h != CRYCB_FORMAT2)
+ return -EINVAL;
+ return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
+ (unsigned long) &crycb->apcb1,
+ (unsigned long *)&crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ switch (fmt_h) {
+ case CRYCB_FORMAT2:
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
+ (unsigned long) &crycb->apcb0,
+ &crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ return setup_apcb00(vcpu,
+ (unsigned long *) &crycb_s->apcb0,
+ (unsigned long) &crycb->apcb0,
+ (unsigned long *) &crycb_h->apcb0);
+ }
+ break;
+ case CRYCB_FORMAT0:
+ if ((crycb_o & PAGE_MASK) != ((crycb_o + 32) & PAGE_MASK))
+ return -EACCES;
+
+ switch (fmt_h) {
+ case CRYCB_FORMAT2:
+ return setup_apcb10(vcpu, &crycb_s->apcb1,
+ (unsigned long) &crycb->apcb0,
+ &crycb_h->apcb1);
+ case CRYCB_FORMAT1:
+ case CRYCB_FORMAT0:
+ return setup_apcb00(vcpu,
+ (unsigned long *) &crycb_s->apcb0,
+ (unsigned long) &crycb->apcb0,
+ (unsigned long *) &crycb_h->apcb0);
+ }
+ }
+ return -EINVAL;
+}
+
+/**
+ * shadow_crycb - Create a shadow copy of the crycb block
+ * @vcpu: a pointer to the virtual CPU
+ * @vsie_page: a pointer to internal date used for the vSIE
+ *
* Create a shadow copy of the crycb block and setup key wrapping, if
* requested for guest 3 and enabled for guest 2.
*
- * We only accept format-1 (no AP in g2), but convert it into format-2
+ * We accept format-1 or format-2, but we convert format-1 into format-2
+ * in the shadow CRYCB.
+ * Using format-2 enables the firmware to choose the right format when
+ * scheduling the SIE.
* There is nothing to do for format-0.
*
+ * This function centralize the issuing of set_validity_icpt() for all
+ * the subfunctions working on the crycb.
+ *
* Returns: - 0 if shadowed or nothing to do
* - > 0 if control has to be given to guest 2
*/
@@ -154,23 +288,40 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
+ int apie_h;
+ int key_msk = test_kvm_facility(vcpu->kvm, 76);
+ int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
+ int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
+ int ret = 0;
scb_s->crycbd = 0;
- if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
- return 0;
- /* format-1 is supported with message-security-assist extension 3 */
- if (!test_kvm_facility(vcpu->kvm, 76))
+
+ apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
+ if (!apie_h && !key_msk)
return 0;
+
+ if (!crycb_addr)
+ return set_validity_icpt(scb_s, 0x0039U);
+
+ if (fmt_o == CRYCB_FORMAT1)
+ if ((crycb_addr & PAGE_MASK) !=
+ ((crycb_addr + 128) & PAGE_MASK))
+ return set_validity_icpt(scb_s, 0x003CU);
+
+ if (apie_h && (scb_o->eca & ECA_APIE)) {
+ ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
+ vcpu->kvm->arch.crypto.crycb,
+ fmt_o, fmt_h);
+ if (ret)
+ goto end;
+ scb_s->eca |= scb_o->eca & ECA_APIE;
+ }
+
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
if (!ecb3_flags)
- return 0;
-
- if ((crycb_addr & PAGE_MASK) != ((crycb_addr + 128) & PAGE_MASK))
- return set_validity_icpt(scb_s, 0x003CU);
- else if (!crycb_addr)
- return set_validity_icpt(scb_s, 0x0039U);
+ goto end;
/* copy only the wrapping keys */
if (read_guest_real(vcpu, crycb_addr + 72,
@@ -178,8 +329,6 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;
- scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT1 |
- CRYCB_FORMAT2;
/* xor both blocks in one run */
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -187,6 +336,16 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
/* as 56%8 == 0, bitmap_xor won't overwrite any data */
bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
+end:
+ switch (ret) {
+ case -EINVAL:
+ return set_validity_icpt(scb_s, 0x0020U);
+ case -EFAULT:
+ return set_validity_icpt(scb_s, 0x0035U);
+ case -EACCES:
+ return set_validity_icpt(scb_s, 0x003CU);
+ }
+ scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
return 0;
}
@@ -383,6 +542,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 156))
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+ scb_s->hpid = HPID_VSIE;
+
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
@@ -830,7 +991,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
int guest_bp_isolation;
- int rc;
+ int rc = 0;
handle_last_fault(vcpu, vsie_page);
@@ -858,7 +1019,18 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
guest_enter_irqoff();
local_irq_enable();
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ /*
+ * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
+ * and VCPU requests also hinder the vSIE from running and lead
+ * to an immediate exit. kvm_s390_vsie_kick() has to be used to
+ * also kick the vSIE.
+ */
+ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
+ barrier();
+ if (!kvm_s390_vcpu_sie_inhibited(vcpu))
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ barrier();
+ vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
local_irq_disable();
guest_exit_irqoff();
@@ -1005,7 +1177,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc == -EAGAIN)
rc = 0;
if (rc || scb_s->icptcode || signal_pending(current) ||
- kvm_s390_vcpu_has_irq(vcpu, 0))
+ kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ kvm_s390_vcpu_sie_inhibited(vcpu))
break;
}
@@ -1122,7 +1295,8 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
+ if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ kvm_s390_vcpu_sie_inhibited(vcpu))
return 0;
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 911c7ded35f1..1e668b95e0c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -907,10 +907,16 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
pmd_t *pmdp;
BUG_ON(gmap_is_shadow(gmap));
- spin_lock(&gmap->guest_table_lock);
pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+ if (!pmdp)
+ return NULL;
- if (!pmdp || pmd_none(*pmdp)) {
+ /* without huge pages, there is no need to take the table lock */
+ if (!gmap->mm->context.allow_gmap_hpage_1m)
+ return pmd_none(*pmdp) ? NULL : pmdp;
+
+ spin_lock(&gmap->guest_table_lock);
+ if (pmd_none(*pmdp)) {
spin_unlock(&gmap->guest_table_lock);
return NULL;
}
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 0c85aedcf9b3..fd788e0f2e5b 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -106,6 +106,8 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){
+ 12, /* AP Query Configuration Information */
+ 15, /* AP Facilities Test */
156, /* etoken facility */
-1 /* END */
}
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index b36200af9ce7..a99234b61051 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -5,6 +5,7 @@
# include <asm/unistd_64.h>
# endif
+# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
# define __ARCH_WANT_OLD_STAT
# define __ARCH_WANT_STAT64
@@ -19,7 +20,6 @@
# define __ARCH_WANT_SYS_SOCKETCALL
# define __ARCH_WANT_SYS_FADVISE64
# define __ARCH_WANT_SYS_GETPGRP
-# define __ARCH_WANT_SYS_LLSEEK
# define __ARCH_WANT_SYS_NICE
# define __ARCH_WANT_SYS_OLD_GETRLIMIT
# define __ARCH_WANT_SYS_OLD_UNAME
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 4eb51d2dae98..30b1763580b1 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -6,38 +6,23 @@
*/
#include <linux/types.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "sparc\0\0"
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u16 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef s16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
-
struct compat_stat {
compat_dev_t st_dev;
compat_ino_t st_ino;
@@ -47,11 +32,11 @@ struct compat_stat {
__compat_gid_t st_gid;
compat_dev_t st_rdev;
compat_off_t st_size;
- compat_time_t st_atime;
+ old_time32_t st_atime;
compat_ulong_t st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
compat_ulong_t st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
compat_ulong_t st_ctime_nsec;
compat_off_t st_blksize;
compat_off_t st_blocks;
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index b2a6a955113e..00f87dbd0b17 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -21,6 +21,7 @@
#else
#define __NR_time 231 /* Linux sparc32 */
#endif
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
@@ -33,7 +34,6 @@
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
@@ -42,6 +42,7 @@
#define __ARCH_WANT_SYS_IPC
#else
#define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 65856eaab163..1e8fe5941b8a 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -15,4 +15,5 @@
/* Use the standard ABI for syscalls. */
#include <asm-generic/unistd.h>
+#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a450ad573dcb..a4b0007a54e1 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -60,9 +60,6 @@ endif
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
- obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
- obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
- obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
endif
@@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
endif
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index acbe7e8336d8..661f7daf43da 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096
@@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length &&
(!PageHighMem(sg_page(req->src)) ||
- req->src->offset + req->src->length < PAGE_SIZE)) {
+ req->src->offset + req->src->length <= PAGE_SIZE)) {
scatterwalk_start(&assoc_sg_walk, req->src);
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
@@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
static
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
-static struct {
- const char *algname;
- const char *drvname;
- const char *basename;
- struct simd_skcipher_alg *simd;
-} aesni_simd_skciphers2[] = {
-#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
- IS_BUILTIN(CONFIG_CRYPTO_PCBC)
- {
- .algname = "pcbc(aes)",
- .drvname = "pcbc-aes-aesni",
- .basename = "fpu(pcbc(__aes-aesni))",
- },
-#endif
-};
-
#ifdef CONFIG_X86_64
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
unsigned int key_len)
@@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
aesni_simd_skciphers[i]; i++)
simd_skcipher_free(aesni_simd_skciphers[i]);
-
- for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
- if (aesni_simd_skciphers2[i].simd)
- simd_skcipher_free(aesni_simd_skciphers2[i].simd);
}
static int __init aesni_init(void)
@@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
#endif
#endif
- err = crypto_fpu_init();
- if (err)
- return err;
-
err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
if (err)
- goto fpu_exit;
+ return err;
err = crypto_register_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
@@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
aesni_simd_skciphers[i] = simd;
}
- for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
- algname = aesni_simd_skciphers2[i].algname;
- drvname = aesni_simd_skciphers2[i].drvname;
- basename = aesni_simd_skciphers2[i].basename;
- simd = simd_skcipher_create_compat(algname, drvname, basename);
- err = PTR_ERR(simd);
- if (IS_ERR(simd))
- continue;
-
- aesni_simd_skciphers2[i].simd = simd;
- }
-
return 0;
unregister_simds:
@@ -1521,8 +1482,6 @@ unregister_skciphers:
ARRAY_SIZE(aesni_skciphers));
unregister_algs:
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-fpu_exit:
- crypto_fpu_exit();
return err;
}
@@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
crypto_unregister_skciphers(aesni_skciphers,
ARRAY_SIZE(aesni_skciphers));
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
-
- crypto_fpu_exit();
}
late_initcall(aesni_init);
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
deleted file mode 100644
index 406680476c52..000000000000
--- a/arch/x86/crypto/fpu.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * FPU: Wrapper for blkcipher touching fpu
- *
- * Copyright (c) Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/skcipher.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/fpu/api.h>
-
-struct crypto_fpu_ctx {
- struct crypto_skcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
- unsigned int keylen)
-{
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
- struct crypto_skcipher *child = ctx->child;
- int err;
-
- crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- err = crypto_skcipher_setkey(child, key, keylen);
- crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
- CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int crypto_fpu_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
- int err;
-
- skcipher_request_set_tfm(subreq, child);
- skcipher_request_set_callback(subreq, 0, NULL, NULL);
- skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
- req->iv);
-
- kernel_fpu_begin();
- err = crypto_skcipher_encrypt(subreq);
- kernel_fpu_end();
-
- skcipher_request_zero(subreq);
- return err;
-}
-
-static int crypto_fpu_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *child = ctx->child;
- SKCIPHER_REQUEST_ON_STACK(subreq, child);
- int err;
-
- skcipher_request_set_tfm(subreq, child);
- skcipher_request_set_callback(subreq, 0, NULL, NULL);
- skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
- req->iv);
-
- kernel_fpu_begin();
- err = crypto_skcipher_decrypt(subreq);
- kernel_fpu_end();
-
- skcipher_request_zero(subreq);
- return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
-{
- struct skcipher_instance *inst = skcipher_alg_instance(tfm);
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher_spawn *spawn;
- struct crypto_skcipher *cipher;
-
- spawn = skcipher_instance_ctx(inst);
- cipher = crypto_spawn_skcipher(spawn);
- if (IS_ERR(cipher))
- return PTR_ERR(cipher);
-
- ctx->child = cipher;
-
- return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
-{
- struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->child);
-}
-
-static void crypto_fpu_free(struct skcipher_instance *inst)
-{
- crypto_drop_skcipher(skcipher_instance_ctx(inst));
- kfree(inst);
-}
-
-static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
-{
- struct crypto_skcipher_spawn *spawn;
- struct skcipher_instance *inst;
- struct crypto_attr_type *algt;
- struct skcipher_alg *alg;
- const char *cipher_name;
- int err;
-
- algt = crypto_get_attr_type(tb);
- if (IS_ERR(algt))
- return PTR_ERR(algt);
-
- if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
- algt->mask)
- return -EINVAL;
-
- if (!(algt->mask & CRYPTO_ALG_INTERNAL))
- return -EINVAL;
-
- cipher_name = crypto_attr_alg_name(tb[1]);
- if (IS_ERR(cipher_name))
- return PTR_ERR(cipher_name);
-
- inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
- if (!inst)
- return -ENOMEM;
-
- spawn = skcipher_instance_ctx(inst);
-
- crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
- err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
- if (err)
- goto out_free_inst;
-
- alg = crypto_skcipher_spawn_alg(spawn);
-
- err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
- &alg->base);
- if (err)
- goto out_drop_skcipher;
-
- inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
- inst->alg.base.cra_priority = alg->base.cra_priority;
- inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
- inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
-
- inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
- inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
- inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
-
- inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-
- inst->alg.init = crypto_fpu_init_tfm;
- inst->alg.exit = crypto_fpu_exit_tfm;
-
- inst->alg.setkey = crypto_fpu_setkey;
- inst->alg.encrypt = crypto_fpu_encrypt;
- inst->alg.decrypt = crypto_fpu_decrypt;
-
- inst->free = crypto_fpu_free;
-
- err = skcipher_register_instance(tmpl, inst);
- if (err)
- goto out_drop_skcipher;
-
-out:
- return err;
-
-out_drop_skcipher:
- crypto_drop_skcipher(spawn);
-out_free_inst:
- kfree(inst);
- goto out;
-}
-
-static struct crypto_template crypto_fpu_tmpl = {
- .name = "fpu",
- .create = crypto_fpu_create,
- .module = THIS_MODULE,
-};
-
-int __init crypto_fpu_init(void)
-{
- return crypto_register_template(&crypto_fpu_tmpl);
-}
-
-void crypto_fpu_exit(void)
-{
- crypto_unregister_template(&crypto_fpu_tmpl);
-}
-
-MODULE_ALIAS_CRYPTO("fpu");
diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile
deleted file mode 100644
index 815ded3ba90e..000000000000
--- a/arch/x86/crypto/sha1-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
- sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
- sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
deleted file mode 100644
index b93805664c1d..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/*
- * Multi buffer SHA1 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha1_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha1_mb_alg_state;
-
-struct sha1_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)
- (struct sha1_mb_mgr *state, struct job_sha1 *job);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
- (struct sha1_mb_mgr *state);
-static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
- (struct sha1_mb_mgr *state);
-
-static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA1_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA1_PADLENGTHFIELD_SIZE;
-
-#if SHA1_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA1_LOG2_BLOCK_SIZE;
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr,
- struct sha1_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA1_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA1_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA1_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr,
- &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha1_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha1_hash_ctx *)
- sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha1_hash_ctx
- *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user.
- * If it is not ready, resubmit the job to finish processing.
- * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr
- * still need processing.
- */
- struct sha1_hash_ctx *ctx;
-
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
- return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
-{
- sha1_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
- struct sha1_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- return ctx;
- }
-
- /*
- * If we made it here, there were no errors during this call to
- * submit
- */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently
- * being processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) {
- /*
- * Compute how many bytes to copy from user buffer into
- * extra block
- */
- uint32_t copy_len = SHA1_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /*
- * The extra block should never contain more than 1 block
- * here
- */
- assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
-
- /*
- * If the extra block buffer contains exactly 1 block, it can
- * be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha1_hash_ctx *)
- sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- return sha1_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
-{
- struct sha1_hash_ctx *ctx;
-
- while (1) {
- ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- return NULL;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha1_ctx_mgr_resubmit returned a job, it is ready to be
- * returned. Otherwise, all jobs currently being managed by the
- * sha1_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- return ctx;
- }
-}
-
-static int sha1_mb_init(struct ahash_request *areq)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA1_H0;
- sctx->job.result_digest[1] = SHA1_H1;
- sctx->job.result_digest[2] = SHA1_H2;
- sctx->job.result_digest[3] = SHA1_H3;
- sctx->job.result_digest[4] = SHA1_H4;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be32 *dst = (__be32 *) rctx->out;
-
- for (i = 0; i < 5; ++i)
- dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha1_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha1_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha1_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
-
- /* remove from work list */
- spin_lock(&cstate->work_lock);
- list_del(&rctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock(&cstate->work_lock);
- list_del(&req_ctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
- }
-
- return 0;
-}
-
-static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock(&cstate->work_lock);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock(&cstate->work_lock);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha1_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- sha1_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- sha1_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
-
- struct sha1_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha1_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
- HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha1_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha1_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha1_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha1_hash_ctx));
-
- return 0;
-}
-
-static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha1_mb_areq_alg = {
- .init = sha1_mb_init,
- .update = sha1_mb_update,
- .final = sha1_mb_final,
- .finup = sha1_mb_finup,
- .export = sha1_mb_export,
- .import = sha1_mb_import,
- .halg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_hash_ctx),
- .base = {
- .cra_name = "__sha1-mb",
- .cra_driver_name = "__intel_sha1-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha1_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha1_mb_areq_init_tfm,
- .cra_exit = sha1_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha1_hash_ctx),
- }
- }
-};
-
-static int sha1_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha1_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha1_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha1_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha1_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha1_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha1_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha1_mb_async_alg = {
- .init = sha1_mb_async_init,
- .update = sha1_mb_async_update,
- .final = sha1_mb_async_final,
- .finup = sha1_mb_async_finup,
- .digest = sha1_mb_async_digest,
- .export = sha1_mb_async_export,
- .import = sha1_mb_async_import,
- .halg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_hash_ctx),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name = "sha1_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
- .cra_init = sha1_mb_async_init_tfm,
- .cra_exit = sha1_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha1_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha1_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if (time_before(cur_time, rctx->tag.expire))
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha1_hash_ctx *)
- sha1_ctx_mgr_flush(cstate->mgr);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha1_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
-
- sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
- sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
- sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
- sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
-
- if (!sha1_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha1_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha1_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha1_mb_alg_state.flusher = &sha1_mb_flusher;
-
- err = crypto_register_ahash(&sha1_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha1_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha1_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha1_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha1_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha1_mb_async_alg);
- crypto_unregister_ahash(&sha1_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha1_mb_alg_state.alg_cstate);
-}
-
-module_init(sha1_mb_mod_init);
-module_exit(sha1_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
deleted file mode 100644
index 9454bd16f9f8..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha1_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-
-#ifdef HASH_CTX_DEBUG
- HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA1_DIGEST_LENGTH 5
-#define SHA1_LOG2_BLOCK_SIZE 6
-
-#define SHA1_PADLENGTHFIELD_SIZE 8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha1_ctx_mgr {
- struct sha1_mb_mgr mgr;
-};
-
-/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
-
-struct sha1_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha1 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
deleted file mode 100644
index 08ad1a9acfd7..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-
-#include <linux/types.h>
-
-#define NUM_SHA1_DIGEST_WORDS 5
-
-enum job_sts { STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha1 {
- u8 *buffer;
- u32 len;
- u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-/* SHA1 out-of-order scheduler */
-
-/* typedef uint32_t sha1_digest_array[5][8]; */
-
-struct sha1_args_x8 {
- uint32_t digest[5][8];
- uint8_t *data_ptr[8];
-};
-
-struct sha1_lane_data {
- struct job_sha1 *job_in_lane;
-};
-
-struct sha1_mb_mgr {
- struct sha1_args_x8 args;
-
- uint32_t lens[8];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha1_lane_data ldata[8];
-};
-
-
-#define SHA1_MB_MGR_NUM_LANES_AVX2 8
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
- struct job_sha1 *job);
-struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
-struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
deleted file mode 100644
index 86688c6e7a25..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Header file for multi buffer SHA1 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
-#define _SHA1_MB_MGR_DATASTRUCT_ASM_
-
-## START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-## FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-## END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-
-#endif
-
-########################################################################
-#### Define constants
-########################################################################
-
-########################################################################
-#### Define SHA1 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
-_LANE_DATA_size = _FIELD_OFFSET
-_LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # SHA1_ARGS_X8
-### name size align
-FIELD _digest, 4*5*8, 16 # transposed digest
-FIELD _data_ptr, 8*8, 8 # array of pointers to data
-END_FIELDS
-
-_SHA1_ARGS_X4_size = _FIELD_OFFSET
-_SHA1_ARGS_X4_align = _STRUCT_ALIGN
-_SHA1_ARGS_X8_size = _FIELD_OFFSET
-_SHA1_ARGS_X8_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
-FIELD _lens, 4*8, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
-_MB_MGR_size = _FIELD_OFFSET
-_MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-########################################################################
-#### Define JOB_SHA1 structure
-########################################################################
-
-START_FIELDS # JOB_SHA1
-
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 4, 4 # length in bytes
-FIELD _result_digest, 5*4, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
-_JOB_SHA1_size = _FIELD_OFFSET
-_JOB_SHA1_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7cfba738f104..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Flush routine for SHA1 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx2
-
-# LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-# idx must be a register not clobbered by sha1_x8_avx2
-#define idx %r8
-#define DWORD_idx %r8d
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-#define tmp2_w %ebx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 %arg1
-
-#define extra_blocks %arg2
-#define p %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha1_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+3, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
- offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne four(%rip), idx
- offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne five(%rip), idx
- offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne six(%rip), idx
- offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne seven(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 8
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 4*I)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- vmovdqu _lens+0*16(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqu %xmm0, _lens+0*16(state)
- vmovdqu %xmm1, _lens+1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha1_x8_avx2
- # state and idx are intact
-
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state , idx, 4) , %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), tmp2_w
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- mov tmp2_w, offset(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha1_mb_mgr_flush_avx2)
-
-
-#################################################################
-
-.align 16
-ENTRY(sha1_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- ## if bit 32+3 is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $(32+3), unused_lanes
- jc .return_null
-
- # Find min length
- vmovdqu _lens(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- test $~0xF, idx
- jnz .return_null
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), tmp2_w
-
- vmovdqu %xmm0, _result_digest(job_rax)
- movl tmp2_w, _result_digest+1*16(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa 0x000000000000000000000000FFFFFFF0
-
-.section .rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-two:
-.quad 2
-three:
-.quad 3
-four:
-.quad 4
-five:
-.quad 5
-six:
-.quad 6
-seven:
-.quad 7
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
deleted file mode 100644
index d2add0d35f43..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Initialization code for multi buffer SHA1 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha1_mb_mgr.h"
-
-void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
-{
- unsigned int j;
- state->unused_lanes = 0xF76543210ULL;
- for (j = 0; j < 8; j++) {
- state->lens[j] = 0xFFFFFFFF;
- state->ldata[j].job_in_lane = NULL;
- }
-}
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
deleted file mode 100644
index 7a93b1c0d69a..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA1 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-
-.extern sha1_x8_avx
-
-# LINUX register definitions
-arg1 = %rdi
-arg2 = %rsi
-size_offset = %rcx
-tmp2 = %rcx
-extra_blocks = %rdx
-
-# Common definitions
-#define state arg1
-#define job %rsi
-#define len2 arg2
-#define p2 arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx = %r8
-DWORD_idx = %r8d
-last_len = %r8
-
-p = %r11
-start_offset = %r11
-
-unused_lanes = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax = %rax
-len = %rax
-DWORD_len = %eax
-
-lane = %r12
-tmp3 = %r12
-
-tmp = %r9
-DWORD_tmp = %r9d
-
-lane_data = %r10
-
-# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha1_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- mov unused_lanes, lane
- and $0xF, lane
- shr $4, unused_lanes
- imul $_LANE_DATA_size, lane, lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- shl $4, len
- or lane, len
-
- movl DWORD_len, _lens(state , lane, 4)
-
- # Load digest words from result_digest
- vmovdqu _result_digest(job), %xmm0
- mov _result_digest+1*16(job), DWORD_tmp
- vmovd %xmm0, _args_digest(state, lane, 4)
- vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
- vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
- vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
- movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xF, unused_lanes
- jne return_null
-
-start_loop:
- # Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqa %xmm0, _lens + 0*16(state)
- vmovdqa %xmm1, _lens + 1*16(state)
-
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha1_x8_avx2
-
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
- movl _args_digest+4*32(state, idx, 4), DWORD_tmp
-
- vmovdqu %xmm0, _result_digest(job_rax)
- movl DWORD_tmp, _result_digest+1*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-
-ENDPROC(sha1_mb_mgr_submit_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
- .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
deleted file mode 100644
index 20f77aa633de..000000000000
--- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * Multi-buffer SHA1 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * James Guilford <james.guilford@intel.com>
- * Tim Chen <tim.c.chen@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha1_mb_mgr_datastruct.S"
-
-## code to compute oct SHA1 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-
-## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15
-##
-## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
-## Linux preserves: rdi rbp r8
-##
-## clobbers ymm0-15
-
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
- # process top half (r0..r3) {a...d}
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
- vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
- vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
- vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
- vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
-
- # use r2 in place of t0
- # process bottom half (r4..r7) {e...h}
- vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
- vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
- vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
- vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
- vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
- vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
- vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
- vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
-
- vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
- vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
- vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
- vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
- vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
- vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
- vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
- vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
-
-.endm
-##
-## Magic functions defined in FIPS 180-1
-##
-# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
-.macro MAGIC_F0 regF regB regC regD regT
- vpxor \regD, \regC, \regF
- vpand \regB, \regF, \regF
- vpxor \regD, \regF, \regF
-.endm
-
-# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
-.macro MAGIC_F1 regF regB regC regD regT
- vpxor \regC, \regD, \regF
- vpxor \regB, \regF, \regF
-.endm
-
-# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
-.macro MAGIC_F2 regF regB regC regD regT
- vpor \regC, \regB, \regF
- vpand \regC, \regB, \regT
- vpand \regD, \regF, \regF
- vpor \regT, \regF, \regF
-.endm
-
-# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
-.macro MAGIC_F3 regF regB regC regD regT
- MAGIC_F1 \regF,\regB,\regC,\regD,\regT
-.endm
-
-# PROLD reg, imm, tmp
-.macro PROLD reg imm tmp
- vpsrld $(32-\imm), \reg, \tmp
- vpslld $\imm, \reg, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-.macro PROLD_nd reg imm tmp src
- vpsrld $(32-\imm), \src, \tmp
- vpslld $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
- vpaddd \immCNT, \regE, \regE
- vpaddd \memW*32(%rsp), \regE, \regE
- PROLD_nd \regT, 5, \regF, \regA
- vpaddd \regT, \regE, \regE
- \MAGIC \regF, \regB, \regC, \regD, \regT
- PROLD \regB, 30, \regT
- vpaddd \regF, \regE, \regE
-.endm
-
-.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
- vpaddd \immCNT, \regE, \regE
- offset = ((\memW - 14) & 15) * 32
- vmovdqu offset(%rsp), W14
- vpxor W14, W16, W16
- offset = ((\memW - 8) & 15) * 32
- vpxor offset(%rsp), W16, W16
- offset = ((\memW - 3) & 15) * 32
- vpxor offset(%rsp), W16, W16
- vpsrld $(32-1), W16, \regF
- vpslld $1, W16, W16
- vpor W16, \regF, \regF
-
- ROTATE_W
-
- offset = ((\memW - 0) & 15) * 32
- vmovdqu \regF, offset(%rsp)
- vpaddd \regF, \regE, \regE
- PROLD_nd \regT, 5, \regF, \regA
- vpaddd \regT, \regE, \regE
- \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
- PROLD \regB,30, \regT
- vpaddd \regF, \regE, \regE
-.endm
-
-########################################################################
-########################################################################
-########################################################################
-
-## FRAMESZ plus pushes must be an odd multiple of 8
-YMM_SAVE = (15-15)*32
-FRAMESZ = 32*16 + YMM_SAVE
-_YMM = FRAMESZ - YMM_SAVE
-
-#define VMOVPS vmovups
-
-IDX = %rax
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = %rcx
-arg1 = %rdi
-arg2 = %rsi
-RSP_SAVE = %rdx
-
-# ymm0 A
-# ymm1 B
-# ymm2 C
-# ymm3 D
-# ymm4 E
-# ymm5 F AA
-# ymm6 T0 BB
-# ymm7 T1 CC
-# ymm8 T2 DD
-# ymm9 T3 EE
-# ymm10 T4 TMP
-# ymm11 T5 FUN
-# ymm12 T6 K
-# ymm13 T7 W14
-# ymm14 T8 W15
-# ymm15 T9 W16
-
-
-A = %ymm0
-B = %ymm1
-C = %ymm2
-D = %ymm3
-E = %ymm4
-F = %ymm5
-T0 = %ymm6
-T1 = %ymm7
-T2 = %ymm8
-T3 = %ymm9
-T4 = %ymm10
-T5 = %ymm11
-T6 = %ymm12
-T7 = %ymm13
-T8 = %ymm14
-T9 = %ymm15
-
-AA = %ymm5
-BB = %ymm6
-CC = %ymm7
-DD = %ymm8
-EE = %ymm9
-TMP = %ymm10
-FUN = %ymm11
-K = %ymm12
-W14 = %ymm13
-W15 = %ymm14
-W16 = %ymm15
-
-.macro ROTATE_ARGS
- TMP_ = E
- E = D
- D = C
- C = B
- B = A
- A = TMP_
-.endm
-
-.macro ROTATE_W
-TMP_ = W16
-W16 = W15
-W15 = W14
-W14 = TMP_
-.endm
-
-# 8 streams x 5 32bit words per digest x 4 bytes per word
-#define DIGEST_SIZE (8*5*4)
-
-.align 32
-
-# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
-# arg 1 : pointer to array[4] of pointer to input data
-# arg 2 : size (in blocks) ;; assumed to be >= 1
-#
-ENTRY(sha1_x8_avx2)
-
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- #save rsp
- mov %rsp, RSP_SAVE
- sub $FRAMESZ, %rsp
-
- #align rsp to 32 Bytes
- and $~0x1F, %rsp
-
- ## Initialize digests
- vmovdqu 0*32(arg1), A
- vmovdqu 1*32(arg1), B
- vmovdqu 2*32(arg1), C
- vmovdqu 3*32(arg1), D
- vmovdqu 4*32(arg1), E
-
- ## transpose input onto stack
- mov _data_ptr+0*8(arg1),inp0
- mov _data_ptr+1*8(arg1),inp1
- mov _data_ptr+2*8(arg1),inp2
- mov _data_ptr+3*8(arg1),inp3
- mov _data_ptr+4*8(arg1),inp4
- mov _data_ptr+5*8(arg1),inp5
- mov _data_ptr+6*8(arg1),inp6
- mov _data_ptr+7*8(arg1),inp7
-
- xor IDX, IDX
-lloop:
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
- I=0
-.rep 2
- VMOVPS (inp0, IDX), T0
- VMOVPS (inp1, IDX), T1
- VMOVPS (inp2, IDX), T2
- VMOVPS (inp3, IDX), T3
- VMOVPS (inp4, IDX), T4
- VMOVPS (inp5, IDX), T5
- VMOVPS (inp6, IDX), T6
- VMOVPS (inp7, IDX), T7
-
- TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
- vpshufb F, T0, T0
- vmovdqu T0, (I*8)*32(%rsp)
- vpshufb F, T1, T1
- vmovdqu T1, (I*8+1)*32(%rsp)
- vpshufb F, T2, T2
- vmovdqu T2, (I*8+2)*32(%rsp)
- vpshufb F, T3, T3
- vmovdqu T3, (I*8+3)*32(%rsp)
- vpshufb F, T4, T4
- vmovdqu T4, (I*8+4)*32(%rsp)
- vpshufb F, T5, T5
- vmovdqu T5, (I*8+5)*32(%rsp)
- vpshufb F, T6, T6
- vmovdqu T6, (I*8+6)*32(%rsp)
- vpshufb F, T7, T7
- vmovdqu T7, (I*8+7)*32(%rsp)
- add $32, IDX
- I = (I+1)
-.endr
- # save old digests
- vmovdqu A,AA
- vmovdqu B,BB
- vmovdqu C,CC
- vmovdqu D,DD
- vmovdqu E,EE
-
-##
-## perform 0-79 steps
-##
- vmovdqu K00_19(%rip), K
-## do rounds 0...15
- I = 0
-.rep 16
- SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 16...19
- vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
- vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
-.rep 4
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 20...39
- vmovdqu K20_39(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 40...59
- vmovdqu K40_59(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
- ROTATE_ARGS
- I = (I+1)
-.endr
-
-## do rounds 60...79
- vmovdqu K60_79(%rip), K
-.rep 20
- SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
- ROTATE_ARGS
- I = (I+1)
-.endr
-
- vpaddd AA,A,A
- vpaddd BB,B,B
- vpaddd CC,C,C
- vpaddd DD,D,D
- vpaddd EE,E,E
-
- sub $1, arg2
- jne lloop
-
- # write out digests
- vmovdqu A, 0*32(arg1)
- vmovdqu B, 1*32(arg1)
- vmovdqu C, 2*32(arg1)
- vmovdqu D, 3*32(arg1)
- vmovdqu E, 4*32(arg1)
-
- # update input pointers
- add IDX, inp0
- add IDX, inp1
- add IDX, inp2
- add IDX, inp3
- add IDX, inp4
- add IDX, inp5
- add IDX, inp6
- add IDX, inp7
- mov inp0, _data_ptr (arg1)
- mov inp1, _data_ptr + 1*8(arg1)
- mov inp2, _data_ptr + 2*8(arg1)
- mov inp3, _data_ptr + 3*8(arg1)
- mov inp4, _data_ptr + 4*8(arg1)
- mov inp5, _data_ptr + 5*8(arg1)
- mov inp6, _data_ptr + 6*8(arg1)
- mov inp7, _data_ptr + 7*8(arg1)
-
- ################
- ## Postamble
-
- mov RSP_SAVE, %rsp
-
- # restore callee-saved clobbered registers
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- ret
-ENDPROC(sha1_x8_avx2)
-
-
-.section .rodata.cst32.K00_19, "aM", @progbits, 32
-.align 32
-K00_19:
-.octa 0x5A8279995A8279995A8279995A827999
-.octa 0x5A8279995A8279995A8279995A827999
-
-.section .rodata.cst32.K20_39, "aM", @progbits, 32
-.align 32
-K20_39:
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
-
-.section .rodata.cst32.K40_59, "aM", @progbits, 32
-.align 32
-K40_59:
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
-
-.section .rodata.cst32.K60_79, "aM", @progbits, 32
-.align 32
-K60_79:
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile
deleted file mode 100644
index 53ad6e7db747..000000000000
--- a/arch/x86/crypto/sha256-mb/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-OBJECT_FILES_NON_STANDARD := y
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
- sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
- sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
-endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
deleted file mode 100644
index 97c5fc43e115..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-/*
- * Multi buffer SHA256 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha256_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha256_mb_alg_state;
-
-struct sha256_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_submit)
- (struct sha256_mb_mgr *state, struct job_sha256 *job);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
- (struct sha256_mb_mgr *state);
-static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
- (struct sha256_mb_mgr *state);
-
-inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA256_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA256_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA256_PADLENGTHFIELD_SIZE;
-
-#if SHA256_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA256_LOG2_BLOCK_SIZE;
-}
-
-static struct sha256_hash_ctx
- *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr,
- struct sha256_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA256_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA256_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA256_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha256_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha256_hash_ctx
- *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user. If it is not ready, resubmit the job to finish processing.
- * If sha256_ctx_mgr_resubmit returned a job, it is ready to be
- * returned. Otherwise, all jobs currently being managed by the
- * hash_ctx_mgr still need processing.
- */
- struct sha256_hash_ctx *ctx;
-
- ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr);
- return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr)
-{
- sha256_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
- struct sha256_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- return ctx;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- return ctx;
- }
-
- /* If we made it here, there was no error during this call to submit */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently
- * being processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) {
- /*
- * Compute how many bytes to copy from user buffer into
- * extra block
- */
- uint32_t copy_len = SHA256_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy(
- &ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /* The extra block should never contain more than 1 block */
- assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE);
-
- /*
- * If the extra block buffer contains exactly 1 block,
- * it can be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- return sha256_ctx_mgr_resubmit(mgr, ctx);
-}
-
-static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr)
-{
- struct sha256_hash_ctx *ctx;
-
- while (1) {
- ctx = (struct sha256_hash_ctx *)
- sha256_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- return NULL;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha256_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha256_ctx_mgr_resubmit returned a job, it is ready to
- * be returned. Otherwise, all jobs currently being managed by
- * the sha256_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- return ctx;
- }
-}
-
-static int sha256_mb_init(struct ahash_request *areq)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA256_H0;
- sctx->job.result_digest[1] = SHA256_H1;
- sctx->job.result_digest[2] = SHA256_H2;
- sctx->job.result_digest[3] = SHA256_H3;
- sctx->job.result_digest[4] = SHA256_H4;
- sctx->job.result_digest[5] = SHA256_H5;
- sctx->job.result_digest[6] = SHA256_H6;
- sctx->job.result_digest[7] = SHA256_H7;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be32 *dst = (__be32 *) rctx->out;
-
- for (i = 0; i < 8; ++i)
- dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha256_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha256_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha256_ctx_mgr_flush(cstate->mgr);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha256_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
-
- /* remove from work list */
- spin_lock(&cstate->work_lock);
- list_del(&rctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock(&cstate->work_lock);
- list_del(&req_ctx->waiter);
- spin_unlock(&cstate->work_lock);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr);
- }
-
- return 0;
-}
-
-static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock(&cstate->work_lock);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock(&cstate->work_lock);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha256_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- sha256_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx, areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- sha256_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha256_mb_alg_state.alg_cstate);
-
- struct sha256_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha256_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
- HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha256_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha256_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha256_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha256_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha256_hash_ctx));
-
- return 0;
-}
-
-static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha256_mb_areq_alg = {
- .init = sha256_mb_init,
- .update = sha256_mb_update,
- .final = sha256_mb_final,
- .finup = sha256_mb_finup,
- .export = sha256_mb_export,
- .import = sha256_mb_import,
- .halg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_hash_ctx),
- .base = {
- .cra_name = "__sha256-mb",
- .cra_driver_name = "__intel_sha256-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha256_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha256_mb_areq_init_tfm,
- .cra_exit = sha256_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha256_hash_ctx),
- }
- }
-};
-
-static int sha256_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha256_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha256_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha256_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha256_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha256_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha256_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha256_mb_async_alg = {
- .init = sha256_mb_async_init,
- .update = sha256_mb_async_update,
- .final = sha256_mb_async_final,
- .finup = sha256_mb_async_finup,
- .export = sha256_mb_async_export,
- .import = sha256_mb_async_import,
- .digest = sha256_mb_async_digest,
- .halg = {
- .digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_hash_ctx),
- .base = {
- .cra_name = "sha256",
- .cra_driver_name = "sha256_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA256_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha256_mb_async_alg.halg.base.cra_list),
- .cra_init = sha256_mb_async_init_tfm,
- .cra_exit = sha256_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha256_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha256_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if (time_before(cur_time, rctx->tag.expire))
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha256_hash_ctx *)
- sha256_ctx_mgr_flush(cstate->mgr);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha256_mb error: nothing got"
- " flushed for non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha256_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha256_mb_alg_state.alg_cstate = alloc_percpu
- (struct mcryptd_alg_cstate);
-
- sha256_job_mgr_init = sha256_mb_mgr_init_avx2;
- sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2;
- sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2;
- sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2;
-
- if (!sha256_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha256_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha256_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha256_mb_alg_state.flusher = &sha256_mb_flusher;
-
- err = crypto_register_ahash(&sha256_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha256_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha256_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha256_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha256_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha256_mb_async_alg);
- crypto_unregister_ahash(&sha256_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha256_mb_alg_state.alg_cstate);
-}
-
-module_init(sha256_mb_mod_init);
-module_exit(sha256_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
deleted file mode 100644
index 7c432543dc7f..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Header file for multi buffer SHA256 context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha256_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-
-#ifdef HASH_CTX_DEBUG
- HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
-#endif
-};
-
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-
-/* Hash Constants and Typedefs */
-#define SHA256_DIGEST_LENGTH 8
-#define SHA256_LOG2_BLOCK_SIZE 6
-
-#define SHA256_PADLENGTHFIELD_SIZE 8
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha256_ctx_mgr {
- struct sha256_mb_mgr mgr;
-};
-
-/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
-
-struct sha256_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha256 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
deleted file mode 100644
index b01ae408c56d..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA256_DIGEST_WORDS 8
-
-enum job_sts { STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha256 {
- u8 *buffer;
- u32 len;
- u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-/* SHA256 out-of-order scheduler */
-
-/* typedef uint32_t sha8_digest_array[8][8]; */
-
-struct sha256_args_x8 {
- uint32_t digest[8][8];
- uint8_t *data_ptr[8];
-};
-
-struct sha256_lane_data {
- struct job_sha256 *job_in_lane;
-};
-
-struct sha256_mb_mgr {
- struct sha256_args_x8 args;
-
- uint32_t lens[8];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha256_lane_data ldata[8];
-};
-
-
-#define SHA256_MB_MGR_NUM_LANES_AVX2 8
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
- struct job_sha256 *job);
-struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
-struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
deleted file mode 100644
index 5c377bac21d0..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define SZ8 8*SHA256_DIGEST_WORD_SIZE
-#define ROUNDS 64*SZ8
-#define PTR_SZ 8
-#define SHA256_DIGEST_WORD_SIZE 4
-#define MAX_SHA256_LANES 8
-#define SHA256_DIGEST_WORDS 8
-#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
-#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
-#define SHA256_BLK_SZ 64
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-########################################################################
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - %%tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endif
-.endstruc
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-
-########################################################################
-#### Define SHA256 Out Of Order Data Structures
-########################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-########################################################################
-
-START_FIELDS # SHA256_ARGS_X4
-### name size align
-FIELD _digest, 4*8*8, 4 # transposed digest
-FIELD _data_ptr, 8*8, 8 # array of pointers to data
-END_FIELDS
-
- _SHA256_ARGS_X4_size = _FIELD_OFFSET
- _SHA256_ARGS_X4_align = _STRUCT_ALIGN
- _SHA256_ARGS_X8_size = _FIELD_OFFSET
- _SHA256_ARGS_X8_align = _STRUCT_ALIGN
-
-#######################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
-FIELD _lens, 4*8, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size = _FIELD_OFFSET
- _MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-START_FIELDS #STACK_FRAME
-### name size align
-FIELD _data, 16*SZ8, 1 # transposed digest
-FIELD _digest, 8*SZ8, 1 # array of pointers to data
-FIELD _ytmp, 4*SZ8, 1
-FIELD _rsp, 8, 1
-END_FIELDS
-
- _STACK_FRAME_size = _FIELD_OFFSET
- _STACK_FRAME_align = _STRUCT_ALIGN
-
-#######################################################################
-
-########################################################################
-#### Define constants
-########################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-########################################################################
-#### Define JOB_SHA256 structure
-########################################################################
-
-START_FIELDS # JOB_SHA256
-
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 8, 8 # length in bytes
-FIELD _result_digest, 8*4, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
- _JOB_SHA256_size = _FIELD_OFFSET
- _JOB_SHA256_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
deleted file mode 100644
index d2364c55bbde..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Flush routine for SHA256 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-#LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# Common register definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-# idx must be a register not clobberred by sha1_mult
-#define idx %r8
-#define DWORD_idx %r8d
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-#define tmp2_w %ebx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 %arg1
-
-#define extra_blocks %arg2
-#define p %arg2
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha256_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+3, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
- offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne four(%rip), idx
- offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne five(%rip), idx
- offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne six(%rip), idx
- offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne seven(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 8
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 4*I)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- vmovdqu _lens+0*16(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqu %xmm0, _lens+0*16(state)
- vmovdqu %xmm1, _lens+1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha256_x8_avx2
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
-
- mov unused_lanes, _unused_lanes(state)
- movl $0xFFFFFFFF, _lens(state,idx,4)
-
- vmovd _args_digest(state , idx, 4) , %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
- vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- vmovdqu %xmm1, offset(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha256_mb_mgr_flush_avx2)
-
-##############################################################################
-
-.align 16
-ENTRY(sha256_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- ## if bit 32+3 is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $(32+3), unused_lanes
- jc .return_null
-
- # Find min length
- vmovdqu _lens(state), %xmm0
- vmovdqu _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- test $~0xF, idx
- jnz .return_null
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state, idx, 4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
- vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- offset = (_result_digest + 1*16)
- vmovdqu %xmm1, offset(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
-.octa 0x000000000000000000000000FFFFFFF0
-
-.section .rodata.cst8, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-two:
-.quad 2
-three:
-.quad 3
-four:
-.quad 4
-five:
-.quad 5
-six:
-.quad 6
-seven:
-.quad 7
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
deleted file mode 100644
index b0c498371e67..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha256_mb_mgr.h"
-
-void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
-{
- unsigned int j;
-
- state->unused_lanes = 0xF76543210ULL;
- for (j = 0; j < 8; j++) {
- state->lens[j] = 0xFFFFFFFF;
- state->ldata[j].job_in_lane = NULL;
- }
-}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
deleted file mode 100644
index b36ae7454084..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA256 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-.extern sha256_x8_avx2
-
-# LINUX register definitions
-arg1 = %rdi
-arg2 = %rsi
-size_offset = %rcx
-tmp2 = %rcx
-extra_blocks = %rdx
-
-# Common definitions
-#define state arg1
-#define job %rsi
-#define len2 arg2
-#define p2 arg2
-
-# idx must be a register not clobberred by sha1_x8_avx2
-idx = %r8
-DWORD_idx = %r8d
-last_len = %r8
-
-p = %r11
-start_offset = %r11
-
-unused_lanes = %rbx
-BYTE_unused_lanes = %bl
-
-job_rax = %rax
-len = %rax
-DWORD_len = %eax
-
-lane = %r12
-tmp3 = %r12
-
-tmp = %r9
-DWORD_tmp = %r9d
-
-lane_data = %r10
-
-# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha256_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- mov unused_lanes, lane
- and $0xF, lane
- shr $4, unused_lanes
- imul $_LANE_DATA_size, lane, lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- shl $4, len
- or lane, len
-
- movl DWORD_len, _lens(state , lane, 4)
-
- # Load digest words from result_digest
- vmovdqu _result_digest(job), %xmm0
- vmovdqu _result_digest+1*16(job), %xmm1
- vmovd %xmm0, _args_digest(state, lane, 4)
- vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
- vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
- vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
- vmovd %xmm1, _args_digest+4*32(state , lane, 4)
-
- vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
- vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
- vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xF, unused_lanes
- jne return_null
-
-start_loop:
- # Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
-
- vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
- vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
- vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
- vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
-
- vmovd %xmm2, DWORD_idx
- mov idx, len2
- and $0xF, idx
- shr $4, len2
- jz len_is_0
-
- vpand clear_low_nibble(%rip), %xmm2, %xmm2
- vpshufd $0, %xmm2, %xmm2
-
- vpsubd %xmm2, %xmm0, %xmm0
- vpsubd %xmm2, %xmm1, %xmm1
-
- vmovdqa %xmm0, _lens + 0*16(state)
- vmovdqa %xmm1, _lens + 1*16(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha256_x8_avx2
-
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $4, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens(state,idx,4)
-
- vmovd _args_digest(state, idx, 4), %xmm0
- vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
- vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
- vmovd _args_digest+4*32(state, idx, 4), %xmm1
-
- vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
- vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
- vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
-
- vmovdqu %xmm0, _result_digest(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-
-ENDPROC(sha256_mb_mgr_submit_avx2)
-
-.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
-.align 16
-clear_low_nibble:
- .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
deleted file mode 100644
index 1687c80c5995..000000000000
--- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Multi-buffer SHA256 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include "sha256_mb_mgr_datastruct.S"
-
-## code to compute oct SHA256 using SSE-256
-## outer calling routine takes care of save and restore of XMM registers
-## Logic designed/laid out by JDG
-
-## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
-## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
-## Linux preserves: rdi rbp r8
-##
-## clobbers %ymm0-15
-
-arg1 = %rdi
-arg2 = %rsi
-reg3 = %rcx
-reg4 = %rdx
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = reg3
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-inp4 = %r13
-inp5 = %r14
-inp6 = %r15
-inp7 = reg4
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-T1 = %ymm8
-
-a0 = %ymm12
-a1 = %ymm13
-a2 = %ymm14
-TMP = %ymm15
-TMP0 = %ymm6
-TMP1 = %ymm7
-
-TT0 = %ymm8
-TT1 = %ymm9
-TT2 = %ymm10
-TT3 = %ymm11
-TT4 = %ymm12
-TT5 = %ymm13
-TT6 = %ymm14
-TT7 = %ymm15
-
-# Define stack usage
-
-# Assume stack aligned to 32 bytes before call
-# Therefore FRAMESZ mod 32 must be 32-8 = 24
-
-#define FRAMESZ 0x388
-
-#define VMOVPS vmovups
-
-# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
-# "transpose" data in {r0...r7} using temps {t0...t1}
-# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
-# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
-# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
-# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
-#
-# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
-# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
-# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
-# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
-# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
-# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
-# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
-# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
-# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
-#
-
-.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
- # process top half (r0..r3) {a...d}
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
- vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
- vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
- vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
- vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
-
- # use r2 in place of t0
- # process bottom half (r4..r7) {e...h}
- vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
- vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
- vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
- vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
- vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
- vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
- vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
- vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
-
- vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
- vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
- vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
- vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
- vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
- vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
- vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
- vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
-
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-.macro _PRORD reg imm tmp
- vpslld $(32-\imm),\reg,\tmp
- vpsrld $\imm,\reg, \reg
- vpor \tmp,\reg, \reg
-.endm
-
-# PRORD_nd reg, imm, tmp, src
-.macro _PRORD_nd reg imm tmp src
- vpslld $(32-\imm), \src, \tmp
- vpsrld $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-# PRORD dst/src, amt
-.macro PRORD reg imm
- _PRORD \reg,\imm,TMP
-.endm
-
-# PRORD_nd dst, src, amt
-.macro PRORD_nd reg tmp imm
- _PRORD_nd \reg, \imm, TMP, \tmp
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
- PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
-
- vpxor g, f, a2 # ch: a2 = f^g
- vpand e,a2, a2 # ch: a2 = (f^g)&e
- vpxor g, a2, a2 # a2 = ch
-
- PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
-
- vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
- vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
- vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
- PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
- vpaddd a2, h, h # h = h + ch
- PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
- vpaddd \_T1,h, h # h = h + ch + W + K
- vpxor a1, a0, a0 # a0 = sigma1
- PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
- vpxor c, a, \_T1 # maj: T1 = a^c
- add $SZ8, ROUND # ROUND++
- vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
- vpaddd a0, h, h
- vpaddd h, d, d
- vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
- PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
- vpxor a1, a2, a2 # a2 = sig0
- vpand c, a, a1 # maj: a1 = a&c
- vpor \_T1, a1, a1 # a1 = maj
- vpaddd a1, h, h # h = h + ch + W + K + maj
- vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
- ROTATE_ARGS
-.endm
-
-# arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
- vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
- vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
- vmovdqu \_T1, a0
- PRORD \_T1,11
- vmovdqu a1, a2
- PRORD a1,2
- vpxor a0, \_T1, \_T1
- PRORD \_T1, 7
- vpxor a2, a1, a1
- PRORD a1, 17
- vpsrld $3, a0, a0
- vpxor a0, \_T1, \_T1
- vpsrld $10, a2, a2
- vpxor a2, a1, a1
- vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
- vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
- vpaddd a1, \_T1, \_T1
-
- ROUND_00_15 \_T1,\i
-.endm
-
-# SHA256_ARGS:
-# UINT128 digest[8]; // transposed digests
-# UINT8 *data_ptr[4];
-
-# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
-# arg 1 : STATE : pointer to array of pointers to input data
-# arg 2 : INP_SIZE : size of input in blocks
- # general registers preserved in outer calling routine
- # outer calling routine saves all the XMM registers
- # save rsp, allocate 32-byte aligned for local variables
-ENTRY(sha256_x8_avx2)
-
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, IDX
- sub $FRAMESZ, %rsp
- and $~0x1F, %rsp
- mov IDX, _rsp(%rsp)
-
- # Load the pre-transposed incoming digest.
- vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
- vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
- vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
- vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
- vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
- vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
- vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
- vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
-
- lea K256_8(%rip),TBL
-
- # load the address of each of the 4 message lanes
- # getting ready to transpose input onto stack
- mov _args_data_ptr+0*PTR_SZ(STATE),inp0
- mov _args_data_ptr+1*PTR_SZ(STATE),inp1
- mov _args_data_ptr+2*PTR_SZ(STATE),inp2
- mov _args_data_ptr+3*PTR_SZ(STATE),inp3
- mov _args_data_ptr+4*PTR_SZ(STATE),inp4
- mov _args_data_ptr+5*PTR_SZ(STATE),inp5
- mov _args_data_ptr+6*PTR_SZ(STATE),inp6
- mov _args_data_ptr+7*PTR_SZ(STATE),inp7
-
- xor IDX, IDX
-lloop:
- xor ROUND, ROUND
-
- # save old digest
- vmovdqu a, _digest(%rsp)
- vmovdqu b, _digest+1*SZ8(%rsp)
- vmovdqu c, _digest+2*SZ8(%rsp)
- vmovdqu d, _digest+3*SZ8(%rsp)
- vmovdqu e, _digest+4*SZ8(%rsp)
- vmovdqu f, _digest+5*SZ8(%rsp)
- vmovdqu g, _digest+6*SZ8(%rsp)
- vmovdqu h, _digest+7*SZ8(%rsp)
- i = 0
-.rep 2
- VMOVPS i*32(inp0, IDX), TT0
- VMOVPS i*32(inp1, IDX), TT1
- VMOVPS i*32(inp2, IDX), TT2
- VMOVPS i*32(inp3, IDX), TT3
- VMOVPS i*32(inp4, IDX), TT4
- VMOVPS i*32(inp5, IDX), TT5
- VMOVPS i*32(inp6, IDX), TT6
- VMOVPS i*32(inp7, IDX), TT7
- vmovdqu g, _ytmp(%rsp)
- vmovdqu h, _ytmp+1*SZ8(%rsp)
- TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
- vmovdqu _ytmp(%rsp), g
- vpshufb TMP1, TT0, TT0
- vpshufb TMP1, TT1, TT1
- vpshufb TMP1, TT2, TT2
- vpshufb TMP1, TT3, TT3
- vpshufb TMP1, TT4, TT4
- vpshufb TMP1, TT5, TT5
- vpshufb TMP1, TT6, TT6
- vpshufb TMP1, TT7, TT7
- vmovdqu _ytmp+1*SZ8(%rsp), h
- vmovdqu TT4, _ytmp(%rsp)
- vmovdqu TT5, _ytmp+1*SZ8(%rsp)
- vmovdqu TT6, _ytmp+2*SZ8(%rsp)
- vmovdqu TT7, _ytmp+3*SZ8(%rsp)
- ROUND_00_15 TT0,(i*8+0)
- vmovdqu _ytmp(%rsp), TT0
- ROUND_00_15 TT1,(i*8+1)
- vmovdqu _ytmp+1*SZ8(%rsp), TT1
- ROUND_00_15 TT2,(i*8+2)
- vmovdqu _ytmp+2*SZ8(%rsp), TT2
- ROUND_00_15 TT3,(i*8+3)
- vmovdqu _ytmp+3*SZ8(%rsp), TT3
- ROUND_00_15 TT0,(i*8+4)
- ROUND_00_15 TT1,(i*8+5)
- ROUND_00_15 TT2,(i*8+6)
- ROUND_00_15 TT3,(i*8+7)
- i = (i+1)
-.endr
- add $64, IDX
- i = (i*8)
-
- jmp Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
- ROUND_16_XX T1, i
- i = (i+1)
-.endr
-
- cmp $ROUNDS,ROUND
- jb Lrounds_16_xx
-
- # add old digest
- vpaddd _digest+0*SZ8(%rsp), a, a
- vpaddd _digest+1*SZ8(%rsp), b, b
- vpaddd _digest+2*SZ8(%rsp), c, c
- vpaddd _digest+3*SZ8(%rsp), d, d
- vpaddd _digest+4*SZ8(%rsp), e, e
- vpaddd _digest+5*SZ8(%rsp), f, f
- vpaddd _digest+6*SZ8(%rsp), g, g
- vpaddd _digest+7*SZ8(%rsp), h, h
-
- sub $1, INP_SIZE # unit is blocks
- jne lloop
-
- # write back to memory (state object) the transposed digest
- vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
- vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
-
- # update input pointers
- add IDX, inp0
- mov inp0, _args_data_ptr+0*8(STATE)
- add IDX, inp1
- mov inp1, _args_data_ptr+1*8(STATE)
- add IDX, inp2
- mov inp2, _args_data_ptr+2*8(STATE)
- add IDX, inp3
- mov inp3, _args_data_ptr+3*8(STATE)
- add IDX, inp4
- mov inp4, _args_data_ptr+4*8(STATE)
- add IDX, inp5
- mov inp5, _args_data_ptr+5*8(STATE)
- add IDX, inp6
- mov inp6, _args_data_ptr+6*8(STATE)
- add IDX, inp7
- mov inp7, _args_data_ptr+7*8(STATE)
-
- # Postamble
- mov _rsp(%rsp), %rsp
-
- # restore callee-saved clobbered registers
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- ret
-ENDPROC(sha256_x8_avx2)
-
-.section .rodata.K256_8, "a", @progbits
-.align 64
-K256_8:
- .octa 0x428a2f98428a2f98428a2f98428a2f98
- .octa 0x428a2f98428a2f98428a2f98428a2f98
- .octa 0x71374491713744917137449171374491
- .octa 0x71374491713744917137449171374491
- .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
- .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
- .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
- .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
- .octa 0x3956c25b3956c25b3956c25b3956c25b
- .octa 0x3956c25b3956c25b3956c25b3956c25b
- .octa 0x59f111f159f111f159f111f159f111f1
- .octa 0x59f111f159f111f159f111f159f111f1
- .octa 0x923f82a4923f82a4923f82a4923f82a4
- .octa 0x923f82a4923f82a4923f82a4923f82a4
- .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
- .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
- .octa 0xd807aa98d807aa98d807aa98d807aa98
- .octa 0xd807aa98d807aa98d807aa98d807aa98
- .octa 0x12835b0112835b0112835b0112835b01
- .octa 0x12835b0112835b0112835b0112835b01
- .octa 0x243185be243185be243185be243185be
- .octa 0x243185be243185be243185be243185be
- .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
- .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
- .octa 0x72be5d7472be5d7472be5d7472be5d74
- .octa 0x72be5d7472be5d7472be5d7472be5d74
- .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
- .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
- .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
- .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
- .octa 0xc19bf174c19bf174c19bf174c19bf174
- .octa 0xc19bf174c19bf174c19bf174c19bf174
- .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
- .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
- .octa 0xefbe4786efbe4786efbe4786efbe4786
- .octa 0xefbe4786efbe4786efbe4786efbe4786
- .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
- .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
- .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
- .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
- .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
- .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
- .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
- .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
- .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
- .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
- .octa 0x76f988da76f988da76f988da76f988da
- .octa 0x76f988da76f988da76f988da76f988da
- .octa 0x983e5152983e5152983e5152983e5152
- .octa 0x983e5152983e5152983e5152983e5152
- .octa 0xa831c66da831c66da831c66da831c66d
- .octa 0xa831c66da831c66da831c66da831c66d
- .octa 0xb00327c8b00327c8b00327c8b00327c8
- .octa 0xb00327c8b00327c8b00327c8b00327c8
- .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
- .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
- .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
- .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
- .octa 0xd5a79147d5a79147d5a79147d5a79147
- .octa 0xd5a79147d5a79147d5a79147d5a79147
- .octa 0x06ca635106ca635106ca635106ca6351
- .octa 0x06ca635106ca635106ca635106ca6351
- .octa 0x14292967142929671429296714292967
- .octa 0x14292967142929671429296714292967
- .octa 0x27b70a8527b70a8527b70a8527b70a85
- .octa 0x27b70a8527b70a8527b70a8527b70a85
- .octa 0x2e1b21382e1b21382e1b21382e1b2138
- .octa 0x2e1b21382e1b21382e1b21382e1b2138
- .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
- .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
- .octa 0x53380d1353380d1353380d1353380d13
- .octa 0x53380d1353380d1353380d1353380d13
- .octa 0x650a7354650a7354650a7354650a7354
- .octa 0x650a7354650a7354650a7354650a7354
- .octa 0x766a0abb766a0abb766a0abb766a0abb
- .octa 0x766a0abb766a0abb766a0abb766a0abb
- .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
- .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
- .octa 0x92722c8592722c8592722c8592722c85
- .octa 0x92722c8592722c8592722c8592722c85
- .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
- .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
- .octa 0xa81a664ba81a664ba81a664ba81a664b
- .octa 0xa81a664ba81a664ba81a664ba81a664b
- .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
- .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
- .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
- .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
- .octa 0xd192e819d192e819d192e819d192e819
- .octa 0xd192e819d192e819d192e819d192e819
- .octa 0xd6990624d6990624d6990624d6990624
- .octa 0xd6990624d6990624d6990624d6990624
- .octa 0xf40e3585f40e3585f40e3585f40e3585
- .octa 0xf40e3585f40e3585f40e3585f40e3585
- .octa 0x106aa070106aa070106aa070106aa070
- .octa 0x106aa070106aa070106aa070106aa070
- .octa 0x19a4c11619a4c11619a4c11619a4c116
- .octa 0x19a4c11619a4c11619a4c11619a4c116
- .octa 0x1e376c081e376c081e376c081e376c08
- .octa 0x1e376c081e376c081e376c081e376c08
- .octa 0x2748774c2748774c2748774c2748774c
- .octa 0x2748774c2748774c2748774c2748774c
- .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
- .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
- .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
- .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
- .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
- .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
- .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
- .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
- .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
- .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
- .octa 0x748f82ee748f82ee748f82ee748f82ee
- .octa 0x748f82ee748f82ee748f82ee748f82ee
- .octa 0x78a5636f78a5636f78a5636f78a5636f
- .octa 0x78a5636f78a5636f78a5636f78a5636f
- .octa 0x84c8781484c8781484c8781484c87814
- .octa 0x84c8781484c8781484c8781484c87814
- .octa 0x8cc702088cc702088cc702088cc70208
- .octa 0x8cc702088cc702088cc702088cc70208
- .octa 0x90befffa90befffa90befffa90befffa
- .octa 0x90befffa90befffa90befffa90befffa
- .octa 0xa4506ceba4506ceba4506ceba4506ceb
- .octa 0xa4506ceba4506ceba4506ceba4506ceb
- .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
- .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
- .octa 0xc67178f2c67178f2c67178f2c67178f2
- .octa 0xc67178f2c67178f2c67178f2c67178f2
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK:
-.octa 0x0c0d0e0f08090a0b0405060700010203
-.octa 0x0c0d0e0f08090a0b0405060700010203
-
-.section .rodata.cst256.K256, "aM", @progbits, 256
-.align 64
-.global K256
-K256:
- .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
- sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
- sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
- (struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
- (struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
- (struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA512_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
- (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA512_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA512_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA512_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr,
- &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha512_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- if (ctx)
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user.
- * If it is not ready, resubmit the job to finish processing.
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
- * returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr
- * still need processing.
- */
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_get_comp_job(&mgr->mgr);
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
- sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
- struct sha512_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- struct sha512_ctx_mgr *mgr;
- unsigned long irqflags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, irqflags);
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- goto unlock;
- }
-
- /*
- * If we made it here, there were no errors during this call to
- * submit
- */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently being
- * processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
- /* Compute how many bytes to copy from user buffer into extra
- * block
- */
- uint32_t copy_len = SHA512_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy
- (&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /* The extra block should never contain more than 1 block
- * here
- */
- assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
- /* If the extra block buffer contains exactly 1 block, it can
- * be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
- spin_unlock_irqrestore(&cstate->work_lock, irqflags);
- return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- while (1) {
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- break;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to
- * be returned. Otherwise, all jobs currently being managed by
- * the sha512_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- break;
- }
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA512_H0;
- sctx->job.result_digest[1] = SHA512_H1;
- sctx->job.result_digest[2] = SHA512_H2;
- sctx->job.result_digest[3] = SHA512_H3;
- sctx->job.result_digest[4] = SHA512_H4;
- sctx->job.result_digest[5] = SHA512_H5;
- sctx->job.result_digest[6] = SHA512_H6;
- sctx->job.result_digest[7] = SHA512_H7;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be64 *dst = (__be64 *) rctx->out;
-
- for (i = 0; i < 8; ++i)
- dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha512_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha512_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha512_ctx_mgr_flush(cstate);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha512_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
- unsigned long flags;
-
- /* remove from work list */
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&rctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&req_ctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- }
-
- return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
- unsigned long flags;
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha512_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha512_hash_ctx));
-
- return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
- .init = sha512_mb_init,
- .update = sha512_mb_update,
- .final = sha512_mb_final,
- .finup = sha512_mb_finup,
- .export = sha512_mb_export,
- .import = sha512_mb_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "__sha512-mb",
- .cra_driver_name = "__intel_sha512-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha512_mb_areq_init_tfm,
- .cra_exit = sha512_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_hash_ctx),
- }
- }
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
-
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
- .init = sha512_mb_async_init,
- .update = sha512_mb_async_update,
- .final = sha512_mb_async_final,
- .finup = sha512_mb_async_finup,
- .digest = sha512_mb_async_digest,
- .export = sha512_mb_async_export,
- .import = sha512_mb_async_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name = "sha512_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_async_alg.halg.base.cra_list),
- .cra_init = sha512_mb_async_init_tfm,
- .cra_exit = sha512_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha512_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if time_before(cur_time, rctx->tag.expire)
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha512_hash_ctx *)
- sha512_ctx_mgr_flush(cstate);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha512_mb error: nothing got flushed for"
- " non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha512_mb_alg_state.alg_cstate =
- alloc_percpu(struct mcryptd_alg_cstate);
-
- sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
- sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
- sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
- sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
- if (!sha512_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha512_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha512_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
- err = crypto_register_ahash(&sha512_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha512_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha512_mb_async_alg);
- crypto_unregister_ahash(&sha512_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-};
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH 8
-#define SHA512_LOG2_BLOCK_SIZE 7
-
-#define SHA512_PADLENGTHFIELD_SIZE 16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
- struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha512 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha512 {
- u8 *buffer;
- u64 len;
- u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-struct sha512_args_x4 {
- uint64_t digest[8][4];
- uint8_t *data_ptr[4];
-};
-
-struct sha512_lane_data {
- struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
- struct sha512_args_x4 args;
-
- uint64_t lens[4];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ 8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4 4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS 80*SZ4
-#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS # SHA512_ARGS_X4
-### name size align
-FIELD _digest, 8*8*4, 4 # transposed digest
-FIELD _data_ptr, 8*4, 8 # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size = _FIELD_OFFSET
- _SHA512_ARGS_X4_align = _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD _lens, 8*4, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size = _FIELD_OFFSET
- _MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS # JOB_SHA512
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 8, 8 # length in bytes
-FIELD _result_digest, 8*8, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx %rdx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 arg1
-
-#define extra_blocks arg2
-#define p arg2
-
-#define tmp4 %r8
-#define lens0 %r8
-
-#define lens1 %r9
-#define lens2 %r10
-#define lens3 %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha512_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+7, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 4
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 8*I +4)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- mov _lens + 0*8(state),lens0
- mov lens0,idx
- mov _lens + 1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens + 2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens + 3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2, lens0
- sub len2, lens1
- sub len2, lens2
- sub len2, lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest+0*32(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- mov _unused_lanes(state), unused_lanes
- bt $(32+7), unused_lanes
- jc .return_null
-
- # Find min length
- mov _lens(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- test $~0xF,idx
- jnz .return_null
- and $0xF,idx
-
- #process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest+0*16(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-
-.section .rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad 2
-
-.section .rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad 3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
deleted file mode 100644
index d08805032f01..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha512_mb_mgr.h"
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
- unsigned int j;
-
- /* initially all lanes are unused */
- state->lens[0] = 0xFFFFFFFF00000000;
- state->lens[1] = 0xFFFFFFFF00000001;
- state->lens[2] = 0xFFFFFFFF00000002;
- state->lens[3] = 0xFFFFFFFF00000003;
-
- state->unused_lanes = 0xFF03020100;
- for (j = 0; j < 4; j++)
- state->ldata[j].job_in_lane = NULL;
-}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1 %rdi
-#define arg2 %rsi
-
-#define idx %rdx
-#define last_len %rdx
-
-#define size_offset %rcx
-#define tmp2 %rcx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-#define p2 arg2
-
-#define p %r11
-#define start_offset %r11
-
-#define unused_lanes %rbx
-
-#define job_rax %rax
-#define len %rax
-
-#define lane %r12
-#define tmp3 %r12
-#define lens3 %r12
-
-#define extra_blocks %r8
-#define lens0 %r8
-
-#define tmp %r9
-#define lens1 %r9
-
-#define lane_data %r10
-#define lens2 %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha512_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- movzb %bl,lane
- shr $8, unused_lanes
- imul $_LANE_DATA_size, lane,lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- movl DWORD_len,_lens+4(state , lane, 8)
-
- # Load digest words from result_digest
- vmovdqu _result_digest+0*16(job), %xmm0
- vmovdqu _result_digest+1*16(job), %xmm1
- vmovdqu _result_digest+2*16(job), %xmm2
- vmovdqu _result_digest+3*16(job), %xmm3
-
- vmovq %xmm0, _args_digest(state, lane, 8)
- vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
- vmovq %xmm1, _args_digest+2*32(state , lane, 8)
- vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
- vmovq %xmm2, _args_digest+4*32(state , lane, 8)
- vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
- vmovq %xmm3, _args_digest+6*32(state , lane, 8)
- vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xFF, unused_lanes
- jne return_null
-
-start_loop:
-
- # Find min length
- mov _lens+0*8(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1, idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2,lens0
- sub len2,lens1
- sub len2,lens2
- sub len2,lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF,_lens+4(state,idx,8)
- vmovq _args_digest+0*32(state , idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state , idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state , idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state , idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest + 0*16(job_rax)
- vmovdqu %xmm1, _result_digest + 1*16(job_rax)
- vmovdqu %xmm2, _result_digest + 2*16(job_rax)
- vmovdqu %xmm3, _result_digest + 3*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section .rodata.cst16, "aM", @progbits, 16
-.align 16
-H0: .int 0x6a09e667
-H1: .int 0xbb67ae85
-H2: .int 0x3c6ef372
-H3: .int 0xa54ff53a
-H4: .int 0x510e527f
-H5: .int 0x9b05688c
-H6: .int 0x1f83d9ab
-H7: .int 0x5be0cd19
-*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
-
- vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
- vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
- vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
- vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
- vpsllq $(64-\imm),\reg,\tmp
- vpsrlq $\imm,\reg, \reg
- vpor \tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
- vpsllq $(64-\imm), \src, \tmp
- vpsrlq $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
- _PRORQ \reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
- _PRORQ_nd \reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
- PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4)
-
- vpxor g, f, a2 # ch: a2 = f^g
- vpand e,a2, a2 # ch: a2 = (f^g)&e
- vpxor g, a2, a2 # a2 = ch
-
- PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25)
-
- offset = SZ4*(\i & 0xf)
- vmovdqu \_T1,offset(%rsp)
- vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
- vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
- PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11)
- vpaddq a2, h, h # h = h + ch
- PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11)
- vpaddq \_T1,h, h # h = h + ch + W + K
- vpxor a1, a0, a0 # a0 = sigma1
- vmovdqu a,\_T1
- PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22)
- vpxor c, \_T1, \_T1 # maj: T1 = a^c
- add $SZ4, ROUND # ROUND++
- vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
- vpaddq a0, h, h
- vpaddq h, d, d
- vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
- PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13)
- vpxor a1, a2, a2 # a2 = sig0
- vpand c, a, a1 # maj: a1 = a&c
- vpor \_T1, a1, a1 # a1 = maj
- vpaddq a1, h, h # h = h + ch + W + K + maj
- vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
- ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
- vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
- vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
- vmovdqu \_T1, a0
- PRORQ \_T1,7
- vmovdqu a1, a2
- PRORQ a1,42
- vpxor a0, \_T1, \_T1
- PRORQ \_T1, 1
- vpxor a2, a1, a1
- PRORQ a1, 19
- vpsrlq $7, a0, a0
- vpxor a0, \_T1, \_T1
- vpsrlq $6, a2, a2
- vpxor a2, a1, a1
- vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
- vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
- vpaddq a1, \_T1, \_T1
-
- ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
- # general registers preserved in outer calling routine
- # outer calling routine saves all the XMM registers
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- sub $STACK_SPACE1, %rsp
-
- # Load the pre-transposed incoming digest.
- vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
- vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
- vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
- vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
- vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
- vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
- vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
- vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
- lea K512_4(%rip),TBL
-
- # load the address of each of the 4 message lanes
- # getting ready to transpose input onto stack
- mov _data_ptr+0*PTR_SZ(STATE),inp0
- mov _data_ptr+1*PTR_SZ(STATE),inp1
- mov _data_ptr+2*PTR_SZ(STATE),inp2
- mov _data_ptr+3*PTR_SZ(STATE),inp3
-
- xor IDX, IDX
-lloop:
- xor ROUND, ROUND
-
- # save old digest
- vmovdqu a, _digest(%rsp)
- vmovdqu b, _digest+1*SZ4(%rsp)
- vmovdqu c, _digest+2*SZ4(%rsp)
- vmovdqu d, _digest+3*SZ4(%rsp)
- vmovdqu e, _digest+4*SZ4(%rsp)
- vmovdqu f, _digest+5*SZ4(%rsp)
- vmovdqu g, _digest+6*SZ4(%rsp)
- vmovdqu h, _digest+7*SZ4(%rsp)
- i = 0
-.rep 4
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
- VMOVPD i*32(inp0, IDX), TT2
- VMOVPD i*32(inp1, IDX), TT1
- VMOVPD i*32(inp2, IDX), TT4
- VMOVPD i*32(inp3, IDX), TT3
- TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
- vpshufb TMP, TT0, TT0
- vpshufb TMP, TT1, TT1
- vpshufb TMP, TT2, TT2
- vpshufb TMP, TT3, TT3
- ROUND_00_15 TT0,(i*4+0)
- ROUND_00_15 TT1,(i*4+1)
- ROUND_00_15 TT2,(i*4+2)
- ROUND_00_15 TT3,(i*4+3)
- i = (i+1)
-.endr
- add $128, IDX
-
- i = (i*4)
-
- jmp Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
- ROUND_16_XX T1, i
- i = (i+1)
-.endr
- cmp $0xa00,ROUND
- jb Lrounds_16_xx
-
- # add old digest
- vpaddq _digest(%rsp), a, a
- vpaddq _digest+1*SZ4(%rsp), b, b
- vpaddq _digest+2*SZ4(%rsp), c, c
- vpaddq _digest+3*SZ4(%rsp), d, d
- vpaddq _digest+4*SZ4(%rsp), e, e
- vpaddq _digest+5*SZ4(%rsp), f, f
- vpaddq _digest+6*SZ4(%rsp), g, g
- vpaddq _digest+7*SZ4(%rsp), h, h
-
- sub $1, INP_SIZE # unit is blocks
- jne lloop
-
- # write back to memory (state object) the transposed digest
- vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
- # update input data pointers
- add IDX, inp0
- mov inp0, _data_ptr+0*PTR_SZ(STATE)
- add IDX, inp1
- mov inp1, _data_ptr+1*PTR_SZ(STATE)
- add IDX, inp2
- mov inp2, _data_ptr+2*PTR_SZ(STATE)
- add IDX, inp3
- mov inp3, _data_ptr+3*PTR_SZ(STATE)
-
- #;;;;;;;;;;;;;;;
- #; Postamble
- add $STACK_SPACE1, %rsp
- # restore callee-saved clobbered registers
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- # outer calling routine restores XMM and other GP registers
- ret
-ENDPROC(sha512_x4_avx2)
-
-.section .rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
- .octa 0x428a2f98d728ae22428a2f98d728ae22,\
- 0x428a2f98d728ae22428a2f98d728ae22
- .octa 0x7137449123ef65cd7137449123ef65cd,\
- 0x7137449123ef65cd7137449123ef65cd
- .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
- 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
- .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
- 0xe9b5dba58189dbbce9b5dba58189dbbc
- .octa 0x3956c25bf348b5383956c25bf348b538,\
- 0x3956c25bf348b5383956c25bf348b538
- .octa 0x59f111f1b605d01959f111f1b605d019,\
- 0x59f111f1b605d01959f111f1b605d019
- .octa 0x923f82a4af194f9b923f82a4af194f9b,\
- 0x923f82a4af194f9b923f82a4af194f9b
- .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
- 0xab1c5ed5da6d8118ab1c5ed5da6d8118
- .octa 0xd807aa98a3030242d807aa98a3030242,\
- 0xd807aa98a3030242d807aa98a3030242
- .octa 0x12835b0145706fbe12835b0145706fbe,\
- 0x12835b0145706fbe12835b0145706fbe
- .octa 0x243185be4ee4b28c243185be4ee4b28c,\
- 0x243185be4ee4b28c243185be4ee4b28c
- .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
- 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
- .octa 0x72be5d74f27b896f72be5d74f27b896f,\
- 0x72be5d74f27b896f72be5d74f27b896f
- .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
- 0x80deb1fe3b1696b180deb1fe3b1696b1
- .octa 0x9bdc06a725c712359bdc06a725c71235,\
- 0x9bdc06a725c712359bdc06a725c71235
- .octa 0xc19bf174cf692694c19bf174cf692694,\
- 0xc19bf174cf692694c19bf174cf692694
- .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
- 0xe49b69c19ef14ad2e49b69c19ef14ad2
- .octa 0xefbe4786384f25e3efbe4786384f25e3,\
- 0xefbe4786384f25e3efbe4786384f25e3
- .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
- 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
- .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
- 0x240ca1cc77ac9c65240ca1cc77ac9c65
- .octa 0x2de92c6f592b02752de92c6f592b0275,\
- 0x2de92c6f592b02752de92c6f592b0275
- .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
- 0x4a7484aa6ea6e4834a7484aa6ea6e483
- .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
- 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
- .octa 0x76f988da831153b576f988da831153b5,\
- 0x76f988da831153b576f988da831153b5
- .octa 0x983e5152ee66dfab983e5152ee66dfab,\
- 0x983e5152ee66dfab983e5152ee66dfab
- .octa 0xa831c66d2db43210a831c66d2db43210,\
- 0xa831c66d2db43210a831c66d2db43210
- .octa 0xb00327c898fb213fb00327c898fb213f,\
- 0xb00327c898fb213fb00327c898fb213f
- .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
- 0xbf597fc7beef0ee4bf597fc7beef0ee4
- .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
- 0xc6e00bf33da88fc2c6e00bf33da88fc2
- .octa 0xd5a79147930aa725d5a79147930aa725,\
- 0xd5a79147930aa725d5a79147930aa725
- .octa 0x06ca6351e003826f06ca6351e003826f,\
- 0x06ca6351e003826f06ca6351e003826f
- .octa 0x142929670a0e6e70142929670a0e6e70,\
- 0x142929670a0e6e70142929670a0e6e70
- .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
- 0x27b70a8546d22ffc27b70a8546d22ffc
- .octa 0x2e1b21385c26c9262e1b21385c26c926,\
- 0x2e1b21385c26c9262e1b21385c26c926
- .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
- 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
- .octa 0x53380d139d95b3df53380d139d95b3df,\
- 0x53380d139d95b3df53380d139d95b3df
- .octa 0x650a73548baf63de650a73548baf63de,\
- 0x650a73548baf63de650a73548baf63de
- .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
- 0x766a0abb3c77b2a8766a0abb3c77b2a8
- .octa 0x81c2c92e47edaee681c2c92e47edaee6,\
- 0x81c2c92e47edaee681c2c92e47edaee6
- .octa 0x92722c851482353b92722c851482353b,\
- 0x92722c851482353b92722c851482353b
- .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
- 0xa2bfe8a14cf10364a2bfe8a14cf10364
- .octa 0xa81a664bbc423001a81a664bbc423001,\
- 0xa81a664bbc423001a81a664bbc423001
- .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
- 0xc24b8b70d0f89791c24b8b70d0f89791
- .octa 0xc76c51a30654be30c76c51a30654be30,\
- 0xc76c51a30654be30c76c51a30654be30
- .octa 0xd192e819d6ef5218d192e819d6ef5218,\
- 0xd192e819d6ef5218d192e819d6ef5218
- .octa 0xd69906245565a910d69906245565a910,\
- 0xd69906245565a910d69906245565a910
- .octa 0xf40e35855771202af40e35855771202a,\
- 0xf40e35855771202af40e35855771202a
- .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
- 0x106aa07032bbd1b8106aa07032bbd1b8
- .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
- 0x19a4c116b8d2d0c819a4c116b8d2d0c8
- .octa 0x1e376c085141ab531e376c085141ab53,\
- 0x1e376c085141ab531e376c085141ab53
- .octa 0x2748774cdf8eeb992748774cdf8eeb99,\
- 0x2748774cdf8eeb992748774cdf8eeb99
- .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
- 0x34b0bcb5e19b48a834b0bcb5e19b48a8
- .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
- 0x391c0cb3c5c95a63391c0cb3c5c95a63
- .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
- 0x4ed8aa4ae3418acb4ed8aa4ae3418acb
- .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
- 0x5b9cca4f7763e3735b9cca4f7763e373
- .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
- 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
- .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
- 0x748f82ee5defb2fc748f82ee5defb2fc
- .octa 0x78a5636f43172f6078a5636f43172f60,\
- 0x78a5636f43172f6078a5636f43172f60
- .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
- 0x84c87814a1f0ab7284c87814a1f0ab72
- .octa 0x8cc702081a6439ec8cc702081a6439ec,\
- 0x8cc702081a6439ec8cc702081a6439ec
- .octa 0x90befffa23631e2890befffa23631e28,\
- 0x90befffa23631e2890befffa23631e28
- .octa 0xa4506cebde82bde9a4506cebde82bde9,\
- 0xa4506cebde82bde9a4506cebde82bde9
- .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
- 0xbef9a3f7b2c67915bef9a3f7b2c67915
- .octa 0xc67178f2e372532bc67178f2e372532b,\
- 0xc67178f2e372532bc67178f2e372532b
- .octa 0xca273eceea26619cca273eceea26619c,\
- 0xca273eceea26619cca273eceea26619c
- .octa 0xd186b8c721c0c207d186b8c721c0c207,\
- 0xd186b8c721c0c207d186b8c721c0c207
- .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
- 0xeada7dd6cde0eb1eeada7dd6cde0eb1e
- .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
- 0xf57d4f7fee6ed178f57d4f7fee6ed178
- .octa 0x06f067aa72176fba06f067aa72176fba,\
- 0x06f067aa72176fba06f067aa72176fba
- .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
- 0x0a637dc5a2c898a60a637dc5a2c898a6
- .octa 0x113f9804bef90dae113f9804bef90dae,\
- 0x113f9804bef90dae113f9804bef90dae
- .octa 0x1b710b35131c471b1b710b35131c471b,\
- 0x1b710b35131c471b1b710b35131c471b
- .octa 0x28db77f523047d8428db77f523047d84,\
- 0x28db77f523047d8428db77f523047d84
- .octa 0x32caab7b40c7249332caab7b40c72493,\
- 0x32caab7b40c7249332caab7b40c72493
- .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
- 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
- .octa 0x431d67c49c100d4c431d67c49c100d4c,\
- 0x431d67c49c100d4c431d67c49c100d4c
- .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
- 0x4cc5d4becb3e42b64cc5d4becb3e42b6
- .octa 0x597f299cfc657e2a597f299cfc657e2a,\
- 0x597f299cfc657e2a597f299cfc657e2a
- .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
- 0x5fcb6fab3ad6faec5fcb6fab3ad6faec
- .octa 0x6c44198c4a4758176c44198c4a475817,\
- 0x6c44198c4a4758176c44198c4a475817
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
- .octa 0x18191a1b1c1d1e1f1011121314151617
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index a0f46bdd9f24..fab4df16a3c4 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -12,38 +12,23 @@
#include <asm/user32.h>
#include <asm/unistd.h>
+#include <asm-generic/compat.h>
+
#define COMPAT_USER_HZ 100
#define COMPAT_UTS_MACHINE "i686\0\0"
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
typedef u16 __compat_gid_t;
typedef u32 __compat_uid32_t;
typedef u32 __compat_gid32_t;
typedef u16 compat_mode_t;
-typedef u32 compat_ino_t;
typedef u16 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef u16 compat_nlink_t;
typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_timer_t;
-typedef s32 compat_key_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
typedef s64 __attribute__((aligned(4))) compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
-typedef u32 compat_u32;
typedef u64 __attribute__((aligned(4))) compat_u64;
-typedef u32 compat_uptr_t;
struct compat_stat {
compat_dev_t st_dev;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09b2e3e2cf1b..55e51ff7e421 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -102,7 +102,15 @@
#define UNMAPPED_GVA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
-#define KVM_NR_PAGE_SIZES 3
+enum {
+ PT_PAGE_TABLE_LEVEL = 1,
+ PT_DIRECTORY_LEVEL = 2,
+ PT_PDPE_LEVEL = 3,
+ /* set max level to the biggest one */
+ PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL,
+};
+#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \
+ PT_PAGE_TABLE_LEVEL + 1)
#define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
@@ -177,6 +185,7 @@ enum {
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
+#define DR6_BT (1 << 15)
#define DR6_RTM (1 << 16)
#define DR6_FIXED_1 0xfffe0ff0
#define DR6_INIT 0xffff0ff0
@@ -247,7 +256,7 @@ struct kvm_mmu_memory_cache {
* @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
*/
union kvm_mmu_page_role {
- unsigned word;
+ u32 word;
struct {
unsigned level:4;
unsigned cr4_pae:1;
@@ -273,6 +282,34 @@ union kvm_mmu_page_role {
};
};
+union kvm_mmu_extended_role {
+/*
+ * This structure complements kvm_mmu_page_role caching everything needed for
+ * MMU configuration. If nothing in both these structures changed, MMU
+ * re-configuration can be skipped. @valid bit is set on first usage so we don't
+ * treat all-zero structure as valid data.
+ */
+ u32 word;
+ struct {
+ unsigned int valid:1;
+ unsigned int execonly:1;
+ unsigned int cr0_pg:1;
+ unsigned int cr4_pse:1;
+ unsigned int cr4_pke:1;
+ unsigned int cr4_smap:1;
+ unsigned int cr4_smep:1;
+ unsigned int cr4_la57:1;
+ };
+};
+
+union kvm_mmu_role {
+ u64 as_u64;
+ struct {
+ union kvm_mmu_page_role base;
+ union kvm_mmu_extended_role ext;
+ };
+};
+
struct kvm_rmap_head {
unsigned long val;
};
@@ -280,18 +317,18 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ bool unsync;
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
- gfn_t gfn;
union kvm_mmu_page_role role;
+ gfn_t gfn;
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
- bool unsync;
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
@@ -360,7 +397,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
- union kvm_mmu_page_role base_role;
+ union kvm_mmu_role mmu_role;
u8 root_level;
u8 shadow_root_level;
u8 ept_ad;
@@ -490,7 +527,7 @@ struct kvm_vcpu_hv {
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
- cpumask_t tlb_lush;
+ cpumask_t tlb_flush;
};
struct kvm_vcpu_arch {
@@ -534,7 +571,13 @@ struct kvm_vcpu_arch {
* the paging mode of the l1 guest. This context is always used to
* handle faults.
*/
- struct kvm_mmu mmu;
+ struct kvm_mmu *mmu;
+
+ /* Non-nested MMU for L1 */
+ struct kvm_mmu root_mmu;
+
+ /* L1 MMU when running nested */
+ struct kvm_mmu guest_mmu;
/*
* Paging state of an L2 guest (used for nested npt)
@@ -585,6 +628,8 @@ struct kvm_vcpu_arch {
bool has_error_code;
u8 nr;
u32 error_code;
+ unsigned long payload;
+ bool has_payload;
u8 nested_apf;
} exception;
@@ -781,6 +826,9 @@ struct kvm_hv {
u64 hv_reenlightenment_control;
u64 hv_tsc_emulation_control;
u64 hv_tsc_emulation_status;
+
+ /* How many vCPUs have VP index != vCPU index */
+ atomic_t num_mismatched_vp_indexes;
};
enum kvm_irqchip_mode {
@@ -871,6 +919,7 @@ struct kvm_arch {
bool x2apic_broadcast_quirk_disabled;
bool guest_can_read_msr_platform_info;
+ bool exception_payload_enabled;
};
struct kvm_vm_stat {
@@ -1133,6 +1182,9 @@ struct kvm_x86_ops {
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+ int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
};
struct kvm_arch_async_pf {
@@ -1170,7 +1222,6 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
@@ -1324,7 +1375,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 51c4eee00732..dc4ed8bc2382 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -24,6 +24,7 @@
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
# define __ARCH_WANT_COMPAT_SYS_TIME
+# define __ARCH_WANT_SYS_UTIME32
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
@@ -31,13 +32,13 @@
# endif
+# define __ARCH_WANT_NEW_STAT
# define __ARCH_WANT_OLD_READDIR
# define __ARCH_WANT_OLD_STAT
# define __ARCH_WANT_SYS_ALARM
# define __ARCH_WANT_SYS_FADVISE64
# define __ARCH_WANT_SYS_GETHOSTNAME
# define __ARCH_WANT_SYS_GETPGRP
-# define __ARCH_WANT_SYS_LLSEEK
# define __ARCH_WANT_SYS_NICE
# define __ARCH_WANT_SYS_OLDUMOUNT
# define __ARCH_WANT_SYS_OLD_GETRLIMIT
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index e05e0d309244..1fc7a0d1e877 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void)
*/
static inline void cpu_vmxoff(void)
{
- asm volatile (ASM_VMX_VMXOFF : : : "cc");
+ asm volatile ("vmxoff");
cr4_clear_bits(X86_CR4_VMXE);
}
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9527ba5d62da..ade0f153947d 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -503,19 +503,6 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
-
-#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
struct vmx_msr_entry {
u32 index;
u32 reserved;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index fd23d5778ea1..dabfcf7c3941 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -288,6 +288,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
#define KVM_VCPUEVENT_VALID_SMM 0x00000008
+#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -299,7 +300,7 @@ struct kvm_vcpu_events {
__u8 injected;
__u8 nr;
__u8 has_error_code;
- __u8 pad;
+ __u8 pending;
__u32 error_code;
} exception;
struct {
@@ -322,7 +323,9 @@ struct kvm_vcpu_events {
__u8 smm_inside_nmi;
__u8 latched_init;
} smi;
- __u32 reserved[9];
+ __u8 reserved[27];
+ __u8 exception_has_payload;
+ __u64 exception_payload;
};
/* for KVM_GET/SET_DEBUGREGS */
@@ -381,6 +384,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 01d209ab5481..4e80080f277a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,8 @@
#include "trace.h"
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
return atomic64_read(&synic->sint[sint]);
@@ -132,8 +134,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu *vcpu = NULL;
int i;
- if (vpidx < KVM_MAX_VCPUS)
- vcpu = kvm_get_vcpu(kvm, vpidx);
+ if (vpidx >= KVM_MAX_VCPUS)
+ return NULL;
+
+ vcpu = kvm_get_vcpu(kvm, vpidx);
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -689,6 +693,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
stimer_cleanup(&hv_vcpu->stimer[i]);
}
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
+ return false;
+ return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
+
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page)
+{
+ if (!kvm_hv_assist_page_enabled(vcpu))
+ return false;
+ return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+ assist_page, sizeof(*assist_page));
+}
+EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
+
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct hv_message *msg = &stimer->msg;
@@ -1040,21 +1062,41 @@ static u64 current_task_runtime_100ns(void)
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
- case HV_X64_MSR_VP_INDEX:
- if (!host)
+ case HV_X64_MSR_VP_INDEX: {
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ int vcpu_idx = kvm_vcpu_get_idx(vcpu);
+ u32 new_vp_index = (u32)data;
+
+ if (!host || new_vp_index >= KVM_MAX_VCPUS)
return 1;
- hv->vp_index = (u32)data;
+
+ if (new_vp_index == hv_vcpu->vp_index)
+ return 0;
+
+ /*
+ * The VP index is initialized to vcpu_index by
+ * kvm_hv_vcpu_postcreate so they initially match. Now the
+ * VP index is changing, adjust num_mismatched_vp_indexes if
+ * it now matches or no longer matches vcpu_idx.
+ */
+ if (hv_vcpu->vp_index == vcpu_idx)
+ atomic_inc(&hv->num_mismatched_vp_indexes);
+ else if (new_vp_index == vcpu_idx)
+ atomic_dec(&hv->num_mismatched_vp_indexes);
+
+ hv_vcpu->vp_index = new_vp_index;
break;
+ }
case HV_X64_MSR_VP_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
- hv->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+ hv_vcpu->hv_vapic = data;
+ if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1062,12 +1104,19 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
- if (__clear_user((void __user *)addr, PAGE_SIZE))
+
+ /*
+ * Clear apic_assist portion of f(struct hv_vp_assist_page
+ * only, there can be valuable data in the rest which needs
+ * to be preserved e.g. on migration.
+ */
+ if (__clear_user((void __user *)addr, sizeof(u32)))
return 1;
- hv->hv_vapic = data;
+ hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu,
- gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+ gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
+ sizeof(struct hv_vp_assist_page)))
return 1;
break;
}
@@ -1080,7 +1129,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
case HV_X64_MSR_VP_RUNTIME:
if (!host)
return 1;
- hv->runtime_offset = data - current_task_runtime_100ns();
+ hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1172,11 +1221,11 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
bool host)
{
u64 data = 0;
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX:
- data = hv->vp_index;
+ data = hv_vcpu->vp_index;
break;
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
@@ -1185,10 +1234,10 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
case HV_X64_MSR_VP_ASSIST_PAGE:
- data = hv->hv_vapic;
+ data = hv_vcpu->hv_vapic;
break;
case HV_X64_MSR_VP_RUNTIME:
- data = current_task_runtime_100ns() + hv->runtime_offset;
+ data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1255,32 +1304,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
-static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+static __always_inline unsigned long *sparse_set_to_vcpu_mask(
+ struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
+ u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
- int i = 0, j;
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct kvm_vcpu *vcpu;
+ int i, bank, sbank = 0;
- if (!(valid_bank_mask & BIT_ULL(bank_no)))
- return -1;
+ memset(vp_bitmap, 0,
+ KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+ for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
+ KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+ vp_bitmap[bank] = sparse_banks[sbank++];
- for (j = 0; j < bank_no; j++)
- if (valid_bank_mask & BIT_ULL(j))
- i++;
+ if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
+ /* for all vcpus vp_index == vcpu_idx */
+ return (unsigned long *)vp_bitmap;
+ }
- return i;
+ bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
+ (unsigned long *)vp_bitmap))
+ __set_bit(i, vcpu_bitmap);
+ }
+ return vcpu_bitmap;
}
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
u16 rep_cnt, bool ex)
{
struct kvm *kvm = current_vcpu->kvm;
- struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
- struct kvm_vcpu *vcpu;
- unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
- unsigned long valid_bank_mask = 0;
+ u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ unsigned long *vcpu_mask;
+ u64 valid_bank_mask;
u64 sparse_banks[64];
- int sparse_banks_len, i;
+ int sparse_banks_len;
bool all_cpus;
if (!ex) {
@@ -1290,6 +1354,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
trace_kvm_hv_flush_tlb(flush.processor_mask,
flush.address_space, flush.flags);
+ valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
} else {
@@ -1306,7 +1371,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
- sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sparse_banks_len =
+ bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
sizeof(sparse_banks[0]);
if (!sparse_banks_len && !all_cpus)
@@ -1321,48 +1387,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
- cpumask_clear(&hv_current->tlb_lush);
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
- int bank = hv->vp_index / 64, sbank = 0;
-
- if (!all_cpus) {
- /* Banks >64 can't be represented */
- if (bank >= 64)
- continue;
-
- /* Non-ex hypercalls can only address first 64 vCPUs */
- if (!ex && bank)
- continue;
-
- if (ex) {
- /*
- * Check is the bank of this vCPU is in sparse
- * set and get the sparse bank number.
- */
- sbank = get_sparse_bank_no(valid_bank_mask,
- bank);
-
- if (sbank < 0)
- continue;
- }
-
- if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
- continue;
- }
+ cpumask_clear(&hv_vcpu->tlb_flush);
- /*
- * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
- * can't analyze it here, flush TLB regardless of the specified
- * address space.
- */
- __set_bit(i, vcpu_bitmap);
- }
+ vcpu_mask = all_cpus ? NULL :
+ sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+ vp_bitmap, vcpu_bitmap);
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+ * analyze it here, flush TLB regardless of the specified address space.
+ */
kvm_make_vcpus_request_mask(kvm,
KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
- vcpu_bitmap, &hv_current->tlb_lush);
+ vcpu_mask, &hv_vcpu->tlb_flush);
ret_success:
/* We always do full TLB flush, set rep_done = rep_cnt. */
@@ -1370,6 +1407,99 @@ ret_success:
((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
+static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
+ unsigned long *vcpu_bitmap)
+{
+ struct kvm_lapic_irq irq = {
+ .delivery_mode = APIC_DM_FIXED,
+ .vector = vector
+ };
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+ continue;
+
+ /* We fail only when APIC is disabled */
+ kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+}
+
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
+ bool ex, bool fast)
+{
+ struct kvm *kvm = current_vcpu->kvm;
+ struct hv_send_ipi_ex send_ipi_ex;
+ struct hv_send_ipi send_ipi;
+ u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ unsigned long *vcpu_mask;
+ unsigned long valid_bank_mask;
+ u64 sparse_banks[64];
+ int sparse_banks_len;
+ u32 vector;
+ bool all_cpus;
+
+ if (!ex) {
+ if (!fast) {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
+ sizeof(send_ipi))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ sparse_banks[0] = send_ipi.cpu_mask;
+ vector = send_ipi.vector;
+ } else {
+ /* 'reserved' part of hv_send_ipi should be 0 */
+ if (unlikely(ingpa >> 32 != 0))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ sparse_banks[0] = outgpa;
+ vector = (u32)ingpa;
+ }
+ all_cpus = false;
+ valid_bank_mask = BIT_ULL(0);
+
+ trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
+ } else {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
+ sizeof(send_ipi_ex))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
+ send_ipi_ex.vp_set.format,
+ send_ipi_ex.vp_set.valid_bank_mask);
+
+ vector = send_ipi_ex.vector;
+ valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
+ sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sizeof(sparse_banks[0]);
+
+ all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
+
+ if (!sparse_banks_len)
+ goto ret_success;
+
+ if (!all_cpus &&
+ kvm_read_guest(kvm,
+ ingpa + offsetof(struct hv_send_ipi_ex,
+ vp_set.bank_contents),
+ sparse_banks,
+ sparse_banks_len))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }
+
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ vcpu_mask = all_cpus ? NULL :
+ sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+ vp_bitmap, vcpu_bitmap);
+
+ kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+
+ret_success:
+ return HV_STATUS_SUCCESS;
+}
+
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1539,6 +1669,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
break;
+ case HVCALL_SEND_IPI:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
+ break;
+ case HVCALL_SEND_IPI_EX:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
+ break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index d6aa969e20f1..0e66c12ed2c3 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page);
+
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
int timer_index)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fbb0e6df121b..3cd227ff807f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,6 +70,11 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
+static bool lapic_timer_advance_adjust_done = false;
+#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+/* step-by-step approximation to mitigate fluctuation */
+#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -955,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
map = rcu_dereference(kvm->arch.apic_map);
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
- if (ret)
+ if (ret) {
+ *r = 0;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
- if (*r < 0)
- *r = 0;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
+ }
rcu_read_unlock();
return ret;
@@ -1472,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- u64 guest_tsc, tsc_deadline;
+ u64 guest_tsc, tsc_deadline, ns;
if (!lapic_in_kernel(vcpu))
return;
@@ -1492,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
if (guest_tsc < tsc_deadline)
__delay(min(tsc_deadline - guest_tsc,
nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+
+ if (!lapic_timer_advance_adjust_done) {
+ /* too early */
+ if (guest_tsc < tsc_deadline) {
+ ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns -= min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ } else {
+ /* too late */
+ ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns += min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ }
+ if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+ lapic_timer_advance_adjust_done = true;
+ }
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -2621,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
+ unsigned long new_len;
+
if (!IS_ALIGNED(addr, 4))
return 1;
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr, sizeof(u8));
+
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
+
+ return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ed0ed39abd36..ff6ef9c3d760 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_init(void);
void kvm_lapic_exit(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e843ec46609d..cf5f572f2305 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -932,7 +932,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
if (!obj)
- return -ENOMEM;
+ return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
}
return 0;
@@ -960,7 +960,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
- return -ENOMEM;
+ return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
@@ -1265,24 +1265,24 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
mmu_free_pte_list_desc(desc);
}
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
if (!rmap_head->val) {
- printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
+ pr_err("%s: %p 0->BUG\n", __func__, spte);
BUG();
} else if (!(rmap_head->val & 1)) {
- rmap_printk("pte_list_remove: %p 1->0\n", spte);
+ rmap_printk("%s: %p 1->0\n", __func__, spte);
if ((u64 *)rmap_head->val != spte) {
- printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
+ pr_err("%s: %p 1->BUG\n", __func__, spte);
BUG();
}
rmap_head->val = 0;
} else {
- rmap_printk("pte_list_remove: %p many->many\n", spte);
+ rmap_printk("%s: %p many->many\n", __func__, spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
prev_desc = NULL;
while (desc) {
@@ -1296,11 +1296,17 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
prev_desc = desc;
desc = desc->more;
}
- pr_err("pte_list_remove: %p many->many\n", spte);
+ pr_err("%s: %p many->many\n", __func__, spte);
BUG();
}
}
+static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
+{
+ mmu_spte_clear_track_bits(sptep);
+ __pte_list_remove(sptep, rmap_head);
+}
+
static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@@ -1349,7 +1355,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmap_head = gfn_to_rmap(kvm, gfn, sp);
- pte_list_remove(spte, rmap_head);
+ __pte_list_remove(spte, rmap_head);
}
/*
@@ -1685,7 +1691,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
while ((sptep = rmap_get_first(rmap_head, &iter))) {
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
- drop_spte(kvm, sptep);
+ pte_list_remove(rmap_head, sptep);
flush = true;
}
@@ -1721,7 +1727,7 @@ restart:
need_flush = 1;
if (pte_write(*ptep)) {
- drop_spte(kvm, sptep);
+ pte_list_remove(rmap_head, sptep);
goto restart;
} else {
new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
@@ -1988,7 +1994,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- pte_list_remove(parent_pte, &sp->parent_ptes);
+ __pte_list_remove(parent_pte, &sp->parent_ptes);
}
static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -2181,7 +2187,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)
- || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+ || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2375,14 +2381,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
int collisions = 0;
LIST_HEAD(invalid_list);
- role = vcpu->arch.mmu.base_role;
+ role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
role.direct = direct;
if (role.direct)
role.cr4_pae = 0;
role.access = access;
- if (!vcpu->arch.mmu.direct_map
- && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+ if (!vcpu->arch.mmu->direct_map
+ && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
@@ -2457,11 +2463,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
{
iterator->addr = addr;
iterator->shadow_addr = root;
- iterator->level = vcpu->arch.mmu.shadow_root_level;
+ iterator->level = vcpu->arch.mmu->shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
- vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
- !vcpu->arch.mmu.direct_map)
+ vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+ !vcpu->arch.mmu->direct_map)
--iterator->level;
if (iterator->level == PT32E_ROOT_LEVEL) {
@@ -2469,10 +2475,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
* prev_root is currently only used for 64-bit hosts. So only
* the active root_hpa is valid here.
*/
- BUG_ON(root != vcpu->arch.mmu.root_hpa);
+ BUG_ON(root != vcpu->arch.mmu->root_hpa);
iterator->shadow_addr
- = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ = vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
--iterator->level;
if (!iterator->shadow_addr)
@@ -2483,7 +2489,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, u64 addr)
{
- shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
+ shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
addr);
}
@@ -3095,7 +3101,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int emulate = 0;
gfn_t pseudo_gfn;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return 0;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
@@ -3301,7 +3307,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
u64 spte = 0ull;
uint retry_count = 0;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return false;
if (!page_fault_can_be_fast(error_code))
@@ -3471,11 +3477,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
}
/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ ulong roots_to_free)
{
int i;
LIST_HEAD(invalid_list);
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
@@ -3535,20 +3541,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
struct kvm_mmu_page *sp;
unsigned i;
- if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
+ if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
- vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+ vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = __pa(sp->spt);
- } else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+ vcpu->arch.mmu->root_hpa = __pa(sp->spt);
+ } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
@@ -3561,9 +3567,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
@@ -3577,7 +3583,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
gfn_t root_gfn;
int i;
- root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
+ root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
@@ -3586,8 +3592,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* Do we shadow a long mode page table? If so we need to
* write-protect the guests page table root.
*/
- if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
+ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+ hpa_t root = vcpu->arch.mmu->root_hpa;
MMU_WARN_ON(VALID_PAGE(root));
@@ -3597,11 +3603,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+ vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = root;
+ vcpu->arch.mmu->root_hpa = root;
return 0;
}
@@ -3611,17 +3617,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* the shadow page table may be a PAE or a long mode page table.
*/
pm_mask = PT_PRESENT_MASK;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
+ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
- if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
- pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
+ if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
+ pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
if (!(pdptr & PT_PRESENT_MASK)) {
- vcpu->arch.mmu.pae_root[i] = 0;
+ vcpu->arch.mmu->pae_root[i] = 0;
continue;
}
root_gfn = pdptr >> PAGE_SHIFT;
@@ -3639,16 +3645,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.pae_root[i] = root | pm_mask;
+ vcpu->arch.mmu->pae_root[i] = root | pm_mask;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
/*
* If we shadow a 32 bit page table with a long mode page
* table we enter this path.
*/
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
- if (vcpu->arch.mmu.lm_root == NULL) {
+ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+ if (vcpu->arch.mmu->lm_root == NULL) {
/*
* The additional page necessary for this is only
* allocated on demand.
@@ -3660,12 +3666,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (lm_root == NULL)
return 1;
- lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
+ lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
- vcpu->arch.mmu.lm_root = lm_root;
+ vcpu->arch.mmu->lm_root = lm_root;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
}
return 0;
@@ -3673,7 +3679,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.mmu.direct_map)
+ if (vcpu->arch.mmu->direct_map)
return mmu_alloc_direct_roots(vcpu);
else
return mmu_alloc_shadow_roots(vcpu);
@@ -3684,17 +3690,16 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
int i;
struct kvm_mmu_page *sp;
- if (vcpu->arch.mmu.direct_map)
+ if (vcpu->arch.mmu->direct_map)
return;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
- if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
-
+ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+ hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
/*
@@ -3725,7 +3730,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
@@ -3799,7 +3804,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
int root, leaf;
bool reserved = false;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
goto exit;
walk_shadow_page_lockless_begin(vcpu);
@@ -3816,7 +3821,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (!is_shadow_present_pte(spte))
break;
- reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+ reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
iterator.level);
}
@@ -3895,7 +3900,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
struct kvm_shadow_walk_iterator iterator;
u64 spte;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
walk_shadow_page_lockless_begin(vcpu);
@@ -3922,7 +3927,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
if (r)
return r;
- MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
return nonpaging_map(vcpu, gva & PAGE_MASK,
@@ -3935,8 +3940,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
arch.gfn = gfn;
- arch.direct_map = vcpu->arch.mmu.direct_map;
- arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
+ arch.direct_map = vcpu->arch.mmu->direct_map;
+ arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu);
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
@@ -4042,7 +4047,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
- MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
if (page_fault_handle_page_track(vcpu, error_code, gfn))
return RET_PF_EMULATE;
@@ -4118,7 +4123,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
{
uint i;
struct kvm_mmu_root_info root;
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
root.cr3 = mmu->get_cr3(vcpu);
root.hpa = mmu->root_hpa;
@@ -4141,7 +4146,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
union kvm_mmu_page_role new_role,
bool skip_tlb_flush)
{
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
/*
* For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
@@ -4192,7 +4197,8 @@ static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
bool skip_tlb_flush)
{
if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
- kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
+ kvm_mmu_free_roots(vcpu, vcpu->arch.mmu,
+ KVM_MMU_ROOT_CURRENT);
}
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
@@ -4210,7 +4216,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
static void inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
- vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+ vcpu->arch.mmu->inject_page_fault(vcpu, fault);
}
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -4414,7 +4420,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+ bool uses_nx = context->nx ||
+ context->mmu_role.base.smep_andnot_wp;
struct rsvd_bits_validate *shadow_zero_check;
int i;
@@ -4553,7 +4560,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
* SMAP:kernel-mode data accesses from user-mode
* mappings should fault. A fault is considered
* as a SMAP violation if all of the following
- * conditions are ture:
+ * conditions are true:
* - X86_CR4_SMAP is set in CR4
* - A user page is accessed
* - The access is not a fetch
@@ -4714,27 +4721,65 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
-static union kvm_mmu_page_role
-kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
+static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_extended_role ext = {0};
+
+ ext.cr0_pg = !!is_paging(vcpu);
+ ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+ ext.cr4_pse = !!is_pse(vcpu);
+ ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
+ ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+
+ ext.valid = 1;
+
+ return ext;
+}
+
+static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
+ bool base_only)
+{
+ union kvm_mmu_role role = {0};
+
+ role.base.access = ACC_ALL;
+ role.base.nxe = !!is_nx(vcpu);
+ role.base.cr4_pae = !!is_pae(vcpu);
+ role.base.cr0_wp = is_write_protection(vcpu);
+ role.base.smm = is_smm(vcpu);
+ role.base.guest_mode = is_guest_mode(vcpu);
+
+ if (base_only)
+ return role;
+
+ role.ext = kvm_calc_mmu_role_ext(vcpu);
+
+ return role;
+}
+
+static union kvm_mmu_role
+kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
{
- union kvm_mmu_page_role role = {0};
+ union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
- role.guest_mode = is_guest_mode(vcpu);
- role.smm = is_smm(vcpu);
- role.ad_disabled = (shadow_accessed_mask == 0);
- role.level = kvm_x86_ops->get_tdp_level(vcpu);
- role.direct = true;
- role.access = ACC_ALL;
+ role.base.ad_disabled = (shadow_accessed_mask == 0);
+ role.base.level = kvm_x86_ops->get_tdp_level(vcpu);
+ role.base.direct = true;
return role;
}
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ struct kvm_mmu *context = vcpu->arch.mmu;
+ union kvm_mmu_role new_role =
+ kvm_calc_tdp_mmu_root_page_role(vcpu, false);
- context->base_role.word = mmu_base_role_mask.word &
- kvm_calc_tdp_mmu_root_page_role(vcpu).word;
+ new_role.base.word &= mmu_base_role_mask.word;
+ if (new_role.as_u64 == context->mmu_role.as_u64)
+ return;
+
+ context->mmu_role.as_u64 = new_role.as_u64;
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
@@ -4774,36 +4819,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
-static union kvm_mmu_page_role
-kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
-{
- union kvm_mmu_page_role role = {0};
- bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
- bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
-
- role.nxe = is_nx(vcpu);
- role.cr4_pae = !!is_pae(vcpu);
- role.cr0_wp = is_write_protection(vcpu);
- role.smep_andnot_wp = smep && !is_write_protection(vcpu);
- role.smap_andnot_wp = smap && !is_write_protection(vcpu);
- role.guest_mode = is_guest_mode(vcpu);
- role.smm = is_smm(vcpu);
- role.direct = !is_paging(vcpu);
- role.access = ACC_ALL;
+static union kvm_mmu_role
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+{
+ union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
+
+ role.base.smep_andnot_wp = role.ext.cr4_smep &&
+ !is_write_protection(vcpu);
+ role.base.smap_andnot_wp = role.ext.cr4_smap &&
+ !is_write_protection(vcpu);
+ role.base.direct = !is_paging(vcpu);
if (!is_long_mode(vcpu))
- role.level = PT32E_ROOT_LEVEL;
+ role.base.level = PT32E_ROOT_LEVEL;
else if (is_la57_mode(vcpu))
- role.level = PT64_ROOT_5LEVEL;
+ role.base.level = PT64_ROOT_5LEVEL;
else
- role.level = PT64_ROOT_4LEVEL;
+ role.base.level = PT64_ROOT_4LEVEL;
return role;
}
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ struct kvm_mmu *context = vcpu->arch.mmu;
+ union kvm_mmu_role new_role =
+ kvm_calc_shadow_mmu_root_page_role(vcpu, false);
+
+ new_role.base.word &= mmu_base_role_mask.word;
+ if (new_role.as_u64 == context->mmu_role.as_u64)
+ return;
if (!is_paging(vcpu))
nonpaging_init_context(vcpu, context);
@@ -4814,22 +4859,28 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
else
paging32_init_context(vcpu, context);
- context->base_role.word = mmu_base_role_mask.word &
- kvm_calc_shadow_mmu_root_page_role(vcpu).word;
+ context->mmu_role.as_u64 = new_role.as_u64;
reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-static union kvm_mmu_page_role
-kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
+static union kvm_mmu_role
+kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
+ bool execonly)
{
- union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;
+ union kvm_mmu_role role;
+
+ /* Base role is inherited from root_mmu */
+ role.base.word = vcpu->arch.root_mmu.mmu_role.base.word;
+ role.ext = kvm_calc_mmu_role_ext(vcpu);
+
+ role.base.level = PT64_ROOT_4LEVEL;
+ role.base.direct = false;
+ role.base.ad_disabled = !accessed_dirty;
+ role.base.guest_mode = true;
+ role.base.access = ACC_ALL;
- role.level = PT64_ROOT_4LEVEL;
- role.direct = false;
- role.ad_disabled = !accessed_dirty;
- role.guest_mode = true;
- role.access = ACC_ALL;
+ role.ext.execonly = execonly;
return role;
}
@@ -4837,11 +4888,17 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
- union kvm_mmu_page_role root_page_role =
- kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);
+ struct kvm_mmu *context = vcpu->arch.mmu;
+ union kvm_mmu_role new_role =
+ kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
+ execonly);
+
+ __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
+
+ new_role.base.word &= mmu_base_role_mask.word;
+ if (new_role.as_u64 == context->mmu_role.as_u64)
+ return;
- __kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
context->shadow_root_level = PT64_ROOT_4LEVEL;
context->nx = true;
@@ -4853,7 +4910,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
context->direct_map = false;
- context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
+ context->mmu_role.as_u64 = new_role.as_u64;
+
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
@@ -4864,7 +4922,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
- struct kvm_mmu *context = &vcpu->arch.mmu;
+ struct kvm_mmu *context = vcpu->arch.mmu;
kvm_init_shadow_mmu(vcpu);
context->set_cr3 = kvm_x86_ops->set_cr3;
@@ -4875,14 +4933,20 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
+ union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
+ new_role.base.word &= mmu_base_role_mask.word;
+ if (new_role.as_u64 == g_context->mmu_role.as_u64)
+ return;
+
+ g_context->mmu_role.as_u64 = new_role.as_u64;
g_context->get_cr3 = get_cr3;
g_context->get_pdptr = kvm_pdptr_read;
g_context->inject_page_fault = kvm_inject_page_fault;
/*
- * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using
+ * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using
* L1's nested page tables (e.g. EPT12). The nested translation
* of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using
* L2's page tables as the first level of translation and L1's
@@ -4921,10 +4985,10 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
if (reset_roots) {
uint i;
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.mmu->root_hpa = INVALID_PAGE;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+ vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
}
if (mmu_is_nested(vcpu))
@@ -4939,10 +5003,14 @@ EXPORT_SYMBOL_GPL(kvm_init_mmu);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
{
+ union kvm_mmu_role role;
+
if (tdp_enabled)
- return kvm_calc_tdp_mmu_root_page_role(vcpu);
+ role = kvm_calc_tdp_mmu_root_page_role(vcpu, true);
else
- return kvm_calc_shadow_mmu_root_page_role(vcpu);
+ role = kvm_calc_shadow_mmu_root_page_role(vcpu, true);
+
+ return role.base;
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -4972,8 +5040,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
- kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
- WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
+ WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa));
+ kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+ WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4987,7 +5057,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
- vcpu->arch.mmu.update_pte(vcpu, sp, spte, new);
+ vcpu->arch.mmu->update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@@ -5164,10 +5234,12 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
local_flush = true;
while (npte--) {
+ u32 base_role = vcpu->arch.mmu->mmu_role.base.word;
+
entry = *spte;
mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
- !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
+ !((sp->role.word ^ base_role)
& mmu_base_role_mask.word) && rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (need_remote_flush(entry, *spte))
@@ -5185,7 +5257,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
gpa_t gpa;
int r;
- if (vcpu->arch.mmu.direct_map)
+ if (vcpu->arch.mmu->direct_map)
return 0;
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
@@ -5221,10 +5293,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
{
int r, emulation_type = 0;
enum emulation_result er;
- bool direct = vcpu->arch.mmu.direct_map;
+ bool direct = vcpu->arch.mmu->direct_map;
/* With shadow page tables, fault_address contains a GVA or nGPA. */
- if (vcpu->arch.mmu.direct_map) {
+ if (vcpu->arch.mmu->direct_map) {
vcpu->arch.gpa_available = true;
vcpu->arch.gpa_val = cr2;
}
@@ -5237,8 +5309,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
}
if (r == RET_PF_INVALID) {
- r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
- false);
+ r = vcpu->arch.mmu->page_fault(vcpu, cr2,
+ lower_32_bits(error_code),
+ false);
WARN_ON(r == RET_PF_INVALID);
}
@@ -5254,7 +5327,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
* paging in both guests. If true, we simply unprotect the page
* and resume the guest.
*/
- if (vcpu->arch.mmu.direct_map &&
+ if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
return 1;
@@ -5302,7 +5375,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
int i;
/* INVLPG on a * non-canonical address is a NOP according to the SDM. */
@@ -5333,7 +5406,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
{
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
bool tlb_flush = false;
uint i;
@@ -5377,8 +5450,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
- free_page((unsigned long)vcpu->arch.mmu.pae_root);
- free_page((unsigned long)vcpu->arch.mmu.lm_root);
+ free_page((unsigned long)vcpu->arch.mmu->pae_root);
+ free_page((unsigned long)vcpu->arch.mmu->lm_root);
}
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5398,9 +5471,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
if (!page)
return -ENOMEM;
- vcpu->arch.mmu.pae_root = page_address(page);
+ vcpu->arch.mmu->pae_root = page_address(page);
for (i = 0; i < 4; ++i)
- vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+ vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
return 0;
}
@@ -5409,27 +5482,21 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
uint i;
- vcpu->arch.walk_mmu = &vcpu->arch.mmu;
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
- vcpu->arch.mmu.translate_gpa = translate_gpa;
- vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+ vcpu->arch.mmu = &vcpu->arch.root_mmu;
+ vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
+ vcpu->arch.root_mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.root_mmu.translate_gpa = translate_gpa;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-
- return alloc_mmu_pages(vcpu);
-}
+ vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-void kvm_mmu_setup(struct kvm_vcpu *vcpu)
-{
- MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE;
+ vcpu->arch.guest_mmu.translate_gpa = translate_gpa;
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
- /*
- * kvm_mmu_setup() is called only on vCPU initialization.
- * Therefore, no need to reset mmu roots as they are not yet
- * initialized.
- */
- kvm_init_mmu(vcpu, false);
+ vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+ return alloc_mmu_pages(vcpu);
}
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -5612,7 +5679,7 @@ restart:
if (sp->role.direct &&
!kvm_is_reserved_pfn(pfn) &&
PageTransCompoundMap(pfn_to_page(pfn))) {
- drop_spte(kvm, sptep);
+ pte_list_remove(rmap_head, sptep);
need_tlb_flush = 1;
goto restart;
}
@@ -5869,6 +5936,16 @@ int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
+ /*
+ * MMU roles use union aliasing which is, generally speaking, an
+ * undefined behavior. However, we supposedly know how compilers behave
+ * and the current status quo is unlikely to change. Guardians below are
+ * supposed to let us know if the assumption becomes false.
+ */
+ BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32));
+ BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32));
+ BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64));
+
kvm_mmu_reset_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
@@ -5898,7 +5975,7 @@ out:
}
/*
- * Caculate mmu pages needed for kvm.
+ * Calculate mmu pages needed for kvm.
*/
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fab69c0b2f3..c7b333147c4a 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -43,11 +43,6 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
-
static inline u64 rsvd_bits(int s, int e)
{
if (e < s)
@@ -80,7 +75,7 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
- if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+ if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE))
return 0;
return kvm_mmu_load(vcpu);
@@ -102,9 +97,9 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
{
- if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
- vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
- kvm_get_active_pcid(vcpu));
+ if (VALID_PAGE(vcpu->arch.mmu->root_hpa))
+ vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa |
+ kvm_get_active_pcid(vcpu));
}
/*
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1272861e77b9..abac7e208853 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -59,19 +59,19 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
int i;
struct kvm_mmu_page *sp;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
- if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
+ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+ hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
- __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level);
+ __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level);
return;
}
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
@@ -122,7 +122,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
hpa = pfn << PAGE_SHIFT;
if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
- "ent %llxn", vcpu->arch.mmu.root_level, pfn,
+ "ent %llxn", vcpu->arch.mmu->root_level, pfn,
hpa, *sptep);
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 14ffd973df54..7cf2185b7eb5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -158,14 +158,15 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, u64 *spte,
u64 gpte)
{
- if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
goto no_present;
if (!FNAME(is_present_gpte)(gpte))
goto no_present;
/* if accessed bit is not supported prefetch non accessed gpte */
- if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
+ if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
+ !(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
return false;
@@ -480,7 +481,7 @@ error:
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr, u32 access)
{
- return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
+ return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
access);
}
@@ -509,7 +510,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
- FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+ FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
if (is_error_pfn(pfn))
@@ -604,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
direct_access = gw->pte_access;
- top_level = vcpu->arch.mmu.root_level;
+ top_level = vcpu->arch.mmu->root_level;
if (top_level == PT32E_ROOT_LEVEL)
top_level = PT32_ROOT_LEVEL;
/*
@@ -616,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
goto out_gpte_changed;
for (shadow_walk_init(&it, vcpu, addr);
@@ -1004,7 +1005,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(gpte);
- FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
+ FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 61ccfb13899e..0e21ccc46792 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -809,6 +809,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
nested_svm_check_exception(svm, nr, has_error_code, error_code))
return;
+ kvm_deliver_exception_payload(&svm->vcpu);
+
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -2922,18 +2924,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
WARN_ON(mmu_is_nested(vcpu));
kvm_init_shadow_mmu(vcpu);
- vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
- vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
- vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
- vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu);
- reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
+ vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3;
+ vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3;
+ vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
+ vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
+ vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
+ reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
- vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+ vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
static int nested_svm_check_permissions(struct vcpu_svm *svm)
@@ -2969,16 +2971,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
svm->vmcb->control.exit_info_1 = error_code;
/*
- * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
- * The fix is to add the ancillary datum (CR2 or DR6) to structs
- * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
- * written only when inject_pending_event runs (DR6 would written here
- * too). This should be conditional on a new capability---if the
- * capability is disabled, kvm_multiple_exception would write the
- * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+ * EXITINFO2 is undefined for all exception intercepts other
+ * than #PF.
*/
if (svm->vcpu.arch.exception.nested_apf)
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else if (svm->vcpu.arch.exception.has_payload)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
else
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
@@ -5642,26 +5641,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%[svm]) \n\t"
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
-#endif
/*
* Clear host registers marked as clobbered to prevent
* speculative use.
*/
- "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
- "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
- "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
- "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
- "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
-#ifdef CONFIG_X86_64
- "xor %%r8, %%r8 \n\t"
- "xor %%r9, %%r9 \n\t"
- "xor %%r10, %%r10 \n\t"
- "xor %%r11, %%r11 \n\t"
- "xor %%r12, %%r12 \n\t"
- "xor %%r13, %%r13 \n\t"
- "xor %%r14, %%r14 \n\t"
- "xor %%r15, %%r15 \n\t"
+ "xor %%r8d, %%r8d \n\t"
+ "xor %%r9d, %%r9d \n\t"
+ "xor %%r10d, %%r10d \n\t"
+ "xor %%r11d, %%r11d \n\t"
+ "xor %%r12d, %%r12d \n\t"
+ "xor %%r13d, %%r13d \n\t"
+ "xor %%r14d, %%r14d \n\t"
+ "xor %%r15d, %%r15d \n\t"
#endif
+ "xor %%ebx, %%ebx \n\t"
+ "xor %%ecx, %%ecx \n\t"
+ "xor %%edx, %%edx \n\t"
+ "xor %%esi, %%esi \n\t"
+ "xor %%edi, %%edi \n\t"
"pop %%" _ASM_BP
:
: [svm]"a"(svm),
@@ -7040,6 +7037,13 @@ failed:
return ret;
}
+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ /* Intel-only feature */
+ return -ENODEV;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7169,6 +7173,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.mem_enc_op = svm_mem_enc_op,
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0f997683404f..0659465a745c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1418,6 +1418,48 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex,
__entry->valid_bank_mask, __entry->format,
__entry->address_space, __entry->flags)
);
+
+/*
+ * Tracepoints for kvm_hv_send_ipi.
+ */
+TRACE_EVENT(kvm_hv_send_ipi,
+ TP_PROTO(u32 vector, u64 processor_mask),
+ TP_ARGS(vector, processor_mask),
+
+ TP_STRUCT__entry(
+ __field(u32, vector)
+ __field(u64, processor_mask)
+ ),
+
+ TP_fast_assign(
+ __entry->vector = vector;
+ __entry->processor_mask = processor_mask;
+ ),
+
+ TP_printk("vector %x processor_mask 0x%llx",
+ __entry->vector, __entry->processor_mask)
+);
+
+TRACE_EVENT(kvm_hv_send_ipi_ex,
+ TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask),
+ TP_ARGS(vector, format, valid_bank_mask),
+
+ TP_STRUCT__entry(
+ __field(u32, vector)
+ __field(u64, format)
+ __field(u64, valid_bank_mask)
+ ),
+
+ TP_fast_assign(
+ __entry->vector = vector;
+ __entry->format = format;
+ __entry->valid_bank_mask = valid_bank_mask;
+ ),
+
+ TP_printk("vector %x format %llx valid_bank_mask 0x%llx",
+ __entry->vector, __entry->format,
+ __entry->valid_bank_mask)
+);
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e665aa7167cf..4555077d69ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -20,6 +20,7 @@
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
+#include "hyperv.h"
#include <linux/kvm_host.h>
#include <linux/module.h>
@@ -61,7 +62,7 @@
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
- ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)
+ ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -107,9 +108,12 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
* VMX and be a hypervisor for its own guests. If nested=0, guests may not
* use VMX instructions.
*/
-static bool __read_mostly nested = 0;
+static bool __read_mostly nested = 1;
module_param(nested, bool, S_IRUGO);
+static bool __read_mostly nested_early_check = 0;
+module_param(nested_early_check, bool, S_IRUGO);
+
static u64 __read_mostly host_xss;
static bool __read_mostly enable_pml = 1;
@@ -131,7 +135,7 @@ static bool __read_mostly enable_preemption_timer = 1;
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
-#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
+#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
@@ -187,6 +191,7 @@ static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);
extern const ulong vmx_return;
+extern const ulong vmx_early_consistency_check_return;
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
@@ -827,14 +832,28 @@ struct nested_vmx {
*/
struct vmcs12 *cached_shadow_vmcs12;
/*
- * Indicates if the shadow vmcs must be updated with the
- * data hold by vmcs12
+ * Indicates if the shadow vmcs or enlightened vmcs must be updated
+ * with the data held by struct vmcs12.
*/
- bool sync_shadow_vmcs;
+ bool need_vmcs12_sync;
bool dirty_vmcs12;
+ /*
+ * vmcs02 has been initialized, i.e. state that is constant for
+ * vmcs02 has been written to the backing VMCS. Initialization
+ * is delayed until L1 actually attempts to run a nested VM.
+ */
+ bool vmcs02_initialized;
+
bool change_vmcs01_virtual_apic_mode;
+ /*
+ * Enlightened VMCS has been enabled. It does not mean that L1 has to
+ * use it. However, VMX features available to L1 will be limited based
+ * on what the enlightened VMCS supports.
+ */
+ bool enlightened_vmcs_enabled;
+
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
@@ -870,6 +889,10 @@ struct nested_vmx {
/* in guest mode on SMM entry? */
bool guest_mode;
} smm;
+
+ gpa_t hv_evmcs_vmptr;
+ struct page *hv_evmcs_page;
+ struct hv_enlightened_vmcs *hv_evmcs;
};
#define POSTED_INTR_ON 0
@@ -1381,6 +1404,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
#define KVM_EVMCS_VERSION 1
+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ * VM_FUNCTION_CONTROL = 0x00002018,
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ *
+ * TSC_MULTIPLIER = 0x00002032,
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ *
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
+ PIN_BASED_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_2NDEXEC \
+ (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
+ SECONDARY_EXEC_APIC_REGISTER_VIRT | \
+ SECONDARY_EXEC_ENABLE_PML | \
+ SECONDARY_EXEC_ENABLE_VMFUNC | \
+ SECONDARY_EXEC_SHADOW_VMCS | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
+
#if IS_ENABLED(CONFIG_HYPERV)
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
@@ -1473,69 +1539,12 @@ static void evmcs_load(u64 phys_addr)
static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
- /*
- * Enlightened VMCSv1 doesn't support these:
- *
- * POSTED_INTR_NV = 0x00000002,
- * GUEST_INTR_STATUS = 0x00000810,
- * APIC_ACCESS_ADDR = 0x00002014,
- * POSTED_INTR_DESC_ADDR = 0x00002016,
- * EOI_EXIT_BITMAP0 = 0x0000201c,
- * EOI_EXIT_BITMAP1 = 0x0000201e,
- * EOI_EXIT_BITMAP2 = 0x00002020,
- * EOI_EXIT_BITMAP3 = 0x00002022,
- */
- vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
-
- /*
- * GUEST_PML_INDEX = 0x00000812,
- * PML_ADDRESS = 0x0000200e,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
-
- /* VM_FUNCTION_CONTROL = 0x00002018, */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
-
- /*
- * EPTP_LIST_ADDRESS = 0x00002024,
- * VMREAD_BITMAP = 0x00002026,
- * VMWRITE_BITMAP = 0x00002028,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
-
- /*
- * TSC_MULTIPLIER = 0x00002032,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
+ vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
- /*
- * PLE_GAP = 0x00004020,
- * PLE_WINDOW = 0x00004022,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-
- /*
- * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- */
- vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-
- /*
- * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
- * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
- */
- vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+ vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
- /*
- * Currently unsupported in KVM:
- * GUEST_IA32_RTIT_CTL = 0x00002814,
- */
}
/* check_ept_pointer() should be under protection of ept_pointer_lock. */
@@ -1560,26 +1569,27 @@ static void check_ept_pointer_match(struct kvm *kvm)
static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
{
- int ret;
+ struct kvm_vcpu *vcpu;
+ int ret = -ENOTSUPP, i;
spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
check_ept_pointer_match(kvm);
- if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
- ret = -ENOTSUPP;
- goto out;
- }
-
/*
* FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the
* base of EPT PML4 table, strip off EPT configuration information.
*/
- ret = hyperv_flush_guest_mapping(
- to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
+ if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ ret |= hyperv_flush_guest_mapping(
+ to_vmx(kvm_get_vcpu(kvm, i))->ept_pointer & PAGE_MASK);
+ } else {
+ ret = hyperv_flush_guest_mapping(
+ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK);
+ }
-out:
spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
return ret;
}
@@ -1595,6 +1605,35 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /* We don't support disabling the feature for simplicity. */
+ if (vmx->nested.enlightened_vmcs_enabled)
+ return 0;
+
+ vmx->nested.enlightened_vmcs_enabled = true;
+
+ /*
+ * vmcs_version represents the range of supported Enlightened VMCS
+ * versions: lower 8 bits is the minimal version, higher 8 bits is the
+ * maximum supported version. KVM supports versions from 1 to
+ * KVM_EVMCS_VERSION.
+ */
+ if (vmcs_version)
+ *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
+
+ vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+ vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+
+ return 0;
+}
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1617,11 +1656,6 @@ static inline bool is_page_fault(u32 intr_info)
return is_exception_n(intr_info, PF_VECTOR);
}
-static inline bool is_no_device(u32 intr_info)
-{
- return is_exception_n(intr_info, NM_VECTOR);
-}
-
static inline bool is_invalid_opcode(u32 intr_info)
{
return is_exception_n(intr_info, UD_VECTOR);
@@ -1632,12 +1666,6 @@ static inline bool is_gp_fault(u32 intr_info)
return is_exception_n(intr_info, GP_VECTOR);
}
-static inline bool is_external_interrupt(u32 intr_info)
-{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
- == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
-}
-
static inline bool is_machine_check(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -2063,9 +2091,6 @@ static inline bool is_nmi(u32 intr_info)
static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info,
unsigned long exit_qualification);
-static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12,
- u32 reason, unsigned long qualification);
static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
@@ -2077,7 +2102,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
return -1;
}
-static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
{
struct {
u64 vpid : 16;
@@ -2086,22 +2111,20 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
} operand = { vpid, 0, gva };
bool error;
- asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
- : "memory");
+ asm volatile (__ex("invvpid %2, %1") CC_SET(na)
+ : CC_OUT(na) (error) : "r"(ext), "m"(operand));
BUG_ON(error);
}
-static inline void __invept(int ext, u64 eptp, gpa_t gpa)
+static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
{
struct {
u64 eptp, gpa;
} operand = {eptp, gpa};
bool error;
- asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
- : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
- : "memory");
+ asm volatile (__ex("invept %2, %1") CC_SET(na)
+ : CC_OUT(na) (error) : "r"(ext), "m"(operand));
BUG_ON(error);
}
@@ -2120,9 +2143,8 @@ static void vmcs_clear(struct vmcs *vmcs)
u64 phys_addr = __pa(vmcs);
bool error;
- asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
- : "memory");
+ asm volatile (__ex("vmclear %1") CC_SET(na)
+ : CC_OUT(na) (error) : "m"(phys_addr));
if (unlikely(error))
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
@@ -2145,9 +2167,8 @@ static void vmcs_load(struct vmcs *vmcs)
if (static_branch_unlikely(&enable_evmcs))
return evmcs_load(phys_addr);
- asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
- : "memory");
+ asm volatile (__ex("vmptrld %1") CC_SET(na)
+ : CC_OUT(na) (error) : "m"(phys_addr));
if (unlikely(error))
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
vmcs, phys_addr);
@@ -2323,8 +2344,8 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
{
unsigned long value;
- asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
- : "=a"(value) : "d"(field) : "cc");
+ asm volatile (__ex_clear("vmread %1, %0", "%k0")
+ : "=r"(value) : "r"(field));
return value;
}
@@ -2375,8 +2396,8 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
{
bool error;
- asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
- : CC_OUT(na) (error) : "a"(value), "d"(field));
+ asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
+ : CC_OUT(na) (error) : "r"(field), "rm"(value));
if (unlikely(error))
vmwrite_error(field, value);
}
@@ -2707,7 +2728,8 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
u64 guest_val, u64 host_val)
{
vmcs_write64(guest_val_vmcs, guest_val);
- vmcs_write64(host_val_vmcs, host_val);
+ if (host_val_vmcs != HOST_IA32_EFER)
+ vmcs_write64(host_val_vmcs, host_val);
vm_entry_controls_setbit(vmx, entry);
vm_exit_controls_setbit(vmx, exit);
}
@@ -2805,8 +2827,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
ignore_bits &= ~(u64)EFER_SCE;
#endif
- clear_atomic_switch_msr(vmx, MSR_EFER);
-
/*
* On EPT, we can't emulate NX, so we must switch EFER atomically.
* On CPUs that support "load IA32_EFER", always switch EFER
@@ -2819,8 +2839,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
if (guest_efer != host_efer)
add_atomic_switch_msr(vmx, MSR_EFER,
guest_efer, host_efer, false);
+ else
+ clear_atomic_switch_msr(vmx, MSR_EFER);
return false;
} else {
+ clear_atomic_switch_msr(vmx, MSR_EFER);
+
guest_efer &= ~ignore_bits;
guest_efer |= host_efer & ignore_bits;
@@ -3272,34 +3296,30 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
unsigned int nr = vcpu->arch.exception.nr;
+ bool has_payload = vcpu->arch.exception.has_payload;
+ unsigned long payload = vcpu->arch.exception.payload;
if (nr == PF_VECTOR) {
if (vcpu->arch.exception.nested_apf) {
*exit_qual = vcpu->arch.apf.nested_apf_token;
return 1;
}
- /*
- * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
- * The fix is to add the ancillary datum (CR2 or DR6) to structs
- * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
- * can be written only when inject_pending_event runs. This should be
- * conditional on a new capability---if the capability is disabled,
- * kvm_multiple_exception would write the ancillary information to
- * CR2 or DR6, for backwards ABI-compatibility.
- */
if (nested_vmx_is_page_fault_vmexit(vmcs12,
vcpu->arch.exception.error_code)) {
- *exit_qual = vcpu->arch.cr2;
- return 1;
- }
- } else {
- if (vmcs12->exception_bitmap & (1u << nr)) {
- if (nr == DB_VECTOR)
- *exit_qual = vcpu->arch.dr6;
- else
- *exit_qual = 0;
+ *exit_qual = has_payload ? payload : vcpu->arch.cr2;
return 1;
}
+ } else if (vmcs12->exception_bitmap & (1u << nr)) {
+ if (nr == DB_VECTOR) {
+ if (!has_payload) {
+ payload = vcpu->arch.dr6;
+ payload &= ~(DR6_FIXED_1 | DR6_BT);
+ payload ^= DR6_RTM;
+ }
+ *exit_qual = payload;
+ } else
+ *exit_qual = 0;
+ return 1;
}
return 0;
@@ -3326,6 +3346,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK;
+ kvm_deliver_exception_payload(vcpu);
+
if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
@@ -4397,9 +4419,7 @@ static void kvm_cpu_vmxon(u64 addr)
cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
- asm volatile (ASM_VMX_VMXON_RAX
- : : "a"(&addr), "m"(addr)
- : "memory", "cc");
+ asm volatile ("vmxon %0" : : "m"(addr));
}
static int hardware_enable(void)
@@ -4468,7 +4488,7 @@ static void vmclear_local_loaded_vmcss(void)
*/
static void kvm_cpu_vmxoff(void)
{
- asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+ asm volatile (__ex("vmxoff"));
intel_pt_handle_vmx(0);
cr4_clear_bits(X86_CR4_VMXE);
@@ -5112,9 +5132,10 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
bool invalidate_gpa)
{
if (enable_ept && (invalidate_gpa || !enable_vpid)) {
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
- ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
+ ept_sync_context(construct_eptp(vcpu,
+ vcpu->arch.mmu->root_hpa));
} else {
vpid_sync_context(vpid);
}
@@ -5264,7 +5285,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long hw_cr0;
- hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK);
+ hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
if (enable_unrestricted_guest)
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
@@ -6339,6 +6360,9 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
rdmsr(MSR_IA32_CR_PAT, low32, high32);
vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
}
+
+ if (cpu_has_load_ia32_efer)
+ vmcs_write64(HOST_IA32_EFER, host_efer);
}
static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -6666,7 +6690,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
if (enable_pml) {
- ASSERT(vmx->pml_pg);
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
@@ -8067,35 +8090,39 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
/*
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
- * set the success or error code of an emulated VMX instruction, as specified
- * by Vol 2B, VMX Instruction Reference, "Conventions".
+ * set the success or error code of an emulated VMX instruction (as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
+ * instruction.
*/
-static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
+static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
{
vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
+ return kvm_skip_emulated_instruction(vcpu);
}
-static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
+static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
{
vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
X86_EFLAGS_SF | X86_EFLAGS_OF))
| X86_EFLAGS_CF);
+ return kvm_skip_emulated_instruction(vcpu);
}
-static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
- u32 vm_instruction_error)
+static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
+ u32 vm_instruction_error)
{
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
- /*
- * failValid writes the error number to the current VMCS, which
- * can't be done there isn't a current VMCS.
- */
- nested_vmx_failInvalid(vcpu);
- return;
- }
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * failValid writes the error number to the current VMCS, which
+ * can't be done if there isn't a current VMCS.
+ */
+ if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
+ return nested_vmx_failInvalid(vcpu);
+
vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
X86_EFLAGS_SF | X86_EFLAGS_OF))
@@ -8105,6 +8132,7 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
* We don't need to force a shadow sync because
* VM_INSTRUCTION_ERROR is not shadowed
*/
+ return kvm_skip_emulated_instruction(vcpu);
}
static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
@@ -8292,6 +8320,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
vmx->nested.vpid02 = allocate_vpid();
+ vmx->nested.vmcs02_initialized = false;
vmx->nested.vmxon = true;
return 0;
@@ -8345,10 +8374,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
return 1;
}
- if (vmx->nested.vmxon) {
- nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (vmx->nested.vmxon)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
!= VMXON_NEEDED_FEATURES) {
@@ -8367,21 +8395,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
* Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
* which replaces physical address width with 32
*/
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ return nested_vmx_failInvalid(vcpu);
page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page)) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (is_error_page(page))
+ return nested_vmx_failInvalid(vcpu);
+
if (*(u32 *)kmap(page) != VMCS12_REVISION) {
kunmap(page);
kvm_release_page_clean(page);
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_failInvalid(vcpu);
}
kunmap(page);
kvm_release_page_clean(page);
@@ -8391,8 +8415,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
/*
@@ -8423,8 +8446,24 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
}
-static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
+static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!vmx->nested.hv_evmcs)
+ return;
+
+ kunmap(vmx->nested.hv_evmcs_page);
+ kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_page = NULL;
+ vmx->nested.hv_evmcs = NULL;
+}
+
+static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (vmx->nested.current_vmptr == -1ull)
return;
@@ -8432,16 +8471,18 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.sync_shadow_vmcs = false;
+ vmx->nested.need_vmcs12_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
/* Flush VMCS12 to guest memory */
- kvm_vcpu_write_guest_page(&vmx->vcpu,
+ kvm_vcpu_write_guest_page(vcpu,
vmx->nested.current_vmptr >> PAGE_SHIFT,
vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
+ kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
vmx->nested.current_vmptr = -1ull;
}
@@ -8449,8 +8490,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
* Free whatever needs to be freed from vmx->nested when L1 goes down, or
* just stops using VMX.
*/
-static void free_nested(struct vcpu_vmx *vmx)
+static void free_nested(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
@@ -8483,6 +8526,10 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL;
}
+ kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
+
+ nested_release_evmcs(vcpu);
+
free_loaded_vmcs(&vmx->nested.vmcs02);
}
@@ -8491,9 +8538,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
{
if (!nested_vmx_check_permission(vcpu))
return 1;
- free_nested(to_vmx(vcpu));
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ free_nested(vcpu);
+ return nested_vmx_succeed(vcpu);
}
/* Emulate the VMCLEAR instruction */
@@ -8509,25 +8555,28 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
- nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ return nested_vmx_failValid(vcpu,
+ VMXERR_VMCLEAR_INVALID_ADDRESS);
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (vmptr == vmx->nested.vmxon_ptr)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_VMCLEAR_VMXON_POINTER);
- if (vmptr == vmx->nested.current_vmptr)
- nested_release_vmcs12(vmx);
+ if (vmx->nested.hv_evmcs_page) {
+ if (vmptr == vmx->nested.hv_evmcs_vmptr)
+ nested_release_evmcs(vcpu);
+ } else {
+ if (vmptr == vmx->nested.current_vmptr)
+ nested_release_vmcs12(vcpu);
- kvm_vcpu_write_guest(vcpu,
- vmptr + offsetof(struct vmcs12, launch_state),
- &zero, sizeof(zero));
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12,
+ launch_state),
+ &zero, sizeof(zero));
+ }
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
@@ -8610,6 +8659,395 @@ static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
}
+static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
+{
+ struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+ struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+ vmcs12->hdr.revision_id = evmcs->revision_id;
+
+ /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
+ vmcs12->tpr_threshold = evmcs->tpr_threshold;
+ vmcs12->guest_rip = evmcs->guest_rip;
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
+ vmcs12->guest_rsp = evmcs->guest_rsp;
+ vmcs12->guest_rflags = evmcs->guest_rflags;
+ vmcs12->guest_interruptibility_info =
+ evmcs->guest_interruptibility_info;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+ vmcs12->cpu_based_vm_exec_control =
+ evmcs->cpu_based_vm_exec_control;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+ vmcs12->exception_bitmap = evmcs->exception_bitmap;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
+ vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
+ vmcs12->vm_entry_intr_info_field =
+ evmcs->vm_entry_intr_info_field;
+ vmcs12->vm_entry_exception_error_code =
+ evmcs->vm_entry_exception_error_code;
+ vmcs12->vm_entry_instruction_len =
+ evmcs->vm_entry_instruction_len;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+ vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
+ vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
+ vmcs12->host_cr0 = evmcs->host_cr0;
+ vmcs12->host_cr3 = evmcs->host_cr3;
+ vmcs12->host_cr4 = evmcs->host_cr4;
+ vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
+ vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
+ vmcs12->host_rip = evmcs->host_rip;
+ vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
+ vmcs12->host_es_selector = evmcs->host_es_selector;
+ vmcs12->host_cs_selector = evmcs->host_cs_selector;
+ vmcs12->host_ss_selector = evmcs->host_ss_selector;
+ vmcs12->host_ds_selector = evmcs->host_ds_selector;
+ vmcs12->host_fs_selector = evmcs->host_fs_selector;
+ vmcs12->host_gs_selector = evmcs->host_gs_selector;
+ vmcs12->host_tr_selector = evmcs->host_tr_selector;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+ vmcs12->pin_based_vm_exec_control =
+ evmcs->pin_based_vm_exec_control;
+ vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
+ vmcs12->secondary_vm_exec_control =
+ evmcs->secondary_vm_exec_control;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
+ vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
+ vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
+ vmcs12->msr_bitmap = evmcs->msr_bitmap;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
+ vmcs12->guest_es_base = evmcs->guest_es_base;
+ vmcs12->guest_cs_base = evmcs->guest_cs_base;
+ vmcs12->guest_ss_base = evmcs->guest_ss_base;
+ vmcs12->guest_ds_base = evmcs->guest_ds_base;
+ vmcs12->guest_fs_base = evmcs->guest_fs_base;
+ vmcs12->guest_gs_base = evmcs->guest_gs_base;
+ vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
+ vmcs12->guest_tr_base = evmcs->guest_tr_base;
+ vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
+ vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
+ vmcs12->guest_es_limit = evmcs->guest_es_limit;
+ vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
+ vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
+ vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
+ vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
+ vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
+ vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
+ vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
+ vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
+ vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
+ vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
+ vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
+ vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
+ vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
+ vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
+ vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
+ vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
+ vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
+ vmcs12->guest_es_selector = evmcs->guest_es_selector;
+ vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
+ vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
+ vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
+ vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
+ vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
+ vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
+ vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
+ vmcs12->tsc_offset = evmcs->tsc_offset;
+ vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
+ vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
+ vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
+ vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
+ vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
+ vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
+ vmcs12->guest_cr0 = evmcs->guest_cr0;
+ vmcs12->guest_cr3 = evmcs->guest_cr3;
+ vmcs12->guest_cr4 = evmcs->guest_cr4;
+ vmcs12->guest_dr7 = evmcs->guest_dr7;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
+ vmcs12->host_fs_base = evmcs->host_fs_base;
+ vmcs12->host_gs_base = evmcs->host_gs_base;
+ vmcs12->host_tr_base = evmcs->host_tr_base;
+ vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
+ vmcs12->host_idtr_base = evmcs->host_idtr_base;
+ vmcs12->host_rsp = evmcs->host_rsp;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
+ vmcs12->ept_pointer = evmcs->ept_pointer;
+ vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
+ }
+
+ if (unlikely(!(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
+ vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
+ vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
+ vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
+ vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
+ vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
+ vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
+ vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
+ vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
+ vmcs12->guest_pending_dbg_exceptions =
+ evmcs->guest_pending_dbg_exceptions;
+ vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
+ vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
+ vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
+ vmcs12->guest_activity_state = evmcs->guest_activity_state;
+ vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
+ }
+
+ /*
+ * Not used?
+ * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
+ * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
+ * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
+ * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
+ * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
+ * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
+ * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
+ * vmcs12->page_fault_error_code_mask =
+ * evmcs->page_fault_error_code_mask;
+ * vmcs12->page_fault_error_code_match =
+ * evmcs->page_fault_error_code_match;
+ * vmcs12->cr3_target_count = evmcs->cr3_target_count;
+ * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
+ * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
+ * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
+ */
+
+ /*
+ * Read only fields:
+ * vmcs12->guest_physical_address = evmcs->guest_physical_address;
+ * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
+ * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
+ * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
+ * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
+ * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
+ * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
+ * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
+ * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
+ * vmcs12->exit_qualification = evmcs->exit_qualification;
+ * vmcs12->guest_linear_address = evmcs->guest_linear_address;
+ *
+ * Not present in struct vmcs12:
+ * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
+ * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
+ * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
+ * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
+ */
+
+ return 0;
+}
+
+static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
+{
+ struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+ struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+ /*
+ * Should not be changed by KVM:
+ *
+ * evmcs->host_es_selector = vmcs12->host_es_selector;
+ * evmcs->host_cs_selector = vmcs12->host_cs_selector;
+ * evmcs->host_ss_selector = vmcs12->host_ss_selector;
+ * evmcs->host_ds_selector = vmcs12->host_ds_selector;
+ * evmcs->host_fs_selector = vmcs12->host_fs_selector;
+ * evmcs->host_gs_selector = vmcs12->host_gs_selector;
+ * evmcs->host_tr_selector = vmcs12->host_tr_selector;
+ * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
+ * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
+ * evmcs->host_cr0 = vmcs12->host_cr0;
+ * evmcs->host_cr3 = vmcs12->host_cr3;
+ * evmcs->host_cr4 = vmcs12->host_cr4;
+ * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
+ * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
+ * evmcs->host_rip = vmcs12->host_rip;
+ * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
+ * evmcs->host_fs_base = vmcs12->host_fs_base;
+ * evmcs->host_gs_base = vmcs12->host_gs_base;
+ * evmcs->host_tr_base = vmcs12->host_tr_base;
+ * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
+ * evmcs->host_idtr_base = vmcs12->host_idtr_base;
+ * evmcs->host_rsp = vmcs12->host_rsp;
+ * sync_vmcs12() doesn't read these:
+ * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
+ * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
+ * evmcs->msr_bitmap = vmcs12->msr_bitmap;
+ * evmcs->ept_pointer = vmcs12->ept_pointer;
+ * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
+ * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
+ * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
+ * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
+ * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
+ * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
+ * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
+ * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
+ * evmcs->tpr_threshold = vmcs12->tpr_threshold;
+ * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
+ * evmcs->exception_bitmap = vmcs12->exception_bitmap;
+ * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
+ * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
+ * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
+ * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
+ * evmcs->page_fault_error_code_mask =
+ * vmcs12->page_fault_error_code_mask;
+ * evmcs->page_fault_error_code_match =
+ * vmcs12->page_fault_error_code_match;
+ * evmcs->cr3_target_count = vmcs12->cr3_target_count;
+ * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
+ * evmcs->tsc_offset = vmcs12->tsc_offset;
+ * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
+ * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
+ * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
+ * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
+ * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
+ * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
+ * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
+ * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
+ *
+ * Not present in struct vmcs12:
+ * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
+ * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
+ * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
+ * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
+ */
+
+ evmcs->guest_es_selector = vmcs12->guest_es_selector;
+ evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
+ evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
+ evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
+ evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
+ evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
+ evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
+ evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
+
+ evmcs->guest_es_limit = vmcs12->guest_es_limit;
+ evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
+ evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
+ evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
+ evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
+ evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
+ evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
+ evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
+ evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
+ evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
+
+ evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
+ evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
+ evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
+ evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
+ evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
+ evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
+ evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
+ evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
+
+ evmcs->guest_es_base = vmcs12->guest_es_base;
+ evmcs->guest_cs_base = vmcs12->guest_cs_base;
+ evmcs->guest_ss_base = vmcs12->guest_ss_base;
+ evmcs->guest_ds_base = vmcs12->guest_ds_base;
+ evmcs->guest_fs_base = vmcs12->guest_fs_base;
+ evmcs->guest_gs_base = vmcs12->guest_gs_base;
+ evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
+ evmcs->guest_tr_base = vmcs12->guest_tr_base;
+ evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
+ evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
+
+ evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
+ evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
+
+ evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
+ evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
+ evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
+ evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
+
+ evmcs->guest_pending_dbg_exceptions =
+ vmcs12->guest_pending_dbg_exceptions;
+ evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
+ evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
+
+ evmcs->guest_activity_state = vmcs12->guest_activity_state;
+ evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
+
+ evmcs->guest_cr0 = vmcs12->guest_cr0;
+ evmcs->guest_cr3 = vmcs12->guest_cr3;
+ evmcs->guest_cr4 = vmcs12->guest_cr4;
+ evmcs->guest_dr7 = vmcs12->guest_dr7;
+
+ evmcs->guest_physical_address = vmcs12->guest_physical_address;
+
+ evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
+ evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
+ evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
+ evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
+ evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
+ evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
+ evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
+ evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
+
+ evmcs->exit_qualification = vmcs12->exit_qualification;
+
+ evmcs->guest_linear_address = vmcs12->guest_linear_address;
+ evmcs->guest_rsp = vmcs12->guest_rsp;
+ evmcs->guest_rflags = vmcs12->guest_rflags;
+
+ evmcs->guest_interruptibility_info =
+ vmcs12->guest_interruptibility_info;
+ evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
+ evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
+ evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
+ evmcs->vm_entry_exception_error_code =
+ vmcs12->vm_entry_exception_error_code;
+ evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
+
+ evmcs->guest_rip = vmcs12->guest_rip;
+
+ evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
+
+ return 0;
+}
+
/*
* Copy the writable VMCS shadow fields back to the VMCS12, in case
* they have been modified by the L1 guest. Note that the "read-only"
@@ -8683,20 +9121,6 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
vmcs_load(vmx->loaded_vmcs->vmcs);
}
-/*
- * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was
- * used before) all generate the same failure when it is missing.
- */
-static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (vmx->nested.current_vmptr == -1ull) {
- nested_vmx_failInvalid(vcpu);
- return 0;
- }
- return 1;
-}
-
static int handle_vmread(struct kvm_vcpu *vcpu)
{
unsigned long field;
@@ -8709,8 +9133,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_check_vmcs12(vcpu))
- return kvm_skip_emulated_instruction(vcpu);
+ if (to_vmx(vcpu)->nested.current_vmptr == -1ull)
+ return nested_vmx_failInvalid(vcpu);
if (!is_guest_mode(vcpu))
vmcs12 = get_vmcs12(vcpu);
@@ -8719,20 +9143,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
* When vmcs->vmcs_link_pointer is -1ull, any VMREAD
* to shadowed-field sets the ALU flags for VMfailInvalid.
*/
- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
+ return nested_vmx_failInvalid(vcpu);
vmcs12 = get_shadow_vmcs12(vcpu);
}
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
- if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
- nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
/*
* Now copy part of this value to register or memory, as requested.
* Note that the number of bits actually copied is 32 or 64 depending
@@ -8750,8 +9172,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
(is_long_mode(vcpu) ? 8 : 4), NULL);
}
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
@@ -8776,8 +9197,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_check_vmcs12(vcpu))
- return kvm_skip_emulated_instruction(vcpu);
+ if (vmx->nested.current_vmptr == -1ull)
+ return nested_vmx_failInvalid(vcpu);
if (vmx_instruction_info & (1u << 10))
field_value = kvm_register_readl(vcpu,
@@ -8800,11 +9221,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
* VMCS," then the "read-only" fields are actually read/write.
*/
if (vmcs_field_readonly(field) &&
- !nested_cpu_has_vmwrite_any_field(vcpu)) {
- nested_vmx_failValid(vcpu,
+ !nested_cpu_has_vmwrite_any_field(vcpu))
+ return nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- return kvm_skip_emulated_instruction(vcpu);
- }
if (!is_guest_mode(vcpu))
vmcs12 = get_vmcs12(vcpu);
@@ -8813,18 +9232,14 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
* When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
* to shadowed-field sets the ALU flags for VMfailInvalid.
*/
- if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)
+ return nested_vmx_failInvalid(vcpu);
vmcs12 = get_shadow_vmcs12(vcpu);
-
}
- if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
- nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (vmcs12_write_any(vmcs12, field, field_value) < 0)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/*
* Do not track vmcs12 dirty-state if in guest-mode
@@ -8846,8 +9261,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
}
}
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
@@ -8858,7 +9272,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER,
__pa(vmx->vmcs01.shadow_vmcs));
- vmx->nested.sync_shadow_vmcs = true;
+ vmx->nested.need_vmcs12_sync = true;
}
vmx->nested.dirty_vmcs12 = true;
}
@@ -8875,36 +9289,37 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) {
- nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ return nested_vmx_failValid(vcpu,
+ VMXERR_VMPTRLD_INVALID_ADDRESS);
- if (vmptr == vmx->nested.vmxon_ptr) {
- nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (vmptr == vmx->nested.vmxon_ptr)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_VMPTRLD_VMXON_POINTER);
+
+ /* Forbid normal VMPTRLD if Enlightened version was used */
+ if (vmx->nested.hv_evmcs)
+ return 1;
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page)) {
- nested_vmx_failInvalid(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ if (is_error_page(page))
+ return nested_vmx_failInvalid(vcpu);
+
new_vmcs12 = kmap(page);
if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
(new_vmcs12->hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
kunmap(page);
kvm_release_page_clean(page);
- nested_vmx_failValid(vcpu,
+ return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
- return kvm_skip_emulated_instruction(vcpu);
}
- nested_release_vmcs12(vmx);
+ nested_release_vmcs12(vcpu);
+
/*
* Load VMCS12 from guest memory since it is not already
* cached.
@@ -8916,8 +9331,71 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
set_current_vmptr(vmx, vmptr);
}
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
+}
+
+/*
+ * This is an equivalent of the nested hypervisor executing the vmptrld
+ * instruction.
+ */
+static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
+ bool from_launch)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_vp_assist_page assist_page;
+
+ if (likely(!vmx->nested.enlightened_vmcs_enabled))
+ return 1;
+
+ if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+ return 1;
+
+ if (unlikely(!assist_page.enlighten_vmentry))
+ return 1;
+
+ if (unlikely(assist_page.current_nested_vmcs !=
+ vmx->nested.hv_evmcs_vmptr)) {
+
+ if (!vmx->nested.hv_evmcs)
+ vmx->nested.current_vmptr = -1ull;
+
+ nested_release_evmcs(vcpu);
+
+ vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
+ vcpu, assist_page.current_nested_vmcs);
+
+ if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+ return 0;
+
+ vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+
+ if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
+ nested_release_evmcs(vcpu);
+ return 0;
+ }
+
+ vmx->nested.dirty_vmcs12 = true;
+ /*
+ * As we keep L2 state for one guest only 'hv_clean_fields' mask
+ * can't be used when we switch between them. Reset it here for
+ * simplicity.
+ */
+ vmx->nested.hv_evmcs->hv_clean_fields &=
+ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+
+ /*
+ * Unlike normal vmcs12, enlightened vmcs12 is not fully
+ * reloaded from guest's memory (read only fields, fields not
+ * present in struct hv_enlightened_vmcs, ...). Make sure there
+ * are no leftovers.
+ */
+ if (from_launch)
+ memset(vmx->nested.cached_vmcs12, 0,
+ sizeof(*vmx->nested.cached_vmcs12));
+
+ }
+ return 1;
}
/* Emulate the VMPTRST instruction */
@@ -8932,6 +9410,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+ return 1;
+
if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
return 1;
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
@@ -8940,8 +9421,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
kvm_inject_page_fault(vcpu, &e);
return 1;
}
- nested_vmx_succeed(vcpu);
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
/* Emulate the INVEPT instruction */
@@ -8971,11 +9451,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
- if (type >= 32 || !(types & (1 << type))) {
- nested_vmx_failValid(vcpu,
+ if (type >= 32 || !(types & (1 << type)))
+ return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return kvm_skip_emulated_instruction(vcpu);
- }
/* According to the Intel VMX instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
@@ -8997,14 +9475,20 @@ static int handle_invept(struct kvm_vcpu *vcpu)
case VMX_EPT_EXTENT_CONTEXT:
kvm_mmu_sync_roots(vcpu);
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- nested_vmx_succeed(vcpu);
break;
default:
BUG_ON(1);
break;
}
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
+}
+
+static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
}
static int handle_invvpid(struct kvm_vcpu *vcpu)
@@ -9018,6 +9502,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 vpid;
u64 gla;
} operand;
+ u16 vpid02;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -9035,11 +9520,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
- if (type >= 32 || !(types & (1 << type))) {
- nested_vmx_failValid(vcpu,
+ if (type >= 32 || !(types & (1 << type)))
+ return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return kvm_skip_emulated_instruction(vcpu);
- }
/* according to the intel vmx instruction reference, the memory
* operand is read even if it isn't needed (e.g., for type==global)
@@ -9051,47 +9534,39 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
kvm_inject_page_fault(vcpu, &e);
return 1;
}
- if (operand.vpid >> 16) {
- nested_vmx_failValid(vcpu,
+ if (operand.vpid >> 16)
+ return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return kvm_skip_emulated_instruction(vcpu);
- }
+ vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
if (!operand.vpid ||
- is_noncanonical_address(operand.gla, vcpu)) {
- nested_vmx_failValid(vcpu,
+ is_noncanonical_address(operand.gla, vcpu))
+ return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return kvm_skip_emulated_instruction(vcpu);
- }
- if (cpu_has_vmx_invvpid_individual_addr() &&
- vmx->nested.vpid02) {
+ if (cpu_has_vmx_invvpid_individual_addr()) {
__invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
- vmx->nested.vpid02, operand.gla);
+ vpid02, operand.gla);
} else
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+ __vmx_flush_tlb(vcpu, vpid02, false);
break;
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
- if (!operand.vpid) {
- nested_vmx_failValid(vcpu,
+ if (!operand.vpid)
+ return nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return kvm_skip_emulated_instruction(vcpu);
- }
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+ __vmx_flush_tlb(vcpu, vpid02, false);
break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+ __vmx_flush_tlb(vcpu, vpid02, false);
break;
default:
WARN_ON_ONCE(1);
return kvm_skip_emulated_instruction(vcpu);
}
- nested_vmx_succeed(vcpu);
-
- return kvm_skip_emulated_instruction(vcpu);
+ return nested_vmx_succeed(vcpu);
}
static int handle_invpcid(struct kvm_vcpu *vcpu)
@@ -9162,11 +9637,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
}
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
- if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+ if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
== operand.pcid)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
- kvm_mmu_free_roots(vcpu, roots_to_free);
+ kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
/*
* If neither the current cr3 nor any of the prev_roots use the
* given PCID, then nothing needs to be done here because a
@@ -9293,7 +9768,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
kvm_mmu_unload(vcpu);
mmu->ept_ad = accessed_dirty;
- mmu->base_role.ad_disabled = !accessed_dirty;
+ mmu->mmu_role.base.ad_disabled = !accessed_dirty;
vmcs12->ept_pointer = address;
/*
* TODO: Check what's the correct approach in case
@@ -9652,9 +10127,6 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
return false;
else if (is_page_fault(intr_info))
return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
- else if (is_no_device(intr_info) &&
- !(vmcs12->guest_cr0 & X86_CR0_TS))
- return false;
else if (is_debug(intr_info) &&
vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -10676,9 +11148,25 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.sync_shadow_vmcs) {
- copy_vmcs12_to_shadow(vmx);
- vmx->nested.sync_shadow_vmcs = false;
+ if (vmx->nested.need_vmcs12_sync) {
+ /*
+ * hv_evmcs may end up being not mapped after migration (when
+ * L2 was running), map it here to make sure vmcs12 changes are
+ * properly reflected.
+ */
+ if (vmx->nested.enlightened_vmcs_enabled &&
+ !vmx->nested.hv_evmcs)
+ nested_vmx_handle_enlightened_vmptrld(vcpu, false);
+
+ if (vmx->nested.hv_evmcs) {
+ copy_vmcs12_to_enlightened(vmx);
+ /* All fields are clean */
+ vmx->nested.hv_evmcs->hv_clean_fields |=
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ } else {
+ copy_vmcs12_to_shadow(vmx);
+ }
+ vmx->nested.need_vmcs12_sync = false;
}
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -10745,7 +11233,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
"jmp 1f \n\t"
"2: \n\t"
- __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
+ __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
"1: \n\t"
/* Reload cr2 if changed */
"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
@@ -10777,9 +11265,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
/* Enter guest mode */
"jne 1f \n\t"
- __ex(ASM_VMX_VMLAUNCH) "\n\t"
+ __ex("vmlaunch") "\n\t"
"jmp 2f \n\t"
- "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+ "1: " __ex("vmresume") "\n\t"
"2: "
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
@@ -10801,6 +11289,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
+ /*
+ * Clear host registers marked as clobbered to prevent
+ * speculative use.
+ */
"xor %%r8d, %%r8d \n\t"
"xor %%r9d, %%r9d \n\t"
"xor %%r10d, %%r10d \n\t"
@@ -10958,6 +11450,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
vmx->loaded_vmcs = vmcs;
vmx_vcpu_load(vcpu, cpu);
put_cpu();
+
+ vm_entry_controls_reset_shadow(vmx);
+ vm_exit_controls_reset_shadow(vmx);
+ vmx_segment_cache_clear(vmx);
}
/*
@@ -10966,12 +11462,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
*/
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- vcpu_load(vcpu);
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- free_nested(vmx);
- vcpu_put(vcpu);
+ vcpu_load(vcpu);
+ vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01);
+ free_nested(vcpu);
+ vcpu_put(vcpu);
}
static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
@@ -11334,28 +11828,28 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
WARN_ON(mmu_is_nested(vcpu));
- if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
- return 1;
+ vcpu->arch.mmu = &vcpu->arch.guest_mmu;
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
nested_ept_ad_enabled(vcpu),
nested_ept_get_cr3(vcpu));
- vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
- vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
- vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+ vcpu->arch.mmu->set_cr3 = vmx_set_cr3;
+ vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3;
+ vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
+ vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
- return 0;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
- vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+ vcpu->arch.mmu = &vcpu->arch.root_mmu;
+ vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
@@ -11716,7 +12210,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
!nested_exit_intr_ack_set(vcpu) ||
(vmcs12->posted_intr_nv & 0xff00) ||
(vmcs12->posted_intr_desc_addr & 0x3f) ||
- (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
+ (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))
return -EINVAL;
/* tpr shadow is needed by all apicv features. */
@@ -11772,15 +12266,12 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- u64 address = vmcs12->pml_address;
- int maxphyaddr = cpuid_maxphyaddr(vcpu);
+ if (!nested_cpu_has_pml(vmcs12))
+ return 0;
- if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
- if (!nested_cpu_has_ept(vmcs12) ||
- !IS_ALIGNED(address, 4096) ||
- address >> maxphyaddr)
- return -EINVAL;
- }
+ if (!nested_cpu_has_ept(vmcs12) ||
+ !page_address_valid(vcpu, vmcs12->pml_address))
+ return -EINVAL;
return 0;
}
@@ -11960,112 +12451,87 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 0;
}
-static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+/*
+ * Returns if KVM is able to config CPU to tag TLB entries
+ * populated by L2 differently than TLB entries populated
+ * by L1.
+ *
+ * If L1 uses EPT, then TLB entries are tagged with different EPTP.
+ *
+ * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
+ * with different VPID (L1 entries are tagged with vmx->vpid
+ * while L2 entries are tagged with vmx->nested.vpid02).
+ */
+static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
- vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
- vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
- vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
- vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
- vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
- vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
- vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
- vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
- vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
- vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
- vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
- vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
- vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
- vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
- vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
- vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
- vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
- vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
- vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
- vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
- vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
- vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
- vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
- vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
- vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
- vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
-
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+ return nested_cpu_has_ept(vmcs12) ||
+ (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
+}
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
+static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+{
+ if (vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
+ return vmcs12->guest_ia32_efer;
+ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+ return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
+ else
+ return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
+}
- if (cpu_has_vmx_posted_intr())
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
+static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
+{
+ /*
+ * If vmcs02 hasn't been initialized, set the constant vmcs02 state
+ * according to L0's settings (vmcs12 is irrelevant here). Host
+ * fields that come from L0 and are not constant, e.g. HOST_CR3,
+ * will be set as needed prior to VMLAUNCH/VMRESUME.
+ */
+ if (vmx->nested.vmcs02_initialized)
+ return;
+ vmx->nested.vmcs02_initialized = true;
/*
- * Whether page-faults are trapped is determined by a combination of
- * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
- * If enable_ept, L0 doesn't care about page faults and we should
- * set all of these to L1's desires. However, if !enable_ept, L0 does
- * care about (at least some) page faults, and because it is not easy
- * (if at all possible?) to merge L0 and L1's desires, we simply ask
- * to exit on each and every L2 page fault. This is done by setting
- * MASK=MATCH=0 and (see below) EB.PF=1.
- * Note that below we don't need special code to set EB.PF beyond the
- * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
- * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
- * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+ * We don't care what the EPTP value is we just need to guarantee
+ * it's valid so we don't get a false positive when doing early
+ * consistency checks.
*/
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
- enable_ept ? vmcs12->page_fault_error_code_mask : 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
- enable_ept ? vmcs12->page_fault_error_code_match : 0);
+ if (enable_ept && nested_early_check)
+ vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0));
/* All VMFUNCs are currently emulated through L0 vmexits. */
if (cpu_has_vmx_vmfunc())
vmcs_write64(VM_FUNCTION_CONTROL, 0);
- if (cpu_has_vmx_apicv()) {
- vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
- vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
- vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
- vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
- }
+ if (cpu_has_vmx_posted_intr())
+ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
- /*
- * Set host-state according to L0's settings (vmcs12 is irrelevant here)
- * Some constant fields are set here by vmx_set_constant_host_state().
- * Other fields are different per CPU, and will be set later when
- * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
- * is called.
- */
- vmx_set_constant_host_state(vmx);
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+
+ if (enable_pml)
+ vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
/*
- * Set the MSR load/store lists to match L0's settings.
+ * Set the MSR load/store lists to match L0's settings. Only the
+ * addresses are constant (for vmcs02), the counts can change based
+ * on L2's behavior, e.g. switching to/from long mode.
*/
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
- set_cr4_guest_host_mask(vmx);
+ vmx_set_constant_host_state(vmx);
+}
- if (kvm_mpx_supported()) {
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
- else
- vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
- }
+static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx,
+ struct vmcs12 *vmcs12)
+{
+ prepare_vmcs02_constant_state(vmx);
+
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
@@ -12073,78 +12539,30 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
else
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
}
-
- /*
- * L1 may access the L2's PDPTR, so save them to construct vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
- if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
}
-/*
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
- * guest in a way that will both be appropriate to L1's requests, and our
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
- * function also has additional necessary side-effects, like setting various
- * vcpu->arch fields.
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
- * is assigned to entry_failure_code on failure.
- */
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- u32 *entry_failure_code)
+static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control, vmcs12_exec_ctrl;
+ u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
- if (vmx->nested.dirty_vmcs12) {
- prepare_vmcs02_full(vcpu, vmcs12);
- vmx->nested.dirty_vmcs12 = false;
- }
+ if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
+ prepare_vmcs02_early_full(vmx, vmcs12);
/*
- * First, the fields that are shadowed. This must be kept in sync
- * with vmx_shadow_fields.h.
+ * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
+ * entry, but only if the current (host) sp changed from the value
+ * we wrote last (vmx->host_rsp). This cache is no longer relevant
+ * if we switch vmcs, and rather than hold a separate cache per vmcs,
+ * here we just force the write to happen on entry. host_rsp will
+ * also be written unconditionally by nested_vmx_check_vmentry_hw()
+ * if we are doing early consistency checks via hardware.
*/
+ vmx->host_rsp = 0;
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
-
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
- kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
- } else {
- kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
- }
- if (vmx->nested.nested_run_pending) {
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- vmcs12->vm_entry_intr_info_field);
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
- vmcs12->vm_entry_exception_error_code);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
- vmcs12->vm_entry_instruction_len);
- vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
- vmcs12->guest_interruptibility_info);
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
- } else {
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
- }
- vmx_set_rflags(vcpu, vmcs12->guest_rflags);
-
+ /*
+ * PIN CONTROLS
+ */
exec_control = vmcs12->pin_based_vm_exec_control;
/* Preemption timer setting is computed directly in vmx_vcpu_run. */
@@ -12159,13 +12577,43 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
-
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- vmx->nested.preemption_timer_expired = false;
- if (nested_cpu_has_preemption_timer(vmcs12))
- vmx_start_preemption_timer(vcpu);
+ /*
+ * EXEC CONTROLS
+ */
+ exec_control = vmx_exec_control(vmx); /* L0's desires */
+ exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+ exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+ exec_control &= ~CPU_BASED_TPR_SHADOW;
+ exec_control |= vmcs12->cpu_based_vm_exec_control;
+ /*
+ * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
+ * nested_get_vmcs12_pages can't fix it up, the illegal value
+ * will result in a VM entry failure.
+ */
+ if (exec_control & CPU_BASED_TPR_SHADOW) {
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
+ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ } else {
+#ifdef CONFIG_X86_64
+ exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING;
+#endif
+ }
+
+ /*
+ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+ * for I/O port accesses.
+ */
+ exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
+ exec_control |= CPU_BASED_UNCOND_IO_EXITING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+
+ /*
+ * SECONDARY EXEC CONTROLS
+ */
if (cpu_has_secondary_exec_ctrls()) {
exec_control = vmx->secondary_exec_control;
@@ -12206,43 +12654,214 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
/*
- * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
- * entry, but only if the current (host) sp changed from the value
- * we wrote last (vmx->host_rsp). This cache is no longer relevant
- * if we switch vmcs, and rather than hold a separate cache per vmcs,
- * here we just force the write to happen on entry.
+ * ENTRY CONTROLS
+ *
+ * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
+ * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
+ * on the related bits (if supported by the CPU) in the hope that
+ * we can avoid VMWrites during vmx_set_efer().
+ */
+ exec_control = (vmcs12->vm_entry_controls | vmcs_config.vmentry_ctrl) &
+ ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
+ if (cpu_has_load_ia32_efer) {
+ if (guest_efer & EFER_LMA)
+ exec_control |= VM_ENTRY_IA32E_MODE;
+ if (guest_efer != host_efer)
+ exec_control |= VM_ENTRY_LOAD_IA32_EFER;
+ }
+ vm_entry_controls_init(vmx, exec_control);
+
+ /*
+ * EXIT CONTROLS
+ *
+ * L2->L1 exit controls are emulated - the hardware exit is to L0 so
+ * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
+ * bits may be modified by vmx_set_efer() in prepare_vmcs02().
*/
- vmx->host_rsp = 0;
+ exec_control = vmcs_config.vmexit_ctrl;
+ if (cpu_has_load_ia32_efer && guest_efer != host_efer)
+ exec_control |= VM_EXIT_LOAD_IA32_EFER;
+ vm_exit_controls_init(vmx, exec_control);
- exec_control = vmx_exec_control(vmx); /* L0's desires */
- exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
- exec_control &= ~CPU_BASED_TPR_SHADOW;
- exec_control |= vmcs12->cpu_based_vm_exec_control;
+ /*
+ * Conceptually we want to copy the PML address and index from
+ * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
+ * since we always flush the log on each vmexit and never change
+ * the PML address (once set), this happens to be equivalent to
+ * simply resetting the index in vmcs02.
+ */
+ if (enable_pml)
+ vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
/*
- * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
- * nested_get_vmcs12_pages can't fix it up, the illegal value
- * will result in a VM entry failure.
+ * Interrupt/Exception Fields
*/
- if (exec_control & CPU_BASED_TPR_SHADOW) {
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
- vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ if (vmx->nested.nested_run_pending) {
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ vmcs12->vm_entry_intr_info_field);
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vmcs12->vm_entry_exception_error_code);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs12->vm_entry_instruction_len);
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs12->guest_interruptibility_info);
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
} else {
-#ifdef CONFIG_X86_64
- exec_control |= CPU_BASED_CR8_LOAD_EXITING |
- CPU_BASED_CR8_STORE_EXITING;
-#endif
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
+}
+
+static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+{
+ struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+ if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
+ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+ vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+ vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+ vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+ vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+ vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+ vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+ vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+ vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+ vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+ vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+ vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+ vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+ vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+ vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+ vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
+ vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
+ vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
+ vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
+ vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+ vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
+ vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+ vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ }
+
+ if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ /*
+ * L1 may access the L2's PDPTR, so save them to construct
+ * vmcs12
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+ }
+
+ if (nested_cpu_has_xsaves(vmcs12))
+ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
/*
- * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
- * for I/O port accesses.
+ * Whether page-faults are trapped is determined by a combination of
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+ * If enable_ept, L0 doesn't care about page faults and we should
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
+ * care about (at least some) page faults, and because it is not easy
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
+ * to exit on each and every L2 page fault. This is done by setting
+ * MASK=MATCH=0 and (see below) EB.PF=1.
+ * Note that below we don't need special code to set EB.PF beyond the
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
*/
- exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
- exec_control |= CPU_BASED_UNCOND_IO_EXITING;
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+ if (cpu_has_vmx_apicv()) {
+ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
+ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
+ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
+ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
+ }
+
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+
+ set_cr4_guest_host_mask(vmx);
+
+ if (kvm_mpx_supported()) {
+ if (vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+ else
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+ }
+}
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
+ * guest in a way that will both be appropriate to L1's requests, and our
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
+ * function also has additional necessary side-effects, like setting various
+ * vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ u32 *entry_failure_code)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+ if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
+ prepare_vmcs02_full(vmx, vmcs12);
+ vmx->nested.dirty_vmcs12 = false;
+ }
+
+ /*
+ * First, the fields that are shadowed. This must be kept in sync
+ * with vmx_shadow_fields.h.
+ */
+ if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ }
+
+ if (vmx->nested.nested_run_pending &&
+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
+ kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
+ vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+ } else {
+ kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
+ vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+ }
+ vmx_set_rflags(vcpu, vmcs12->guest_rflags);
+
+ vmx->nested.preemption_timer_expired = false;
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ vmx_start_preemption_timer(vcpu);
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
* bitwise-or of what L1 wants to trap for L2, and what we want to
@@ -12252,20 +12871,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
- /* L2->L1 exit controls are emulated - the hardware exit is to L0 so
- * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
- * bits are further modified by vmx_set_efer() below.
- */
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
-
- /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
- * emulated by vmx_set_efer(), below.
- */
- vm_entry_controls_init(vmx,
- (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
- ~VM_ENTRY_IA32E_MODE) |
- (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
-
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
@@ -12288,37 +12893,29 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* influence global bitmap(for vpid01 and vpid02 allocation)
* even if spawn a lot of nested vCPUs.
*/
- if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
+ if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) {
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+ __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false);
}
} else {
- vmx_flush_tlb(vcpu, true);
+ /*
+ * If L1 use EPT, then L0 needs to execute INVEPT on
+ * EPTP02 instead of EPTP01. Therefore, delay TLB
+ * flush until vmcs02->eptp is fully updated by
+ * KVM_REQ_LOAD_CR3. Note that this assumes
+ * KVM_REQ_TLB_FLUSH is evaluated after
+ * KVM_REQ_LOAD_CR3 in vcpu_enter_guest().
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
}
- if (enable_pml) {
- /*
- * Conceptually we want to copy the PML address and index from
- * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
- * since we always flush the log on each vmexit, this happens
- * to be equivalent to simply resetting the fields in vmcs02.
- */
- ASSERT(vmx->pml_pg);
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
- }
-
- if (nested_cpu_has_ept(vmcs12)) {
- if (nested_ept_init_mmu_context(vcpu)) {
- *entry_failure_code = ENTRY_FAIL_DEFAULT;
- return 1;
- }
- } else if (nested_cpu_has2(vmcs12,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ if (nested_cpu_has_ept(vmcs12))
+ nested_ept_init_mmu_context(vcpu);
+ else if (nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
vmx_flush_tlb(vcpu, true);
- }
/*
* This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
@@ -12334,14 +12931,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
- if (vmx->nested.nested_run_pending &&
- (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
- vcpu->arch.efer = vmcs12->guest_ia32_efer;
- else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
- vcpu->arch.efer |= (EFER_LMA | EFER_LME);
- else
- vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
- /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
+ vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
+ /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
/*
@@ -12383,6 +12974,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool ia32e;
if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
@@ -12457,6 +13049,21 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
/*
+ * If the load IA32_EFER VM-exit control is 1, bits reserved in the
+ * IA32_EFER MSR must be 0 in the field for that register. In addition,
+ * the values of the LMA and LME bits in the field must each be that of
+ * the host address-space size VM-exit control.
+ */
+ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
+ ia32e = (vmcs12->vm_exit_controls &
+ VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+ if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
+ ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
+ ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
+ return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+ }
+
+ /*
* From the Intel SDM, volume 3:
* Fields relevant to VM-entry event injection must be set properly.
* These fields are the VM-entry interruption-information field, the
@@ -12512,6 +13119,10 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
}
+ if (nested_cpu_has_ept(vmcs12) &&
+ !valid_ept_address(vcpu, vmcs12->ept_pointer))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
return 0;
}
@@ -12577,21 +13188,6 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 1;
}
- /*
- * If the load IA32_EFER VM-exit control is 1, bits reserved in the
- * IA32_EFER MSR must be 0 in the field for that register. In addition,
- * the values of the LMA and LME bits in the field must each be that of
- * the host address-space size VM-exit control.
- */
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
- ia32e = (vmcs12->vm_exit_controls &
- VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
- if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
- ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
- ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
- return 1;
- }
-
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
(vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
@@ -12600,26 +13196,139 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 0;
}
+static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long cr3, cr4;
+
+ if (!nested_early_check)
+ return 0;
+
+ if (vmx->msr_autoload.host.nr)
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ if (vmx->msr_autoload.guest.nr)
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
+
+ preempt_disable();
+
+ vmx_prepare_switch_to_guest(vcpu);
+
+ /*
+ * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
+ * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
+ * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e.
+ * there is no need to preserve other bits or save/restore the field.
+ */
+ vmcs_writel(GUEST_RFLAGS, 0);
+
+ vmcs_writel(HOST_RIP, vmx_early_consistency_check_return);
+
+ cr3 = __get_current_cr3_fast();
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+ vmcs_writel(HOST_CR3, cr3);
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
+ }
+
+ cr4 = cr4_read_shadow();
+ if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
+ vmcs_writel(HOST_CR4, cr4);
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
+ }
+
+ vmx->__launched = vmx->loaded_vmcs->launched;
+
+ asm(
+ /* Set HOST_RSP */
+ __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t"
+ "mov %%" _ASM_SP ", %c[host_rsp](%0)\n\t"
+
+ /* Check if vmlaunch of vmresume is needed */
+ "cmpl $0, %c[launched](%0)\n\t"
+ "je 1f\n\t"
+ __ex("vmresume") "\n\t"
+ "jmp 2f\n\t"
+ "1: " __ex("vmlaunch") "\n\t"
+ "jmp 2f\n\t"
+ "2: "
+
+ /* Set vmx->fail accordingly */
+ "setbe %c[fail](%0)\n\t"
+
+ ".pushsection .rodata\n\t"
+ ".global vmx_early_consistency_check_return\n\t"
+ "vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t"
+ ".popsection"
+ :
+ : "c"(vmx), "d"((unsigned long)HOST_RSP),
+ [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
+ [fail]"i"(offsetof(struct vcpu_vmx, fail)),
+ [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp))
+ : "rax", "cc", "memory"
+ );
+
+ vmcs_writel(HOST_RIP, vmx_return);
+
+ preempt_enable();
+
+ if (vmx->msr_autoload.host.nr)
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
+ if (vmx->msr_autoload.guest.nr)
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
+
+ if (vmx->fail) {
+ WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
+ VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ vmx->fail = 0;
+ return 1;
+ }
+
+ /*
+ * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
+ */
+ local_irq_enable();
+ if (hw_breakpoint_active())
+ set_debugreg(__this_cpu_read(cpu_dr7), 7);
+
+ /*
+ * A non-failing VMEntry means we somehow entered guest mode with
+ * an illegal RIP, and that's just the tip of the iceberg. There
+ * is no telling what memory has been modified or what state has
+ * been exposed to unknown code. Hitting this all but guarantees
+ * a (very critical) hardware issue.
+ */
+ WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
+ VMX_EXIT_REASONS_FAILED_VMENTRY));
+
+ return 0;
+}
+STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw);
+
+static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12);
+
/*
- * If exit_qual is NULL, this is being called from state restore (either RSM
+ * If from_vmentry is false, this is being called from state restore (either RSM
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
++ *
++ * Returns:
++ * 0 - success, i.e. proceed with actual VMEnter
++ * 1 - consistency check VMExit
++ * -1 - consistency check VMFail
*/
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
+static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- bool from_vmentry = !!exit_qual;
- u32 dummy_exit_qual;
bool evaluate_pending_interrupts;
- int r = 0;
+ u32 exit_reason = EXIT_REASON_INVALID_STATE;
+ u32 exit_qual;
evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
- enter_guest_mode(vcpu);
-
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
@@ -12627,24 +13336,35 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
- vmx_segment_cache_clear(vmx);
+ prepare_vmcs02_early(vmx, vmcs12);
+
+ if (from_vmentry) {
+ nested_get_vmcs12_pages(vcpu);
+
+ if (nested_vmx_check_vmentry_hw(vcpu)) {
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+ return -1;
+ }
+
+ if (check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ goto vmentry_fail_vmexit;
+ }
+
+ enter_guest_mode(vcpu);
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
- r = EXIT_REASON_INVALID_STATE;
- if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
- goto fail;
+ if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+ goto vmentry_fail_vmexit_guest_mode;
if (from_vmentry) {
- nested_get_vmcs12_pages(vcpu);
-
- r = EXIT_REASON_MSR_LOAD_FAIL;
- *exit_qual = nested_vmx_load_msr(vcpu,
- vmcs12->vm_entry_msr_load_addr,
- vmcs12->vm_entry_msr_load_count);
- if (*exit_qual)
- goto fail;
+ exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
+ exit_qual = nested_vmx_load_msr(vcpu,
+ vmcs12->vm_entry_msr_load_addr,
+ vmcs12->vm_entry_msr_load_count);
+ if (exit_qual)
+ goto vmentry_fail_vmexit_guest_mode;
} else {
/*
* The MMU is not initialized to point at the right entities yet and
@@ -12681,12 +13401,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
*/
return 0;
-fail:
+ /*
+ * A failed consistency check that leads to a VMExit during L1's
+ * VMEnter to L2 is a variation of a normal VMexit, as explained in
+ * 26.7 "VM-entry failures during or after loading guest state".
+ */
+vmentry_fail_vmexit_guest_mode:
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
leave_guest_mode(vcpu);
+
+vmentry_fail_vmexit:
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- return r;
+
+ if (!from_vmentry)
+ return 1;
+
+ load_vmcs12_host_state(vcpu, vmcs12);
+ vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
+ vmcs12->exit_qualification = exit_qual;
+ if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
+ vmx->nested.need_vmcs12_sync = true;
+ return 1;
}
/*
@@ -12698,14 +13434,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
- u32 exit_qual;
int ret;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_check_vmcs12(vcpu))
- goto out;
+ if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true))
+ return 1;
+
+ if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull)
+ return nested_vmx_failInvalid(vcpu);
vmcs12 = get_vmcs12(vcpu);
@@ -12715,13 +13453,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* rather than RFLAGS.ZF, and no error number is stored to the
* VM-instruction error field.
*/
- if (vmcs12->hdr.shadow_vmcs) {
- nested_vmx_failInvalid(vcpu);
- goto out;
- }
+ if (vmcs12->hdr.shadow_vmcs)
+ return nested_vmx_failInvalid(vcpu);
- if (enable_shadow_vmcs)
+ if (vmx->nested.hv_evmcs) {
+ copy_enlightened_to_vmcs12(vmx);
+ /* Enlightened VMCS doesn't have launch state */
+ vmcs12->launch_state = !launch;
+ } else if (enable_shadow_vmcs) {
copy_shadow_to_vmcs12(vmx);
+ }
/*
* The nested entry process starts with enforcing various prerequisites
@@ -12733,59 +13474,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* for misconfigurations which will anyway be caught by the processor
* when using the merged vmcs02.
*/
- if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) {
- nested_vmx_failValid(vcpu,
- VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
- goto out;
- }
+ if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
- if (vmcs12->launch_state == launch) {
- nested_vmx_failValid(vcpu,
+ if (vmcs12->launch_state == launch)
+ return nested_vmx_failValid(vcpu,
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
- goto out;
- }
ret = check_vmentry_prereqs(vcpu, vmcs12);
- if (ret) {
- nested_vmx_failValid(vcpu, ret);
- goto out;
- }
-
- /*
- * After this point, the trap flag no longer triggers a singlestep trap
- * on the vm entry instructions; don't call kvm_skip_emulated_instruction.
- * This is not 100% correct; for performance reasons, we delegate most
- * of the checks on host state to the processor. If those fail,
- * the singlestep trap is missed.
- */
- skip_emulated_instruction(vcpu);
-
- ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
- if (ret) {
- nested_vmx_entry_failure(vcpu, vmcs12,
- EXIT_REASON_INVALID_STATE, exit_qual);
- return 1;
- }
+ if (ret)
+ return nested_vmx_failValid(vcpu, ret);
/*
* We're finally done with prerequisite checking, and can start with
* the nested entry.
*/
-
vmx->nested.nested_run_pending = 1;
- ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
- if (ret) {
- nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
- vmx->nested.nested_run_pending = 0;
+ ret = nested_vmx_enter_non_root_mode(vcpu, true);
+ vmx->nested.nested_run_pending = !ret;
+ if (ret > 0)
return 1;
- }
+ else if (ret)
+ return nested_vmx_failValid(vcpu,
+ VMXERR_ENTRY_INVALID_CONTROL_FIELD);
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
/*
- * Must happen outside of enter_vmx_non_root_mode() as it will
+ * Must happen outside of nested_vmx_enter_non_root_mode() as it will
* also be used as part of restoring nVMX state for
* snapshot restore (migration).
*
@@ -12806,9 +13525,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return kvm_vcpu_halt(vcpu);
}
return 1;
-
-out:
- return kvm_skip_emulated_instruction(vcpu);
}
/*
@@ -13122,24 +13838,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_clear_interrupt_queue(vcpu);
}
-static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
-{
- u32 entry_failure_code;
-
- nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * Only PDPTE load can fail as the value of cr3 was checked on entry and
- * couldn't have changed.
- */
- if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
- nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-}
-
/*
* A part of what we need to when the nested L2 guest exits and we want to
* run its L1 parent, is to reset L1's guest state to the host state specified
@@ -13153,6 +13851,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
+ u32 entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -13165,6 +13864,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
+ vmx_set_interrupt_shadow(vcpu, 0);
+
/*
* Note that calling vmx_set_cr0 is important, even if cr0 hasn't
* actually changed, because vmx_set_cr0 refers to efer set above.
@@ -13179,23 +13880,35 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
vmx_set_cr4(vcpu, vmcs12->host_cr4);
- load_vmcs12_mmu_host_state(vcpu, vmcs12);
+ nested_ept_uninit_mmu_context(vcpu);
/*
- * If vmcs01 don't use VPID, CPU flushes TLB on every
+ * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ * couldn't have changed.
+ */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
+ /*
+ * If vmcs01 doesn't use VPID, CPU flushes TLB on every
* VMEntry/VMExit. Thus, no need to flush TLB.
*
- * If vmcs12 uses VPID, TLB entries populated by L2 are
- * tagged with vmx->nested.vpid02 while L1 entries are tagged
- * with vmx->vpid. Thus, no need to flush TLB.
+ * If vmcs12 doesn't use VPID, L1 expects TLB to be
+ * flushed on every VMEntry/VMExit.
+ *
+ * Otherwise, we can preserve TLB entries as long as we are
+ * able to tag L1 TLB entries differently than L2 TLB entries.
*
- * Therefore, flush TLB only in case vmcs01 uses VPID and
- * vmcs12 don't use VPID as in this case L1 & L2 TLB entries
- * are both tagged with vmx->vpid.
+ * If vmcs12 uses EPT, we need to execute this flush on EPTP01
+ * and therefore we request the TLB flush to happen only after VMCS EPTP
+ * has been set by KVM_REQ_LOAD_CR3.
*/
if (enable_vpid &&
- !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
- vmx_flush_tlb(vcpu, true);
+ (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) {
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
@@ -13275,6 +13988,140 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
}
+static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
+{
+ struct shared_msr_entry *efer_msr;
+ unsigned int i;
+
+ if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
+ return vmcs_read64(GUEST_IA32_EFER);
+
+ if (cpu_has_load_ia32_efer)
+ return host_efer;
+
+ for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
+ if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
+ return vmx->msr_autoload.guest.val[i].value;
+ }
+
+ efer_msr = find_msr_entry(vmx, MSR_EFER);
+ if (efer_msr)
+ return efer_msr->data;
+
+ return host_efer;
+}
+
+static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmx_msr_entry g, h;
+ struct msr_data msr;
+ gpa_t gpa;
+ u32 i, j;
+
+ vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
+
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
+ /*
+ * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
+ * as vmcs01.GUEST_DR7 contains a userspace defined value
+ * and vcpu->arch.dr7 is not squirreled away before the
+ * nested VMENTER (not worth adding a variable in nested_vmx).
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+ else
+ WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
+ }
+
+ /*
+ * Note that calling vmx_set_{efer,cr0,cr4} is important as they
+ * handle a variety of side effects to KVM's software model.
+ */
+ vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
+
+ vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+ vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
+
+ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+ vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
+
+ nested_ept_uninit_mmu_context(vcpu);
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+ * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
+ * from vmcs01 (if necessary). The PDPTRs are not loaded on
+ * VMFail, like everything else we just need to ensure our
+ * software model is up-to-date.
+ */
+ ept_save_pdptrs(vcpu);
+
+ kvm_mmu_reset_context(vcpu);
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmx_update_msr_bitmap(vcpu);
+
+ /*
+ * This nasty bit of open coding is a compromise between blindly
+ * loading L1's MSRs using the exit load lists (incorrect emulation
+ * of VMFail), leaving the nested VM's MSRs in the software model
+ * (incorrect behavior) and snapshotting the modified MSRs (too
+ * expensive since the lists are unbound by hardware). For each
+ * MSR that was (prematurely) loaded from the nested VMEntry load
+ * list, reload it from the exit load list if it exists and differs
+ * from the guest value. The intent is to stuff host state as
+ * silently as possible, not to fully process the exit load list.
+ */
+ msr.host_initiated = false;
+ for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
+ gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
+ if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
+ pr_debug_ratelimited(
+ "%s read MSR index failed (%u, 0x%08llx)\n",
+ __func__, i, gpa);
+ goto vmabort;
+ }
+
+ for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
+ gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
+ if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
+ pr_debug_ratelimited(
+ "%s read MSR failed (%u, 0x%08llx)\n",
+ __func__, j, gpa);
+ goto vmabort;
+ }
+ if (h.index != g.index)
+ continue;
+ if (h.value == g.value)
+ break;
+
+ if (nested_vmx_load_msr_check(vcpu, &h)) {
+ pr_debug_ratelimited(
+ "%s check failed (%u, 0x%x, 0x%x)\n",
+ __func__, j, h.index, h.reserved);
+ goto vmabort;
+ }
+
+ msr.index = h.index;
+ msr.data = h.value;
+ if (kvm_set_msr(vcpu, &msr)) {
+ pr_debug_ratelimited(
+ "%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
+ __func__, j, h.index, h.value);
+ goto vmabort;
+ }
+ }
+ }
+
+ return;
+
+vmabort:
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
+}
+
/*
* Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
* and modify vmcs12 to make it see what it would expect to see there if
@@ -13290,14 +14137,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
- /*
- * The only expected VM-instruction error is "VM entry with
- * invalid control field(s)." Anything else indicates a
- * problem with L0.
- */
- WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
- VMXERR_ENTRY_INVALID_CONTROL_FIELD));
-
leave_guest_mode(vcpu);
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
@@ -13324,12 +14163,19 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ } else {
+ /*
+ * The only expected VM-instruction error is "VM entry with
+ * invalid control field(s)." Anything else indicates a
+ * problem with L0. And we should never get here with a
+ * VMFail of any type if early consistency checks are enabled.
+ */
+ WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
+ VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ WARN_ON_ONCE(nested_early_check);
}
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- vm_entry_controls_reset_shadow(vmx);
- vm_exit_controls_reset_shadow(vmx);
- vmx_segment_cache_clear(vmx);
/* Update any VMCS fields that might have changed while L2 ran */
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
@@ -13373,8 +14219,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
- if (enable_shadow_vmcs && exit_reason != -1)
- vmx->nested.sync_shadow_vmcs = true;
+ if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+ vmx->nested.need_vmcs12_sync = true;
/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -13409,24 +14255,24 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
return;
}
-
+
/*
* After an early L2 VM-entry failure, we're now back
* in L1 which thinks it just finished a VMLAUNCH or
* VMRESUME instruction, so we need to set the failure
* flag and the VM-instruction error field of the VMCS
- * accordingly.
+ * accordingly, and skip the emulated instruction.
*/
- nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
-
- load_vmcs12_mmu_host_state(vcpu, vmcs12);
+ (void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
/*
- * The emulated instruction was already skipped in
- * nested_vmx_run, but the updated RIP was never
- * written back to the vmcs01.
+ * Restore L1's host state to KVM's software model. We're here
+ * because a consistency check was caught by hardware, which
+ * means some amount of guest state has been propagated to KVM's
+ * model and needs to be unwound to the host's state.
*/
- skip_emulated_instruction(vcpu);
+ nested_vmx_restore_host_state(vcpu);
+
vmx->fail = 0;
}
@@ -13439,26 +14285,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
}
- free_nested(to_vmx(vcpu));
-}
-
-/*
- * L1's failure to enter L2 is a subset of a normal exit, as explained in
- * 23.7 "VM-entry failures during or after loading guest state" (this also
- * lists the acceptable exit-reason and exit-qualification parameters).
- * It should only be called before L2 actually succeeded to run, and when
- * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss).
- */
-static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12,
- u32 reason, unsigned long qualification)
-{
- load_vmcs12_host_state(vcpu, vmcs12);
- vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
- vmcs12->exit_qualification = qualification;
- nested_vmx_succeed(vcpu);
- if (enable_shadow_vmcs)
- to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
+ free_nested(vcpu);
}
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
@@ -13884,7 +14711,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
if (vmx->nested.smm.guest_mode) {
vcpu->arch.hflags &= ~HF_SMM_MASK;
- ret = enter_vmx_non_root_mode(vcpu, NULL);
+ ret = nested_vmx_enter_non_root_mode(vcpu, false);
vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
@@ -13899,6 +14726,20 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * In case we do two consecutive get/set_nested_state()s while L2 was
+ * running hv_evmcs may end up not being mapped (we map it from
+ * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always
+ * have vmcs12 if it is true.
+ */
+ return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
+ vmx->nested.hv_evmcs;
+}
+
static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
u32 user_data_size)
@@ -13918,12 +14759,16 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
+
+ if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
+ kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
+
if (nested_vmx_allowed(vcpu) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
- if (vmx->nested.current_vmptr != -1ull) {
+ if (vmx_has_valid_vmcs12(vcpu)) {
kvm_state.size += VMCS12_SIZE;
if (is_guest_mode(vcpu) &&
@@ -13952,20 +14797,24 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
return -EFAULT;
- if (vmx->nested.current_vmptr == -1ull)
+ if (!vmx_has_valid_vmcs12(vcpu))
goto out;
/*
* When running L2, the authoritative vmcs12 state is in the
* vmcs02. When running L1, the authoritative vmcs12 state is
- * in the shadow vmcs linked to vmcs01, unless
- * sync_shadow_vmcs is set, in which case, the authoritative
+ * in the shadow or enlightened vmcs linked to vmcs01, unless
+ * need_vmcs12_sync is set, in which case, the authoritative
* vmcs12 state is in the vmcs12 already.
*/
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
sync_vmcs12(vcpu, vmcs12);
- else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
- copy_shadow_to_vmcs12(vmx);
+ } else if (!vmx->nested.need_vmcs12_sync) {
+ if (vmx->nested.hv_evmcs)
+ copy_enlightened_to_vmcs12(vmx);
+ else if (enable_shadow_vmcs)
+ copy_shadow_to_vmcs12(vmx);
+ }
if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
return -EFAULT;
@@ -13993,6 +14842,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->format != 0)
return -EINVAL;
+ if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
+ nested_enable_evmcs(vcpu, NULL);
+
if (!nested_vmx_allowed(vcpu))
return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
@@ -14010,13 +14862,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
return -EINVAL;
- if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
- return -EINVAL;
-
- if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
- !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
- return -EINVAL;
-
if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return -EINVAL;
@@ -14046,7 +14891,25 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (ret)
return ret;
- set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+ /* Empty 'VMXON' state is permitted */
+ if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+ return 0;
+
+ if (kvm_state->vmx.vmcs_pa != -1ull) {
+ if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
+ !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+ return -EINVAL;
+
+ set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+ } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
+ /*
+ * Sync eVMCS upon entry as we may not have
+ * HV_X64_MSR_VP_ASSIST_PAGE set up yet.
+ */
+ vmx->nested.need_vmcs12_sync = true;
+ } else {
+ return -EINVAL;
+ }
if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
vmx->nested.smm.vmxon = true;
@@ -14090,7 +14953,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
vmx->nested.dirty_vmcs12 = true;
- ret = enter_vmx_non_root_mode(vcpu, NULL);
+ ret = nested_vmx_enter_non_root_mode(vcpu, false);
if (ret)
return -EINVAL;
@@ -14242,6 +15105,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.pre_enter_smm = vmx_pre_enter_smm,
.pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};
static void vmx_cleanup_l1d_flush(void)
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
index cd0c75f6d037..132432f375c2 100644
--- a/arch/x86/kvm/vmx_shadow_fields.h
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -28,7 +28,6 @@
*/
/* 16-bits */
-SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
@@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
-SHADOW_FIELD_RW(GUEST_CS_LIMIT)
SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
@@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
-SHADOW_FIELD_RW(GUEST_CS_BASE)
-SHADOW_FIELD_RW(GUEST_ES_BASE)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca717737347e..66d66d77caee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,7 +136,7 @@ static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 0;
+unsigned int __read_mostly lapic_timer_advance_ns = 1000;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
@@ -400,9 +400,51 @@ static int exception_type(int vector)
return EXCPT_FAULT;
}
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
+{
+ unsigned nr = vcpu->arch.exception.nr;
+ bool has_payload = vcpu->arch.exception.has_payload;
+ unsigned long payload = vcpu->arch.exception.payload;
+
+ if (!has_payload)
+ return;
+
+ switch (nr) {
+ case DB_VECTOR:
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~DR_TRAP_BITS;
+ /*
+ * DR6.RTM is set by all #DB exceptions that don't clear it.
+ */
+ vcpu->arch.dr6 |= DR6_RTM;
+ vcpu->arch.dr6 |= payload;
+ /*
+ * Bit 16 should be set in the payload whenever the #DB
+ * exception should clear DR6.RTM. This makes the payload
+ * compatible with the pending debug exceptions under VMX.
+ * Though not currently documented in the SDM, this also
+ * makes the payload compatible with the exit qualification
+ * for #DB exceptions under VMX.
+ */
+ vcpu->arch.dr6 ^= payload & DR6_RTM;
+ break;
+ case PF_VECTOR:
+ vcpu->arch.cr2 = payload;
+ break;
+ }
+
+ vcpu->arch.exception.has_payload = false;
+ vcpu->arch.exception.payload = 0;
+}
+EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
- bool reinject)
+ bool has_payload, unsigned long payload, bool reinject)
{
u32 prev_nr;
int class1, class2;
@@ -424,6 +466,14 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
*/
WARN_ON_ONCE(vcpu->arch.exception.pending);
vcpu->arch.exception.injected = true;
+ if (WARN_ON_ONCE(has_payload)) {
+ /*
+ * A reinjected event has already
+ * delivered its payload.
+ */
+ has_payload = false;
+ payload = 0;
+ }
} else {
vcpu->arch.exception.pending = true;
vcpu->arch.exception.injected = false;
@@ -431,6 +481,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = has_error;
vcpu->arch.exception.nr = nr;
vcpu->arch.exception.error_code = error_code;
+ vcpu->arch.exception.has_payload = has_payload;
+ vcpu->arch.exception.payload = payload;
+ /*
+ * In guest mode, payload delivery should be deferred,
+ * so that the L1 hypervisor can intercept #PF before
+ * CR2 is modified (or intercept #DB before DR6 is
+ * modified under nVMX). However, for ABI
+ * compatibility with KVM_GET_VCPU_EVENTS and
+ * KVM_SET_VCPU_EVENTS, we can't delay payload
+ * delivery unless userspace has enabled this
+ * functionality via the per-VM capability,
+ * KVM_CAP_EXCEPTION_PAYLOAD.
+ */
+ if (!vcpu->kvm->arch.exception_payload_enabled ||
+ !is_guest_mode(vcpu))
+ kvm_deliver_exception_payload(vcpu);
return;
}
@@ -455,6 +521,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = true;
vcpu->arch.exception.nr = DF_VECTOR;
vcpu->arch.exception.error_code = 0;
+ vcpu->arch.exception.has_payload = false;
+ vcpu->arch.exception.payload = 0;
} else
/* replace previous exception with a new one in a hope
that instruction re-execution will regenerate lost
@@ -464,16 +532,29 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- kvm_multiple_exception(vcpu, nr, false, 0, false);
+ kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);
void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
- kvm_multiple_exception(vcpu, nr, false, 0, true);
+ kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
+static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
+ unsigned long payload)
+{
+ kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
+}
+
+static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
+ u32 error_code, unsigned long payload)
+{
+ kvm_multiple_exception(vcpu, nr, true, error_code,
+ true, payload, false);
+}
+
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
@@ -490,11 +571,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
++vcpu->stat.pf_guest;
vcpu->arch.exception.nested_apf =
is_guest_mode(vcpu) && fault->async_page_fault;
- if (vcpu->arch.exception.nested_apf)
+ if (vcpu->arch.exception.nested_apf) {
vcpu->arch.apf.nested_apf_token = fault->address;
- else
- vcpu->arch.cr2 = fault->address;
- kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+ kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
+ } else {
+ kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
+ fault->address);
+ }
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -503,7 +586,7 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau
if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
else
- vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+ vcpu->arch.mmu->inject_page_fault(vcpu, fault);
return fault->nested_page_fault;
}
@@ -517,13 +600,13 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- kvm_multiple_exception(vcpu, nr, true, error_code, false);
+ kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
- kvm_multiple_exception(vcpu, nr, true, error_code, true);
+ kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
@@ -602,7 +685,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
if ((pdpte[i] & PT_PRESENT_MASK) &&
(pdpte[i] &
- vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
+ vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
ret = 0;
goto out;
}
@@ -2477,7 +2560,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_PV_EOI_EN:
- if (kvm_lapic_enable_pv_eoi(vcpu, data))
+ if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
@@ -2912,6 +2995,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_HYPERV_TLBFLUSH:
+ case KVM_CAP_HYPERV_SEND_IPI:
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2930,6 +3015,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
+ case KVM_CAP_EXCEPTION_PAYLOAD:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -3362,19 +3448,33 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
{
process_nmi(vcpu);
+
/*
- * FIXME: pass injected and pending separately. This is only
- * needed for nested virtualization, whose state cannot be
- * migrated yet. For now we can combine them.
+ * The API doesn't provide the instruction length for software
+ * exceptions, so don't report them. As long as the guest RIP
+ * isn't advanced, we should expect to encounter the exception
+ * again.
*/
- events->exception.injected =
- (vcpu->arch.exception.pending ||
- vcpu->arch.exception.injected) &&
- !kvm_exception_is_soft(vcpu->arch.exception.nr);
+ if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
+ events->exception.injected = 0;
+ events->exception.pending = 0;
+ } else {
+ events->exception.injected = vcpu->arch.exception.injected;
+ events->exception.pending = vcpu->arch.exception.pending;
+ /*
+ * For ABI compatibility, deliberately conflate
+ * pending and injected exceptions when
+ * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
+ */
+ if (!vcpu->kvm->arch.exception_payload_enabled)
+ events->exception.injected |=
+ vcpu->arch.exception.pending;
+ }
events->exception.nr = vcpu->arch.exception.nr;
events->exception.has_error_code = vcpu->arch.exception.has_error_code;
- events->exception.pad = 0;
events->exception.error_code = vcpu->arch.exception.error_code;
+ events->exception_has_payload = vcpu->arch.exception.has_payload;
+ events->exception_payload = vcpu->arch.exception.payload;
events->interrupt.injected =
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
@@ -3398,6 +3498,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SHADOW
| KVM_VCPUEVENT_VALID_SMM);
+ if (vcpu->kvm->arch.exception_payload_enabled)
+ events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+
memset(&events->reserved, 0, sizeof(events->reserved));
}
@@ -3409,12 +3512,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW
- | KVM_VCPUEVENT_VALID_SMM))
+ | KVM_VCPUEVENT_VALID_SMM
+ | KVM_VCPUEVENT_VALID_PAYLOAD))
return -EINVAL;
- if (events->exception.injected &&
- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
- is_guest_mode(vcpu)))
+ if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
+ if (!vcpu->kvm->arch.exception_payload_enabled)
+ return -EINVAL;
+ if (events->exception.pending)
+ events->exception.injected = 0;
+ else
+ events->exception_has_payload = 0;
+ } else {
+ events->exception.pending = 0;
+ events->exception_has_payload = 0;
+ }
+
+ if ((events->exception.injected || events->exception.pending) &&
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
return -EINVAL;
/* INITs are latched while in SMM */
@@ -3424,11 +3539,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
process_nmi(vcpu);
- vcpu->arch.exception.injected = false;
- vcpu->arch.exception.pending = events->exception.injected;
+ vcpu->arch.exception.injected = events->exception.injected;
+ vcpu->arch.exception.pending = events->exception.pending;
vcpu->arch.exception.nr = events->exception.nr;
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
+ vcpu->arch.exception.has_payload = events->exception_has_payload;
+ vcpu->arch.exception.payload = events->exception_payload;
vcpu->arch.interrupt.injected = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
@@ -3694,6 +3811,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
+ int r;
+ uint16_t vmcs_version;
+ void __user *user_ptr;
+
if (cap->flags)
return -EINVAL;
@@ -3706,6 +3827,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+ r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
+ if (!r) {
+ user_ptr = (void __user *)(uintptr_t)cap->args[0];
+ if (copy_to_user(user_ptr, &vmcs_version,
+ sizeof(vmcs_version)))
+ r = -EFAULT;
+ }
+ return r;
+
default:
return -EINVAL;
}
@@ -4047,11 +4178,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
if (kvm_state.flags &
- ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
+ ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
+ | KVM_STATE_NESTED_EVMCS))
break;
/* nested_run_pending implies guest_mode. */
- if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
+ if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
+ && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
break;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
@@ -4363,6 +4496,10 @@ split_irqchip_unlock:
kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
r = 0;
break;
+ case KVM_CAP_EXCEPTION_PAYLOAD:
+ kvm->arch.exception_payload_enabled = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -4803,7 +4940,7 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
- t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
+ t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
return t_gpa;
}
@@ -5889,7 +6026,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
- if (!vcpu->arch.mmu.direct_map) {
+ if (!vcpu->arch.mmu->direct_map) {
/*
* Write permission should be allowed since only
* write access need to be emulated.
@@ -5922,7 +6059,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
kvm_release_pfn_clean(pfn);
/* The instructions are well-emulated on direct mmu. */
- if (vcpu->arch.mmu.direct_map) {
+ if (vcpu->arch.mmu->direct_map) {
unsigned int indirect_shadow_pages;
spin_lock(&vcpu->kvm->mmu_lock);
@@ -5989,7 +6126,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
vcpu->arch.last_retry_eip = ctxt->eip;
vcpu->arch.last_retry_addr = cr2;
- if (!vcpu->arch.mmu.direct_map)
+ if (!vcpu->arch.mmu->direct_map)
gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -6049,14 +6186,7 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
kvm_run->exit_reason = KVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
+ kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
}
}
@@ -6995,10 +7125,22 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
- if (vcpu->arch.exception.nr == DB_VECTOR &&
- (vcpu->arch.dr7 & DR7_GD)) {
- vcpu->arch.dr7 &= ~DR7_GD;
- kvm_update_dr7(vcpu);
+ if (vcpu->arch.exception.nr == DB_VECTOR) {
+ /*
+ * This code assumes that nSVM doesn't use
+ * check_nested_events(). If it does, the
+ * DR6/DR7 changes should happen before L1
+ * gets a #VMEXIT for an intercepted #DB in
+ * L2. (Under VMX, on the other hand, the
+ * DR6/DR7 changes should not happen in the
+ * event of a VM-exit to L1 for an intercepted
+ * #DB in L2.)
+ */
+ kvm_deliver_exception_payload(vcpu);
+ if (vcpu->arch.dr7 & DR7_GD) {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ kvm_update_dr7(vcpu);
+ }
}
kvm_x86_ops->queue_exception(vcpu);
@@ -8478,7 +8620,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_vcpu_mtrr_init(vcpu);
vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
- kvm_mmu_setup(vcpu);
+ kvm_init_mmu(vcpu, false);
vcpu_put(vcpu);
return 0;
}
@@ -9327,7 +9469,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
int r;
- if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
+ if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
work->wakeup_all)
return;
@@ -9335,11 +9477,11 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
if (unlikely(r))
return;
- if (!vcpu->arch.mmu.direct_map &&
- work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
+ if (!vcpu->arch.mmu->direct_map &&
+ work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
return;
- vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
+ vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
}
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
@@ -9463,6 +9605,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
vcpu->arch.exception.nr = 0;
vcpu->arch.exception.has_error_code = false;
vcpu->arch.exception.error_code = 0;
+ vcpu->arch.exception.has_payload = false;
+ vcpu->arch.exception.payload = 0;
} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 67b9568613f3..224cd0a47568 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu,
int handle_ud(struct kvm_vcpu *vcpu);
+void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu);
+
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 5559dcaddd5e..948656069cdd 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -356,7 +356,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
} else {
struct pci_root_info *info;
- info = kzalloc_node(sizeof(*info), GFP_KERNEL, node);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
dev_err(&root->device->dev,
"pci_bus %04x:%02x: ignored (out of memory)\n",
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 13f4485ca388..30a5111ae5fd 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -629,17 +629,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff);
static void quirk_no_aersid(struct pci_dev *pdev)
{
/* VMD Domain */
- if (is_vmd(pdev->bus))
+ if (is_vmd(pdev->bus) && pci_is_root_bus(pdev->bus))
pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID;
}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334a, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334b, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334c, quirk_no_aersid);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334d, quirk_no_aersid);
+DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, 8, quirk_no_aersid);
#ifdef CONFIG_PHYS_ADDR_T_64BIT
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index ed66db3bc9bb..574e5520968c 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -5,9 +5,9 @@
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_UTIME
-#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_GETPGRP
/*