summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi2
-rw-r--r--arch/arm/configs/mini2440_defconfig2
-rw-r--r--arch/arm/configs/pxa_defconfig2
-rw-r--r--arch/arm/configs/sama5_defconfig2
-rw-r--r--arch/arm/configs/socfpga_defconfig3
-rw-r--r--arch/arm/include/asm/kvm_emulate.h2
-rw-r--r--arch/arm/include/asm/kvm_host.h26
-rw-r--r--arch/arm/mach-at91/Kconfig3
-rw-r--r--arch/arm/mach-at91/at91sam9.c18
-rw-r--r--arch/arm/mach-at91/generic.h2
-rw-r--r--arch/arm/mach-at91/pm.c193
-rw-r--r--arch/arm/mach-at91/pm_suspend.S111
-rw-r--r--arch/arm/mach-davinci/da830.c1
-rw-r--r--arch/arm/mach-davinci/da850.c1
-rw-r--r--arch/arm/mach-davinci/devices-da8xx.c1
-rw-r--r--arch/arm/mach-davinci/dm355.c1
-rw-r--r--arch/arm/mach-davinci/dm365.c1
-rw-r--r--arch/arm/mach-davinci/dm644x.c1
-rw-r--r--arch/arm/mach-davinci/dm646x.c1
-rw-r--r--arch/arm/mach-dove/common.c1
-rw-r--r--arch/arm/mach-ixp4xx/common.c1
-rw-r--r--arch/arm/mach-mediatek/mediatek.c1
-rw-r--r--arch/arm/mach-mv78xx0/common.c1
-rw-r--r--arch/arm/mach-mvebu/board-v7.c1
-rw-r--r--arch/arm/mach-mvebu/coherency_ll.S2
-rw-r--r--arch/arm/mach-mvebu/kirkwood.c2
-rw-r--r--arch/arm/mach-mvebu/pm-board.c11
-rw-r--r--arch/arm/mach-mvebu/pmsu_ll.S3
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c2
-rw-r--r--arch/arm/mach-orion5x/common.c1
-rw-r--r--arch/arm/mach-rockchip/rockchip.c1
-rw-r--r--arch/arm/mach-zynq/common.c1
-rw-r--r--arch/arm/tools/syscall.tbl6
-rw-r--r--arch/arm64/Kconfig6
-rw-r--r--arch/arm64/Kconfig.platforms1
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts4
-rw-r--r--arch/arm64/boot/dts/nvidia/tegra186.dtsi7
-rw-r--r--arch/arm64/boot/dts/sprd/whale2.dtsi16
-rw-r--r--arch/arm64/include/asm/fpsimd.h29
-rw-r--r--arch/arm64/include/asm/kvm_asm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h16
-rw-r--r--arch/arm64/include/asm/kvm_host.h101
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h1
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h111
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h12
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h43
-rw-r--r--arch/arm64/kernel/asm-offsets.c7
-rw-r--r--arch/arm64/kernel/cpufeature.c2
-rw-r--r--arch/arm64/kernel/fpsimd.c179
-rw-r--r--arch/arm64/kernel/perf_event.c50
-rw-r--r--arch/arm64/kernel/signal.c5
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/fpsimd.c17
-rw-r--r--arch/arm64/kvm/guest.c415
-rw-r--r--arch/arm64/kvm/handle_exit.c36
-rw-r--r--arch/arm64/kvm/hyp/entry.S15
-rw-r--r--arch/arm64/kvm/hyp/switch.c80
-rw-r--r--arch/arm64/kvm/pmu.c239
-rw-r--r--arch/arm64/kvm/reset.c167
-rw-r--r--arch/arm64/kvm/sys_regs.c183
-rw-r--r--arch/arm64/kvm/sys_regs.h25
-rw-r--r--arch/h8300/kernel/setup.c1
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/mips/Kconfig6
-rw-r--r--arch/mips/alchemy/common/platform.c22
-rw-r--r--arch/mips/ath79/clock.c1
-rw-r--r--arch/mips/ath79/setup.c1
-rw-r--r--arch/mips/configs/ip22_defconfig2
-rw-r--r--arch/mips/configs/ip27_defconfig2
-rw-r--r--arch/mips/generic/init.c4
-rw-r--r--arch/mips/include/asm/mach-ip27/topology.h11
-rw-r--r--arch/mips/include/asm/pci/bridge.h14
-rw-r--r--arch/mips/include/asm/sn/irq_alloc.h11
-rw-r--r--arch/mips/include/asm/xtalk/xtalk.h9
-rw-r--r--arch/mips/kernel/cpu-probe.c8
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c21
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl6
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl6
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl6
-rw-r--r--arch/mips/pci/Makefile3
-rw-r--r--arch/mips/pci/ops-bridge.c302
-rw-r--r--arch/mips/pci/pci-ip27.c181
-rw-r--r--arch/mips/pci/pci-xtalk-bridge.c610
-rw-r--r--arch/mips/sgi-ip22/ip22-platform.c13
-rw-r--r--arch/mips/sgi-ip27/ip27-init.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-irq.c190
-rw-r--r--arch/mips/sgi-ip27/ip27-xtalk.c61
-rw-r--r--arch/mips/txx9/generic/setup.c1
-rw-r--r--arch/nds32/Kconfig16
-rw-r--r--arch/nds32/include/asm/Kbuild1
-rw-r--r--arch/nds32/include/asm/assembler.h2
-rw-r--r--arch/nds32/include/asm/barrier.h2
-rw-r--r--arch/nds32/include/asm/bitfield.h2
-rw-r--r--arch/nds32/include/asm/cache.h2
-rw-r--r--arch/nds32/include/asm/cache_info.h2
-rw-r--r--arch/nds32/include/asm/cacheflush.h2
-rw-r--r--arch/nds32/include/asm/current.h2
-rw-r--r--arch/nds32/include/asm/delay.h2
-rw-r--r--arch/nds32/include/asm/elf.h2
-rw-r--r--arch/nds32/include/asm/fixmap.h2
-rw-r--r--arch/nds32/include/asm/futex.h2
-rw-r--r--arch/nds32/include/asm/highmem.h2
-rw-r--r--arch/nds32/include/asm/io.h2
-rw-r--r--arch/nds32/include/asm/irqflags.h2
-rw-r--r--arch/nds32/include/asm/l2_cache.h2
-rw-r--r--arch/nds32/include/asm/linkage.h2
-rw-r--r--arch/nds32/include/asm/memory.h10
-rw-r--r--arch/nds32/include/asm/mmu.h2
-rw-r--r--arch/nds32/include/asm/mmu_context.h2
-rw-r--r--arch/nds32/include/asm/module.h2
-rw-r--r--arch/nds32/include/asm/nds32.h2
-rw-r--r--arch/nds32/include/asm/page.h2
-rw-r--r--arch/nds32/include/asm/pgalloc.h2
-rw-r--r--arch/nds32/include/asm/pgtable.h2
-rw-r--r--arch/nds32/include/asm/proc-fns.h2
-rw-r--r--arch/nds32/include/asm/processor.h2
-rw-r--r--arch/nds32/include/asm/ptrace.h2
-rw-r--r--arch/nds32/include/asm/shmparam.h2
-rw-r--r--arch/nds32/include/asm/string.h2
-rw-r--r--arch/nds32/include/asm/swab.h2
-rw-r--r--arch/nds32/include/asm/syscall.h2
-rw-r--r--arch/nds32/include/asm/syscalls.h2
-rw-r--r--arch/nds32/include/asm/thread_info.h4
-rw-r--r--arch/nds32/include/asm/tlb.h2
-rw-r--r--arch/nds32/include/asm/tlbflush.h2
-rw-r--r--arch/nds32/include/asm/uaccess.h2
-rw-r--r--arch/nds32/include/asm/unistd.h2
-rw-r--r--arch/nds32/include/asm/vdso.h2
-rw-r--r--arch/nds32/include/asm/vdso_datapage.h3
-rw-r--r--arch/nds32/include/asm/vdso_timer_info.h2
-rw-r--r--arch/nds32/include/uapi/asm/auxvec.h2
-rw-r--r--arch/nds32/include/uapi/asm/byteorder.h2
-rw-r--r--arch/nds32/include/uapi/asm/cachectl.h2
-rw-r--r--arch/nds32/include/uapi/asm/param.h2
-rw-r--r--arch/nds32/include/uapi/asm/ptrace.h2
-rw-r--r--arch/nds32/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/nds32/include/uapi/asm/unistd.h2
-rw-r--r--arch/nds32/kernel/.gitignore1
-rw-r--r--arch/nds32/kernel/cacheinfo.c2
-rw-r--r--arch/nds32/kernel/ex-exit.S4
-rw-r--r--arch/nds32/kernel/nds32_ksyms.c6
-rw-r--r--arch/nds32/kernel/vdso.c1
-rw-r--r--arch/nds32/kernel/vdso/.gitignore1
-rw-r--r--arch/nds32/kernel/vdso/Makefile14
-rw-r--r--arch/nds32/kernel/vdso/gettimeofday.c4
-rw-r--r--arch/nds32/mm/init.c2
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h6
-rw-r--r--arch/powerpc/include/asm/kvm_host.h11
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h41
-rw-r--r--arch/powerpc/include/asm/mmu_context.h1
-rw-r--r--arch/powerpc/include/asm/xive.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h46
-rw-r--r--arch/powerpc/kernel/cacheinfo.c1
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c42
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c96
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c105
-rw-r--r--arch/powerpc/kvm/book3s_hv.c152
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c57
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c144
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S86
-rw-r--r--arch/powerpc/kvm/book3s_xive.c250
-rw-r--r--arch/powerpc/kvm/book3s_xive.h37
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c1249
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c78
-rw-r--r--arch/powerpc/kvm/powerpc.c40
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S3
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/sysdev/xive/native.c11
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/Makefile5
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/bug.h35
-rw-r--r--arch/riscv/include/asm/cacheflush.h2
-rw-r--r--arch/riscv/include/asm/csr.h123
-rw-r--r--arch/riscv/include/asm/elf.h6
-rw-r--r--arch/riscv/include/asm/futex.h13
-rw-r--r--arch/riscv/include/asm/irqflags.h10
-rw-r--r--arch/riscv/include/asm/mmu_context.h59
-rw-r--r--arch/riscv/include/asm/ptrace.h21
-rw-r--r--arch/riscv/include/asm/sbi.h19
-rw-r--r--arch/riscv/include/asm/sifive_l2_cache.h16
-rw-r--r--arch/riscv/include/asm/thread_info.h4
-rw-r--r--arch/riscv/include/asm/uaccess.h28
-rw-r--r--arch/riscv/kernel/asm-offsets.c3
-rw-r--r--arch/riscv/kernel/cpu.c3
-rw-r--r--arch/riscv/kernel/entry.S22
-rw-r--r--arch/riscv/kernel/head.S33
-rw-r--r--arch/riscv/kernel/irq.c19
-rw-r--r--arch/riscv/kernel/perf_event.c4
-rw-r--r--arch/riscv/kernel/reset.c15
-rw-r--r--arch/riscv/kernel/setup.c6
-rw-r--r--arch/riscv/kernel/signal.c6
-rw-r--r--arch/riscv/kernel/smp.c61
-rw-r--r--arch/riscv/kernel/smpboot.c22
-rw-r--r--arch/riscv/kernel/stacktrace.c14
-rw-r--r--arch/riscv/kernel/traps.c30
-rw-r--r--arch/riscv/kernel/vdso/Makefile2
-rw-r--r--arch/riscv/mm/Makefile2
-rw-r--r--arch/riscv/mm/cacheflush.c61
-rw-r--r--arch/riscv/mm/context.c69
-rw-r--r--arch/riscv/mm/fault.c9
-rw-r--r--arch/riscv/mm/sifive_l2_cache.c175
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/boot/Makefile1
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S2
-rw-r--r--arch/s390/configs/defconfig (renamed from arch/s390/defconfig)0
-rw-r--r--arch/s390/include/asm/cpacf.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h2
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/interrupt.c11
-rw-r--r--arch/s390/kvm/kvm-s390.c120
-rw-r--r--arch/s390/kvm/vsie.c13
-rw-r--r--arch/s390/mm/kasan_init.c2
-rw-r--r--arch/s390/tools/gen_facilities.c3
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/um/include/asm/mmu_context.h1
-rw-r--r--arch/unicore32/configs/unicore32_defconfig2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h1
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl12
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl12
-rw-r--r--arch/x86/events/intel/core.c6
-rw-r--r--arch/x86/include/asm/e820/api.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/mmu_context.h6
-rw-r--r--arch/x86/include/asm/mpx.h15
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kvm/cpuid.c12
-rw-r--r--arch/x86/kvm/hyperv.c24
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h42
-rw-r--r--arch/x86/kvm/lapic.c38
-rw-r--r--arch/x86/kvm/mmu.c23
-rw-r--r--arch/x86/kvm/mtrr.c10
-rw-r--r--arch/x86/kvm/paging_tmpl.h38
-rw-r--r--arch/x86/kvm/svm.c128
-rw-r--r--arch/x86/kvm/vmx/capabilities.h2
-rw-r--r--arch/x86/kvm/vmx/nested.c348
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c8
-rw-r--r--arch/x86/kvm/vmx/vmx.c90
-rw-r--r--arch/x86/kvm/vmx/vmx.h11
-rw-r--r--arch/x86/kvm/x86.c199
-rw-r--r--arch/x86/kvm/x86.h10
-rw-r--r--arch/x86/mm/mpx.c10
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl6
-rw-r--r--arch/xtensa/platforms/xtfpga/setup.c1
256 files changed, 6373 insertions, 2135 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 165f268beafc..9e7704e44f6d 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -467,3 +467,9 @@
535 common io_uring_setup sys_io_uring_setup
536 common io_uring_enter sys_io_uring_enter
537 common io_uring_register sys_io_uring_register
+538 common open_tree sys_open_tree
+539 common move_mount sys_move_mount
+540 common fsopen sys_fsopen
+541 common fsconfig sys_fsconfig
+542 common fsmount sys_fsmount
+543 common fspick sys_fspick
diff --git a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi
index 4990ed90dcea..3e39b9a1f35d 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi
@@ -153,7 +153,7 @@
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins>;
wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */
- cd-gpios = <&gpio4 14 IRQ_TYPE_LEVEL_LOW>; /* gpio_110 */
+ cd-gpios = <&gpio4 14 GPIO_ACTIVE_LOW>; /* gpio_110 */
vmmc-supply = <&vmmc1>;
bus-width = <4>;
cap-power-off-card;
diff --git a/arch/arm/configs/mini2440_defconfig b/arch/arm/configs/mini2440_defconfig
index 8b0f7c4c3f09..7d26ca0b1302 100644
--- a/arch/arm/configs/mini2440_defconfig
+++ b/arch/arm/configs/mini2440_defconfig
@@ -152,7 +152,7 @@ CONFIG_SPI_S3C24XX=y
CONFIG_SPI_SPIDEV=y
CONFIG_GPIO_SYSFS=y
CONFIG_SENSORS_LM75=y
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_S3C2410_WATCHDOG=y
CONFIG_FB=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index f6d24d762a7f..07ebbdce3645 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -387,7 +387,7 @@ CONFIG_SENSORS_LM75=m
CONFIG_SENSORS_LM90=m
CONFIG_SENSORS_LM95245=m
CONFIG_SENSORS_NTC_THERMISTOR=m
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_XILINX_WATCHDOG=m
CONFIG_SA1100_WATCHDOG=m
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index 515cb37eeab6..d5341b0bd88d 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -150,7 +150,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y
CONFIG_V4L_PLATFORM_DRIVERS=y
CONFIG_SOC_CAMERA=y
CONFIG_VIDEO_ATMEL_ISI=y
-CONFIG_SOC_CAMERA_OV2640=y
+CONFIG_SOC_CAMERA_OV2640=m
CONFIG_DRM=y
CONFIG_DRM_ATMEL_HLCDC=y
CONFIG_DRM_PANEL_SIMPLE=y
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 9d42cfe85f5b..6701a975e785 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -21,7 +21,6 @@ CONFIG_NEON=y
CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -128,6 +127,8 @@ CONFIG_RTC_DRV_DS1307=y
CONFIG_DMADEVICES=y
CONFIG_PL330_DMA=y
CONFIG_DMATEST=m
+CONFIG_IIO=y
+CONFIG_LTC2497=y
CONFIG_FPGA=y
CONFIG_FPGA_MGR_SOCFPGA=y
CONFIG_FPGA_MGR_SOCFPGA_A10=y
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 8927cae7c966..efb0e2c0d84c 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -343,4 +343,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
}
}
+static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
+
#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 770d73257ad9..075e1921fdd9 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,7 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/cputype.h>
@@ -53,6 +54,8 @@
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+static inline int kvm_arm_init_sve(void) { return 0; }
+
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -150,9 +153,13 @@ struct kvm_cpu_context {
u32 cp15[NR_CP15_REGS];
};
-typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct kvm_host_data {
+ struct kvm_cpu_context host_ctxt;
+};
+
+typedef struct kvm_host_data kvm_host_data_t;
-static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
int cpu)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -182,7 +189,7 @@ struct kvm_vcpu_arch {
struct kvm_vcpu_fault_info fault;
/* Host FP context */
- kvm_cpu_context_t *host_cpu_context;
+ struct kvm_cpu_context *host_cpu_context;
/* VGIC state */
struct vgic_cpu vgic_cpu;
@@ -361,6 +368,9 @@ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
+
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
@@ -409,4 +419,14 @@ static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
return 0;
}
+static inline int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
+{
+ return -EINVAL;
+}
+
+static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
+{
+ return true;
+}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 903f23c309df..01b1bdb4fb6e 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -21,7 +21,6 @@ config SOC_SAMA5D2
depends on ARCH_MULTI_V7
select SOC_SAMA5
select CACHE_L2X0
- select HAVE_FB_ATMEL
select HAVE_AT91_UTMI
select HAVE_AT91_USB_CLK
select HAVE_AT91_H32MX
@@ -36,7 +35,6 @@ config SOC_SAMA5D3
bool "SAMA5D3 family"
depends on ARCH_MULTI_V7
select SOC_SAMA5
- select HAVE_FB_ATMEL
select HAVE_AT91_UTMI
select HAVE_AT91_SMD
select HAVE_AT91_USB_CLK
@@ -50,7 +48,6 @@ config SOC_SAMA5D4
depends on ARCH_MULTI_V7
select SOC_SAMA5
select CACHE_L2X0
- select HAVE_FB_ATMEL
select HAVE_AT91_UTMI
select HAVE_AT91_SMD
select HAVE_AT91_USB_CLK
diff --git a/arch/arm/mach-at91/at91sam9.c b/arch/arm/mach-at91/at91sam9.c
index 3dbdef4d3cbf..c12563b09656 100644
--- a/arch/arm/mach-at91/at91sam9.c
+++ b/arch/arm/mach-at91/at91sam9.c
@@ -32,3 +32,21 @@ DT_MACHINE_START(at91sam_dt, "Atmel AT91SAM9")
.init_machine = at91sam9_init,
.dt_compat = at91_dt_board_compat,
MACHINE_END
+
+static void __init sam9x60_init(void)
+{
+ of_platform_default_populate(NULL, NULL, NULL);
+
+ sam9x60_pm_init();
+}
+
+static const char *const sam9x60_dt_board_compat[] __initconst = {
+ "microchip,sam9x60",
+ NULL
+};
+
+DT_MACHINE_START(sam9x60_dt, "Microchip SAM9X60")
+ /* Maintainer: Microchip */
+ .init_machine = sam9x60_init,
+ .dt_compat = sam9x60_dt_board_compat,
+MACHINE_END
diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h
index e2bd17237964..72b45accfa0f 100644
--- a/arch/arm/mach-at91/generic.h
+++ b/arch/arm/mach-at91/generic.h
@@ -14,11 +14,13 @@
#ifdef CONFIG_PM
extern void __init at91rm9200_pm_init(void);
extern void __init at91sam9_pm_init(void);
+extern void __init sam9x60_pm_init(void);
extern void __init sama5_pm_init(void);
extern void __init sama5d2_pm_init(void);
#else
static inline void __init at91rm9200_pm_init(void) { }
static inline void __init at91sam9_pm_init(void) { }
+static inline void __init sam9x60_pm_init(void) { }
static inline void __init sama5_pm_init(void) { }
static inline void __init sama5d2_pm_init(void) { }
#endif
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 2a757dcaa1a5..6c8147536f3d 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -39,6 +39,20 @@ extern void at91_pinctrl_gpio_suspend(void);
extern void at91_pinctrl_gpio_resume(void);
#endif
+struct at91_soc_pm {
+ int (*config_shdwc_ws)(void __iomem *shdwc, u32 *mode, u32 *polarity);
+ int (*config_pmc_ws)(void __iomem *pmc, u32 mode, u32 polarity);
+ const struct of_device_id *ws_ids;
+ struct at91_pm_data data;
+};
+
+static struct at91_soc_pm soc_pm = {
+ .data = {
+ .standby_mode = AT91_PM_STANDBY,
+ .suspend_mode = AT91_PM_ULP0,
+ },
+};
+
static const match_table_t pm_modes __initconst = {
{ AT91_PM_STANDBY, "standby" },
{ AT91_PM_ULP0, "ulp0" },
@@ -47,16 +61,11 @@ static const match_table_t pm_modes __initconst = {
{ -1, NULL },
};
-static struct at91_pm_data pm_data = {
- .standby_mode = AT91_PM_STANDBY,
- .suspend_mode = AT91_PM_ULP0,
-};
-
#define at91_ramc_read(id, field) \
- __raw_readl(pm_data.ramc[id] + field)
+ __raw_readl(soc_pm.data.ramc[id] + field)
#define at91_ramc_write(id, field, value) \
- __raw_writel(value, pm_data.ramc[id] + field)
+ __raw_writel(value, soc_pm.data.ramc[id] + field)
static int at91_pm_valid_state(suspend_state_t state)
{
@@ -91,6 +100,8 @@ static const struct wakeup_source_info ws_info[] = {
{ .pmc_fsmr_bit = AT91_PMC_RTCAL, .shdwc_mr_bit = BIT(17) },
{ .pmc_fsmr_bit = AT91_PMC_USBAL },
{ .pmc_fsmr_bit = AT91_PMC_SDMMC_CD },
+ { .pmc_fsmr_bit = AT91_PMC_RTTAL },
+ { .pmc_fsmr_bit = AT91_PMC_RXLP_MCE },
};
static const struct of_device_id sama5d2_ws_ids[] = {
@@ -105,6 +116,17 @@ static const struct of_device_id sama5d2_ws_ids[] = {
{ /* sentinel */ }
};
+static const struct of_device_id sam9x60_ws_ids[] = {
+ { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] },
+ { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] },
+ { .compatible = "usb-ohci", .data = &ws_info[2] },
+ { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] },
+ { .compatible = "usb-ehci", .data = &ws_info[2] },
+ { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] },
+ { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] },
+ { /* sentinel */ }
+};
+
static int at91_pm_config_ws(unsigned int pm_mode, bool set)
{
const struct wakeup_source_info *wsi;
@@ -116,24 +138,22 @@ static int at91_pm_config_ws(unsigned int pm_mode, bool set)
if (pm_mode != AT91_PM_ULP1)
return 0;
- if (!pm_data.pmc || !pm_data.shdwc)
+ if (!soc_pm.data.pmc || !soc_pm.data.shdwc || !soc_pm.ws_ids)
return -EPERM;
if (!set) {
- writel(mode, pm_data.pmc + AT91_PMC_FSMR);
+ writel(mode, soc_pm.data.pmc + AT91_PMC_FSMR);
return 0;
}
- /* SHDWC.WUIR */
- val = readl(pm_data.shdwc + 0x0c);
- mode |= (val & 0x3ff);
- polarity |= ((val >> 16) & 0x3ff);
+ if (soc_pm.config_shdwc_ws)
+ soc_pm.config_shdwc_ws(soc_pm.data.shdwc, &mode, &polarity);
/* SHDWC.MR */
- val = readl(pm_data.shdwc + 0x04);
+ val = readl(soc_pm.data.shdwc + 0x04);
/* Loop through defined wakeup sources. */
- for_each_matching_node_and_match(np, sama5d2_ws_ids, &match) {
+ for_each_matching_node_and_match(np, soc_pm.ws_ids, &match) {
pdev = of_find_device_by_node(np);
if (!pdev)
continue;
@@ -155,8 +175,8 @@ put_device:
}
if (mode) {
- writel(mode, pm_data.pmc + AT91_PMC_FSMR);
- writel(polarity, pm_data.pmc + AT91_PMC_FSPR);
+ if (soc_pm.config_pmc_ws)
+ soc_pm.config_pmc_ws(soc_pm.data.pmc, mode, polarity);
} else {
pr_err("AT91: PM: no ULP1 wakeup sources found!");
}
@@ -164,6 +184,34 @@ put_device:
return mode ? 0 : -EPERM;
}
+static int at91_sama5d2_config_shdwc_ws(void __iomem *shdwc, u32 *mode,
+ u32 *polarity)
+{
+ u32 val;
+
+ /* SHDWC.WUIR */
+ val = readl(shdwc + 0x0c);
+ *mode |= (val & 0x3ff);
+ *polarity |= ((val >> 16) & 0x3ff);
+
+ return 0;
+}
+
+static int at91_sama5d2_config_pmc_ws(void __iomem *pmc, u32 mode, u32 polarity)
+{
+ writel(mode, pmc + AT91_PMC_FSMR);
+ writel(polarity, pmc + AT91_PMC_FSPR);
+
+ return 0;
+}
+
+static int at91_sam9x60_config_pmc_ws(void __iomem *pmc, u32 mode, u32 polarity)
+{
+ writel(mode, pmc + AT91_PMC_FSMR);
+
+ return 0;
+}
+
/*
* Called after processes are frozen, but before we shutdown devices.
*/
@@ -171,18 +219,18 @@ static int at91_pm_begin(suspend_state_t state)
{
switch (state) {
case PM_SUSPEND_MEM:
- pm_data.mode = pm_data.suspend_mode;
+ soc_pm.data.mode = soc_pm.data.suspend_mode;
break;
case PM_SUSPEND_STANDBY:
- pm_data.mode = pm_data.standby_mode;
+ soc_pm.data.mode = soc_pm.data.standby_mode;
break;
default:
- pm_data.mode = -1;
+ soc_pm.data.mode = -1;
}
- return at91_pm_config_ws(pm_data.mode, true);
+ return at91_pm_config_ws(soc_pm.data.mode, true);
}
/*
@@ -194,10 +242,10 @@ static int at91_pm_verify_clocks(void)
unsigned long scsr;
int i;
- scsr = readl(pm_data.pmc + AT91_PMC_SCSR);
+ scsr = readl(soc_pm.data.pmc + AT91_PMC_SCSR);
/* USB must not be using PLLB */
- if ((scsr & pm_data.uhp_udp_mask) != 0) {
+ if ((scsr & soc_pm.data.uhp_udp_mask) != 0) {
pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
return 0;
}
@@ -208,7 +256,7 @@ static int at91_pm_verify_clocks(void)
if ((scsr & (AT91_PMC_PCK0 << i)) == 0)
continue;
- css = readl(pm_data.pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
+ css = readl(soc_pm.data.pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
if (css != AT91_PMC_CSS_SLOW) {
pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
return 0;
@@ -230,7 +278,7 @@ static int at91_pm_verify_clocks(void)
*/
int at91_suspend_entering_slow_clock(void)
{
- return (pm_data.mode >= AT91_PM_ULP0);
+ return (soc_pm.data.mode >= AT91_PM_ULP0);
}
EXPORT_SYMBOL(at91_suspend_entering_slow_clock);
@@ -243,14 +291,14 @@ static int at91_suspend_finish(unsigned long val)
flush_cache_all();
outer_disable();
- at91_suspend_sram_fn(&pm_data);
+ at91_suspend_sram_fn(&soc_pm.data);
return 0;
}
static void at91_pm_suspend(suspend_state_t state)
{
- if (pm_data.mode == AT91_PM_BACKUP) {
+ if (soc_pm.data.mode == AT91_PM_BACKUP) {
pm_bu->suspended = 1;
cpu_suspend(0, at91_suspend_finish);
@@ -289,7 +337,7 @@ static int at91_pm_enter(suspend_state_t state)
/*
* Ensure that clocks are in a valid state.
*/
- if (pm_data.mode >= AT91_PM_ULP0 &&
+ if (soc_pm.data.mode >= AT91_PM_ULP0 &&
!at91_pm_verify_clocks())
goto error;
@@ -318,7 +366,7 @@ error:
*/
static void at91_pm_end(void)
{
- at91_pm_config_ws(pm_data.mode, false);
+ at91_pm_config_ws(soc_pm.data.mode, false);
}
@@ -351,7 +399,7 @@ static void at91rm9200_standby(void)
" str %2, [%1, %3]\n\t"
" mcr p15, 0, %0, c7, c0, 4\n\t"
:
- : "r" (0), "r" (pm_data.ramc[0]),
+ : "r" (0), "r" (soc_pm.data.ramc[0]),
"r" (1), "r" (AT91_MC_SDRAMC_SRR));
}
@@ -374,7 +422,7 @@ static void at91_ddr_standby(void)
at91_ramc_write(0, AT91_DDRSDRC_MDR, mdr);
}
- if (pm_data.ramc[1]) {
+ if (soc_pm.data.ramc[1]) {
saved_lpr1 = at91_ramc_read(1, AT91_DDRSDRC_LPR);
lpr1 = saved_lpr1 & ~AT91_DDRSDRC_LPCB;
lpr1 |= AT91_DDRSDRC_LPCB_SELF_REFRESH;
@@ -392,14 +440,14 @@ static void at91_ddr_standby(void)
/* self-refresh mode now */
at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0);
- if (pm_data.ramc[1])
+ if (soc_pm.data.ramc[1])
at91_ramc_write(1, AT91_DDRSDRC_LPR, lpr1);
cpu_do_idle();
at91_ramc_write(0, AT91_DDRSDRC_MDR, saved_mdr0);
at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0);
- if (pm_data.ramc[1]) {
+ if (soc_pm.data.ramc[1]) {
at91_ramc_write(0, AT91_DDRSDRC_MDR, saved_mdr1);
at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1);
}
@@ -429,7 +477,7 @@ static void at91sam9_sdram_standby(void)
u32 lpr0, lpr1 = 0;
u32 saved_lpr0, saved_lpr1 = 0;
- if (pm_data.ramc[1]) {
+ if (soc_pm.data.ramc[1]) {
saved_lpr1 = at91_ramc_read(1, AT91_SDRAMC_LPR);
lpr1 = saved_lpr1 & ~AT91_SDRAMC_LPCB;
lpr1 |= AT91_SDRAMC_LPCB_SELF_REFRESH;
@@ -441,13 +489,13 @@ static void at91sam9_sdram_standby(void)
/* self-refresh mode now */
at91_ramc_write(0, AT91_SDRAMC_LPR, lpr0);
- if (pm_data.ramc[1])
+ if (soc_pm.data.ramc[1])
at91_ramc_write(1, AT91_SDRAMC_LPR, lpr1);
cpu_do_idle();
at91_ramc_write(0, AT91_SDRAMC_LPR, saved_lpr0);
- if (pm_data.ramc[1])
+ if (soc_pm.data.ramc[1])
at91_ramc_write(1, AT91_SDRAMC_LPR, saved_lpr1);
}
@@ -480,14 +528,14 @@ static __init void at91_dt_ramc(void)
const struct ramc_info *ramc;
for_each_matching_node_and_match(np, ramc_ids, &of_id) {
- pm_data.ramc[idx] = of_iomap(np, 0);
- if (!pm_data.ramc[idx])
+ soc_pm.data.ramc[idx] = of_iomap(np, 0);
+ if (!soc_pm.data.ramc[idx])
panic(pr_fmt("unable to map ramc[%d] cpu registers\n"), idx);
ramc = of_id->data;
if (!standby)
standby = ramc->idle;
- pm_data.memctrl = ramc->memctrl;
+ soc_pm.data.memctrl = ramc->memctrl;
idx++;
}
@@ -509,12 +557,17 @@ static void at91rm9200_idle(void)
* Disable the processor clock. The processor will be automatically
* re-enabled by an interrupt or by a reset.
*/
- writel(AT91_PMC_PCK, pm_data.pmc + AT91_PMC_SCDR);
+ writel(AT91_PMC_PCK, soc_pm.data.pmc + AT91_PMC_SCDR);
+}
+
+static void at91sam9x60_idle(void)
+{
+ cpu_do_idle();
}
static void at91sam9_idle(void)
{
- writel(AT91_PMC_PCK, pm_data.pmc + AT91_PMC_SCDR);
+ writel(AT91_PMC_PCK, soc_pm.data.pmc + AT91_PMC_SCDR);
cpu_do_idle();
}
@@ -566,8 +619,8 @@ static void __init at91_pm_sram_init(void)
static bool __init at91_is_pm_mode_active(int pm_mode)
{
- return (pm_data.standby_mode == pm_mode ||
- pm_data.suspend_mode == pm_mode);
+ return (soc_pm.data.standby_mode == pm_mode ||
+ soc_pm.data.suspend_mode == pm_mode);
}
static int __init at91_pm_backup_init(void)
@@ -577,6 +630,9 @@ static int __init at91_pm_backup_init(void)
struct platform_device *pdev = NULL;
int ret = -ENODEV;
+ if (!IS_ENABLED(CONFIG_SOC_SAMA5D2))
+ return -EPERM;
+
if (!at91_is_pm_mode_active(AT91_PM_BACKUP))
return 0;
@@ -586,7 +642,7 @@ static int __init at91_pm_backup_init(void)
return ret;
}
- pm_data.sfrbu = of_iomap(np, 0);
+ soc_pm.data.sfrbu = of_iomap(np, 0);
of_node_put(np);
np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-securam");
@@ -622,8 +678,8 @@ static int __init at91_pm_backup_init(void)
securam_fail:
put_device(&pdev->dev);
securam_fail_no_ref_dev:
- iounmap(pm_data.sfrbu);
- pm_data.sfrbu = NULL;
+ iounmap(soc_pm.data.sfrbu);
+ soc_pm.data.sfrbu = NULL;
return ret;
}
@@ -632,10 +688,10 @@ static void __init at91_pm_use_default_mode(int pm_mode)
if (pm_mode != AT91_PM_ULP1 && pm_mode != AT91_PM_BACKUP)
return;
- if (pm_data.standby_mode == pm_mode)
- pm_data.standby_mode = AT91_PM_ULP0;
- if (pm_data.suspend_mode == pm_mode)
- pm_data.suspend_mode = AT91_PM_ULP0;
+ if (soc_pm.data.standby_mode == pm_mode)
+ soc_pm.data.standby_mode = AT91_PM_ULP0;
+ if (soc_pm.data.suspend_mode == pm_mode)
+ soc_pm.data.suspend_mode = AT91_PM_ULP0;
}
static void __init at91_pm_modes_init(void)
@@ -653,7 +709,7 @@ static void __init at91_pm_modes_init(void)
goto ulp1_default;
}
- pm_data.shdwc = of_iomap(np, 0);
+ soc_pm.data.shdwc = of_iomap(np, 0);
of_node_put(np);
ret = at91_pm_backup_init();
@@ -667,8 +723,8 @@ static void __init at91_pm_modes_init(void)
return;
unmap:
- iounmap(pm_data.shdwc);
- pm_data.shdwc = NULL;
+ iounmap(soc_pm.data.shdwc);
+ soc_pm.data.shdwc = NULL;
ulp1_default:
at91_pm_use_default_mode(AT91_PM_ULP1);
backup_default:
@@ -711,14 +767,14 @@ static void __init at91_pm_init(void (*pm_idle)(void))
platform_device_register(&at91_cpuidle_device);
pmc_np = of_find_matching_node_and_match(NULL, atmel_pmc_ids, &of_id);
- pm_data.pmc = of_iomap(pmc_np, 0);
- if (!pm_data.pmc) {
+ soc_pm.data.pmc = of_iomap(pmc_np, 0);
+ if (!soc_pm.data.pmc) {
pr_err("AT91: PM not supported, PMC not found\n");
return;
}
pmc = of_id->data;
- pm_data.uhp_udp_mask = pmc->uhp_udp_mask;
+ soc_pm.data.uhp_udp_mask = pmc->uhp_udp_mask;
if (pm_idle)
arm_pm_idle = pm_idle;
@@ -728,8 +784,8 @@ static void __init at91_pm_init(void (*pm_idle)(void))
if (at91_suspend_sram_fn) {
suspend_set_ops(&at91_pm_ops);
pr_info("AT91: PM: standby: %s, suspend: %s\n",
- pm_modes[pm_data.standby_mode].pattern,
- pm_modes[pm_data.suspend_mode].pattern);
+ pm_modes[soc_pm.data.standby_mode].pattern,
+ pm_modes[soc_pm.data.suspend_mode].pattern);
} else {
pr_info("AT91: PM not supported, due to no SRAM allocated\n");
}
@@ -750,6 +806,19 @@ void __init at91rm9200_pm_init(void)
at91_pm_init(at91rm9200_idle);
}
+void __init sam9x60_pm_init(void)
+{
+ if (!IS_ENABLED(CONFIG_SOC_AT91SAM9))
+ return;
+
+ at91_pm_modes_init();
+ at91_dt_ramc();
+ at91_pm_init(at91sam9x60_idle);
+
+ soc_pm.ws_ids = sam9x60_ws_ids;
+ soc_pm.config_pmc_ws = at91_sam9x60_config_pmc_ws;
+}
+
void __init at91sam9_pm_init(void)
{
if (!IS_ENABLED(CONFIG_SOC_AT91SAM9))
@@ -775,6 +844,10 @@ void __init sama5d2_pm_init(void)
at91_pm_modes_init();
sama5_pm_init();
+
+ soc_pm.ws_ids = sama5d2_ws_ids;
+ soc_pm.config_shdwc_ws = at91_sama5d2_config_shdwc_ws;
+ soc_pm.config_pmc_ws = at91_sama5d2_config_pmc_ws;
}
static int __init at91_pm_modes_select(char *str)
@@ -795,8 +868,8 @@ static int __init at91_pm_modes_select(char *str)
if (suspend < 0)
return 0;
- pm_data.standby_mode = standby;
- pm_data.suspend_mode = suspend;
+ soc_pm.data.standby_mode = standby;
+ soc_pm.data.suspend_mode = suspend;
return 0;
}
diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
index bfe1c4d06901..77e29309cc6e 100644
--- a/arch/arm/mach-at91/pm_suspend.S
+++ b/arch/arm/mach-at91/pm_suspend.S
@@ -51,15 +51,6 @@ tmp2 .req r5
.endm
/*
- * Wait until PLLA has locked.
- */
- .macro wait_pllalock
-1: ldr tmp1, [pmc, #AT91_PMC_SR]
- tst tmp1, #AT91_PMC_LOCKA
- beq 1b
- .endm
-
-/*
* Put the processor to enter the idle state
*/
.macro at91_cpu_idle
@@ -178,11 +169,46 @@ ENDPROC(at91_backup_mode)
orr tmp1, tmp1, #AT91_PMC_KEY
str tmp1, [pmc, #AT91_CKGR_MOR]
+ /* Save RC oscillator state */
+ ldr tmp1, [pmc, #AT91_PMC_SR]
+ str tmp1, .saved_osc_status
+ tst tmp1, #AT91_PMC_MOSCRCS
+ bne 1f
+
+ /* Turn off RC oscillator */
+ ldr tmp1, [pmc, #AT91_CKGR_MOR]
+ bic tmp1, tmp1, #AT91_PMC_MOSCRCEN
+ bic tmp1, tmp1, #AT91_PMC_KEY_MASK
+ orr tmp1, tmp1, #AT91_PMC_KEY
+ str tmp1, [pmc, #AT91_CKGR_MOR]
+
+ /* Wait main RC disabled done */
+2: ldr tmp1, [pmc, #AT91_PMC_SR]
+ tst tmp1, #AT91_PMC_MOSCRCS
+ bne 2b
+
/* Wait for interrupt */
- at91_cpu_idle
+1: at91_cpu_idle
- /* Turn on the crystal oscillator */
+ /* Restore RC oscillator state */
+ ldr tmp1, .saved_osc_status
+ tst tmp1, #AT91_PMC_MOSCRCS
+ beq 4f
+
+ /* Turn on RC oscillator */
ldr tmp1, [pmc, #AT91_CKGR_MOR]
+ orr tmp1, tmp1, #AT91_PMC_MOSCRCEN
+ bic tmp1, tmp1, #AT91_PMC_KEY_MASK
+ orr tmp1, tmp1, #AT91_PMC_KEY
+ str tmp1, [pmc, #AT91_CKGR_MOR]
+
+ /* Wait main RC stabilization */
+3: ldr tmp1, [pmc, #AT91_PMC_SR]
+ tst tmp1, #AT91_PMC_MOSCRCS
+ beq 3b
+
+ /* Turn on the crystal oscillator */
+4: ldr tmp1, [pmc, #AT91_CKGR_MOR]
orr tmp1, tmp1, #AT91_PMC_MOSCEN
orr tmp1, tmp1, #AT91_PMC_KEY
str tmp1, [pmc, #AT91_CKGR_MOR]
@@ -197,8 +223,26 @@ ENDPROC(at91_backup_mode)
.macro at91_pm_ulp1_mode
ldr pmc, .pmc_base
- /* Switch the main clock source to 12-MHz RC oscillator */
+ /* Save RC oscillator state and check if it is enabled. */
+ ldr tmp1, [pmc, #AT91_PMC_SR]
+ str tmp1, .saved_osc_status
+ tst tmp1, #AT91_PMC_MOSCRCS
+ bne 2f
+
+ /* Enable RC oscillator */
ldr tmp1, [pmc, #AT91_CKGR_MOR]
+ orr tmp1, tmp1, #AT91_PMC_MOSCRCEN
+ bic tmp1, tmp1, #AT91_PMC_KEY_MASK
+ orr tmp1, tmp1, #AT91_PMC_KEY
+ str tmp1, [pmc, #AT91_CKGR_MOR]
+
+ /* Wait main RC stabilization */
+1: ldr tmp1, [pmc, #AT91_PMC_SR]
+ tst tmp1, #AT91_PMC_MOSCRCS
+ beq 1b
+
+ /* Switch the main clock source to 12-MHz RC oscillator */
+2: ldr tmp1, [pmc, #AT91_CKGR_MOR]
bic tmp1, tmp1, #AT91_PMC_MOSCSEL
bic tmp1, tmp1, #AT91_PMC_KEY_MASK
orr tmp1, tmp1, #AT91_PMC_KEY
@@ -262,6 +306,25 @@ ENDPROC(at91_backup_mode)
str tmp1, [pmc, #AT91_PMC_MCKR]
wait_mckrdy
+
+ /* Restore RC oscillator state */
+ ldr tmp1, .saved_osc_status
+ tst tmp1, #AT91_PMC_MOSCRCS
+ bne 3f
+
+ /* Disable RC oscillator */
+ ldr tmp1, [pmc, #AT91_CKGR_MOR]
+ bic tmp1, tmp1, #AT91_PMC_MOSCRCEN
+ bic tmp1, tmp1, #AT91_PMC_KEY_MASK
+ orr tmp1, tmp1, #AT91_PMC_KEY
+ str tmp1, [pmc, #AT91_CKGR_MOR]
+
+ /* Wait RC oscillator disable done */
+4: ldr tmp1, [pmc, #AT91_PMC_SR]
+ tst tmp1, #AT91_PMC_MOSCRCS
+ bne 4b
+
+3:
.endm
ENTRY(at91_ulp_mode)
@@ -279,14 +342,6 @@ ENTRY(at91_ulp_mode)
wait_mckrdy
- /* Save PLLA setting and disable it */
- ldr tmp1, [pmc, #AT91_CKGR_PLLAR]
- str tmp1, .saved_pllar
-
- mov tmp1, #AT91_PMC_PLLCOUNT
- orr tmp1, tmp1, #(1 << 29) /* bit 29 always set */
- str tmp1, [pmc, #AT91_CKGR_PLLAR]
-
ldr r0, .pm_mode
cmp r0, #AT91_PM_ULP1
beq ulp1_mode
@@ -301,18 +356,6 @@ ulp1_mode:
ulp_exit:
ldr pmc, .pmc_base
- /* Restore PLLA setting */
- ldr tmp1, .saved_pllar
- str tmp1, [pmc, #AT91_CKGR_PLLAR]
-
- tst tmp1, #(AT91_PMC_MUL & 0xff0000)
- bne 3f
- tst tmp1, #(AT91_PMC_MUL & ~0xff0000)
- beq 4f
-3:
- wait_pllalock
-4:
-
/*
* Restore master clock setting
*/
@@ -465,8 +508,6 @@ ENDPROC(at91_sramc_self_refresh)
.word 0
.saved_mckr:
.word 0
-.saved_pllar:
- .word 0
.saved_sam9_lpr:
.word 0
.saved_sam9_lpr1:
@@ -475,6 +516,8 @@ ENDPROC(at91_sramc_self_refresh)
.word 0
.saved_sam9_mdr1:
.word 0
+.saved_osc_status:
+ .word 0
ENTRY(at91_pm_suspend_in_sram_sz)
.word .-at91_pm_suspend_in_sram
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 63511f638ce4..e6b8ffd934a1 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -12,6 +12,7 @@
#include <linux/clk/davinci.h>
#include <linux/gpio.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-cp-intc.h>
#include <linux/platform_data/gpio-davinci.h>
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 67ab71ba3ad3..77bc64d6e39b 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -18,6 +18,7 @@
#include <linux/cpufreq.h>
#include <linux/gpio.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-cp-intc.h>
#include <linux/mfd/da8xx-cfgchip.h>
#include <linux/platform_data/clk-da8xx-cfgchip.h>
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index b8dc674e06bc..036139fe0d0f 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -17,6 +17,7 @@
#include <linux/dma-contiguous.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/serial_8250.h>
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 4a482445b9a2..c6073326be2e 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -15,6 +15,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-aintc.h>
#include <linux/platform_data/edma.h>
#include <linux/platform_data/gpio-davinci.h>
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 8e0a77315add..2f9ae6431bf5 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -19,6 +19,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-aintc.h>
#include <linux/platform_data/edma.h>
#include <linux/platform_data/gpio-davinci.h>
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index cecc7ceb8d34..1b9e9a6192ef 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -14,6 +14,7 @@
#include <linux/clkdev.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-aintc.h>
#include <linux/platform_data/edma.h>
#include <linux/platform_data/gpio-davinci.h>
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index f33392f77a03..62ca952fe161 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -15,6 +15,7 @@
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/irqchip/irq-davinci-aintc.h>
#include <linux/platform_data/edma.h>
#include <linux/platform_data/gpio-davinci.h>
diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 0d420a2bfe3e..d7b826d2695c 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -11,6 +11,7 @@
#include <linux/clk-provider.h>
#include <linux/dma-mapping.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_data/dma-mv_xor.h>
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index cc5f15679d29..381f452de28d 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -27,7 +27,6 @@
#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/sched_clock.h>
-#include <linux/bitops.h>
#include <linux/irqchip/irq-ixp4xx.h>
#include <linux/platform_data/timer-ixp4xx.h>
#include <mach/udc.h>
diff --git a/arch/arm/mach-mediatek/mediatek.c b/arch/arm/mach-mediatek/mediatek.c
index b6a81ba1ce32..5a9c016b3c6c 100644
--- a/arch/arm/mach-mediatek/mediatek.c
+++ b/arch/arm/mach-mediatek/mediatek.c
@@ -15,6 +15,7 @@
* GNU General Public License for more details.
*/
#include <linux/init.h>
+#include <linux/io.h>
#include <asm/mach/arch.h>
#include <linux/of.h>
#include <linux/clk-provider.h>
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index f72e1e9f5fc5..dd762d1b083f 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/serial_8250.h>
#include <linux/ata_platform.h>
diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
index 0b10acd7d1b9..d2df5ef9382b 100644
--- a/arch/arm/mach-mvebu/board-v7.c
+++ b/arch/arm/mach-mvebu/board-v7.c
@@ -136,7 +136,6 @@ static void __init i2c_quirk(void)
of_update_property(np, new_compat);
}
- return;
}
static void __init mvebu_dt_init(void)
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 8b2fbc8b6bc6..2d962fe48821 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -66,7 +66,7 @@ ENDPROC(ll_get_coherency_base)
* fabric registers
*/
ENTRY(ll_get_coherency_cpumask)
- mrc 15, 0, r3, cr0, cr0, 5
+ mrc p15, 0, r3, cr0, cr0, 5
and r3, r3, #15
mov r2, #(1 << 24)
lsl r3, r2, r3
diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
index 9b5f4d665374..ceaad6d5927e 100644
--- a/arch/arm/mach-mvebu/kirkwood.c
+++ b/arch/arm/mach-mvebu/kirkwood.c
@@ -108,8 +108,6 @@ static void __init kirkwood_dt_eth_fixup(void)
clk_prepare_enable(clk);
/* store MAC address register contents in local-mac-address */
- pr_err(FW_INFO "%pOF: local-mac-address is not set\n", np);
-
pmac = kzalloc(sizeof(*pmac) + 6, GFP_KERNEL);
if (!pmac)
goto eth_fixup_no_mem;
diff --git a/arch/arm/mach-mvebu/pm-board.c b/arch/arm/mach-mvebu/pm-board.c
index db17121d7d63..070552511699 100644
--- a/arch/arm/mach-mvebu/pm-board.c
+++ b/arch/arm/mach-mvebu/pm-board.c
@@ -79,7 +79,7 @@ static void mvebu_armada_pm_enter(void __iomem *sdram_reg, u32 srcmd)
static int __init mvebu_armada_pm_init(void)
{
struct device_node *np;
- struct device_node *gpio_ctrl_np;
+ struct device_node *gpio_ctrl_np = NULL;
int ret = 0, i;
if (!of_machine_is_compatible("marvell,axp-gp"))
@@ -126,18 +126,23 @@ static int __init mvebu_armada_pm_init(void)
goto out;
}
+ if (gpio_ctrl_np)
+ of_node_put(gpio_ctrl_np);
gpio_ctrl_np = args.np;
pic_raw_gpios[i] = args.args[0];
}
gpio_ctrl = of_iomap(gpio_ctrl_np, 0);
- if (!gpio_ctrl)
- return -ENOMEM;
+ if (!gpio_ctrl) {
+ ret = -ENOMEM;
+ goto out;
+ }
mvebu_pm_suspend_init(mvebu_armada_pm_enter);
out:
of_node_put(np);
+ of_node_put(gpio_ctrl_np);
return ret;
}
diff --git a/arch/arm/mach-mvebu/pmsu_ll.S b/arch/arm/mach-mvebu/pmsu_ll.S
index 88651221dbdd..7aae9a25cfeb 100644
--- a/arch/arm/mach-mvebu/pmsu_ll.S
+++ b/arch/arm/mach-mvebu/pmsu_ll.S
@@ -16,7 +16,7 @@
ENTRY(armada_38x_scu_power_up)
mrc p15, 4, r1, c15, c0 @ get SCU base address
orr r1, r1, #0x8 @ SCU CPU Power Status Register
- mrc 15, 0, r0, cr0, cr0, 5 @ get the CPU ID
+ mrc p15, 0, r0, cr0, cr0, 5 @ get the CPU ID
and r0, r0, #15
add r1, r1, r0
mov r0, #0x0
@@ -56,7 +56,6 @@ ENDPROC(armada_38x_cpu_resume)
/* The following code will be executed from SRAM */
ENTRY(mvebu_boot_wa_start)
-mvebu_boot_wa_start:
ARM_BE8(setend be)
adr r0, 1f
ldr r0, [r0] @ load the address of the
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 1b15d593837e..b6e814166ee0 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -749,7 +749,7 @@ static void __init ams_delta_init(void)
ARRAY_SIZE(ams_delta_gpio_tables));
leds_pdev = gpio_led_register_device(PLATFORM_DEVID_NONE, &leds_pdata);
- if (!IS_ERR(leds_pdev)) {
+ if (!IS_ERR_OR_NULL(leds_pdev)) {
leds_gpio_table.dev_id = dev_name(&leds_pdev->dev);
gpiod_add_lookup_table(&leds_gpio_table);
}
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index c67f92bfa30e..7bcb41137bbf 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/serial_8250.h>
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index e41cabc4dc2b..06ab03b93109 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/of_platform.h>
#include <linux/irqchip.h>
#include <linux/clk-provider.h>
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 6aba9ebf8041..7f634eaeaf10 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/platform_device.h>
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 0393917eaa57..aaf479a9e92d 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -441,3 +441,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 69a59a5d1143..4780eb7af842 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1341,6 +1341,7 @@ menu "ARMv8.3 architectural features"
config ARM64_PTR_AUTH
bool "Enable support for pointer authentication"
default y
+ depends on !KVM || ARM64_VHE
help
Pointer authentication (part of the ARMv8.3 Extensions) provides
instructions for signing and authenticating pointers against secret
@@ -1354,8 +1355,9 @@ config ARM64_PTR_AUTH
context-switched along with the process.
The feature is detected at runtime. If the feature is not present in
- hardware it will not be advertised to userspace nor will it be
- enabled.
+ hardware it will not be advertised to userspace/KVM guest nor will it
+ be enabled. However, KVM guest also require VHE mode and hence
+ CONFIG_ARM64_VHE=y option to use this feature.
endmenu
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 0f4d91824e4b..1001410c4782 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -215,6 +215,7 @@ config ARCH_SYNQUACER
config ARCH_TEGRA
bool "NVIDIA Tegra SoC Family"
select ARCH_HAS_RESET_CONTROLLER
+ select ARM_GIC_PM
select CLKDEV_LOOKUP
select CLKSRC_MMIO
select TIMER_OF
diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
index 75ee6cf1e1b4..14d7fea82daf 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
@@ -59,7 +59,7 @@
};
padctl@3520000 {
- status = "okay";
+ status = "disabled";
avdd-pll-erefeut-supply = <&vdd_1v8_pll>;
avdd-usb-supply = <&vdd_3v3_sys>;
@@ -137,7 +137,7 @@
};
usb@3530000 {
- status = "okay";
+ status = "disabled";
phys = <&{/padctl@3520000/pads/usb2/lanes/usb2-0}>,
<&{/padctl@3520000/pads/usb2/lanes/usb2-1}>,
diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
index f0bb6ced4976..426ac0bdf6a6 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
@@ -60,6 +60,7 @@
clock-names = "master_bus", "slave_bus", "rx", "tx", "ptp_ref";
resets = <&bpmp TEGRA186_RESET_EQOS>;
reset-names = "eqos";
+ iommus = <&smmu TEGRA186_SID_EQOS>;
status = "disabled";
snps,write-requests = <1>;
@@ -338,6 +339,7 @@
<&bpmp TEGRA186_RESET_HDA2CODEC_2X>;
reset-names = "hda", "hda2hdmi", "hda2codec_2x";
power-domains = <&bpmp TEGRA186_POWER_DOMAIN_DISP>;
+ iommus = <&smmu TEGRA186_SID_HDA>;
status = "disabled";
};
@@ -671,6 +673,10 @@
<&bpmp TEGRA186_RESET_PCIEXCLK>;
reset-names = "afi", "pex", "pcie_x";
+ iommus = <&smmu TEGRA186_SID_AFI>;
+ iommu-map = <0x0 &smmu TEGRA186_SID_AFI 0x1000>;
+ iommu-map-mask = <0x0>;
+
status = "disabled";
pci@1,0 {
@@ -1158,6 +1164,7 @@
bpmp: bpmp {
compatible = "nvidia,tegra186-bpmp";
+ iommus = <&smmu TEGRA186_SID_BPMP>;
mboxes = <&hsp_top0 TEGRA_HSP_MBOX_TYPE_DB
TEGRA_HSP_DB_MASTER_BPMP>;
shmem = <&cpu_bpmp_tx &cpu_bpmp_rx>;
diff --git a/arch/arm64/boot/dts/sprd/whale2.dtsi b/arch/arm64/boot/dts/sprd/whale2.dtsi
index eb6be5675f79..4bb862c6b083 100644
--- a/arch/arm64/boot/dts/sprd/whale2.dtsi
+++ b/arch/arm64/boot/dts/sprd/whale2.dtsi
@@ -75,7 +75,9 @@
"sprd,sc9836-uart";
reg = <0x0 0x100>;
interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ext_26m>;
+ clock-names = "enable", "uart", "source";
+ clocks = <&apapb_gate CLK_UART0_EB>,
+ <&ap_clk CLK_UART0>, <&ext_26m>;
status = "disabled";
};
@@ -84,7 +86,9 @@
"sprd,sc9836-uart";
reg = <0x100000 0x100>;
interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ext_26m>;
+ clock-names = "enable", "uart", "source";
+ clocks = <&apapb_gate CLK_UART1_EB>,
+ <&ap_clk CLK_UART1>, <&ext_26m>;
status = "disabled";
};
@@ -93,7 +97,9 @@
"sprd,sc9836-uart";
reg = <0x200000 0x100>;
interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ext_26m>;
+ clock-names = "enable", "uart", "source";
+ clocks = <&apapb_gate CLK_UART2_EB>,
+ <&ap_clk CLK_UART2>, <&ext_26m>;
status = "disabled";
};
@@ -102,7 +108,9 @@
"sprd,sc9836-uart";
reg = <0x300000 0x100>;
interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ext_26m>;
+ clock-names = "enable", "uart", "source";
+ clocks = <&apapb_gate CLK_UART3_EB>,
+ <&ap_clk CLK_UART3>, <&ext_26m>;
status = "disabled";
};
};
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index dd1ad3950ef5..df62bbd33a9a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -24,10 +24,13 @@
#ifndef __ASSEMBLY__
+#include <linux/bitmap.h>
#include <linux/build_bug.h>
+#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/stddef.h>
+#include <linux/types.h>
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -56,7 +59,8 @@ extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_task_to_cpu(void);
-extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
+extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
+ void *sve_state, unsigned int sve_vl);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_flush_cpu_state(void);
@@ -87,6 +91,29 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
extern int __ro_after_init sve_max_vl;
+extern int __ro_after_init sve_max_virtualisable_vl;
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+
+/*
+ * Helpers to translate bit indices in sve_vq_map to VQ values (and
+ * vice versa). This allows find_next_bit() to be used to find the
+ * _maximum_ VQ not exceeding a certain value.
+ */
+static inline unsigned int __vq_to_bit(unsigned int vq)
+{
+ return SVE_VQ_MAX - vq;
+}
+
+static inline unsigned int __bit_to_vq(unsigned int bit)
+{
+ return SVE_VQ_MAX - bit;
+}
+
+/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
+static inline bool sve_vq_available(unsigned int vq)
+{
+ return test_bit(__vq_to_bit(vq), sve_vq_map);
+}
#ifdef CONFIG_ARM64_SVE
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f5b79e995f40..ff73f5462aca 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -108,7 +108,8 @@ extern u32 __kvm_get_mdcr_el2(void);
.endm
.macro get_host_ctxt reg, tmp
- hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
+ hyp_adr_this_cpu \reg, kvm_host_data, \tmp
+ add \reg, \reg, #HOST_DATA_CONTEXT
.endm
.macro get_vcpu_ptr vcpu, ctxt
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d3842791e1c4..613427fafff9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -98,6 +98,22 @@ static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 |= HCR_TWE;
}
+static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+}
+
+static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
+}
+
+static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_ptrauth(vcpu))
+ vcpu_ptrauth_disable(vcpu);
+}
+
static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.vsesr_el2;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a01fe087e022..2a8d3f8ca22c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,9 +22,13 @@
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
+#include <linux/bitmap.h>
#include <linux/types.h>
+#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+#include <linux/percpu.h>
#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/fpsimd.h>
@@ -45,7 +49,7 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#define KVM_VCPU_MAX_FEATURES 4
+#define KVM_VCPU_MAX_FEATURES 7
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -54,8 +58,12 @@
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+extern unsigned int kvm_sve_max_vl;
+int kvm_arm_init_sve(void);
+
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
@@ -117,6 +125,7 @@ enum vcpu_sysreg {
SCTLR_EL1, /* System Control Register */
ACTLR_EL1, /* Auxiliary Control Register */
CPACR_EL1, /* Coprocessor Access Control */
+ ZCR_EL1, /* SVE Control */
TTBR0_EL1, /* Translation Table Base Register 0 */
TTBR1_EL1, /* Translation Table Base Register 1 */
TCR_EL1, /* Translation Control Register */
@@ -152,6 +161,18 @@ enum vcpu_sysreg {
PMSWINC_EL0, /* Software Increment Register */
PMUSERENR_EL0, /* User Enable Register */
+ /* Pointer Authentication Registers in a strict increasing order. */
+ APIAKEYLO_EL1,
+ APIAKEYHI_EL1,
+ APIBKEYLO_EL1,
+ APIBKEYHI_EL1,
+ APDAKEYLO_EL1,
+ APDAKEYHI_EL1,
+ APDBKEYLO_EL1,
+ APDBKEYHI_EL1,
+ APGAKEYLO_EL1,
+ APGAKEYHI_EL1,
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -212,7 +233,17 @@ struct kvm_cpu_context {
struct kvm_vcpu *__hyp_running_vcpu;
};
-typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct kvm_pmu_events {
+ u32 events_host;
+ u32 events_guest;
+};
+
+struct kvm_host_data {
+ struct kvm_cpu_context host_ctxt;
+ struct kvm_pmu_events pmu_events;
+};
+
+typedef struct kvm_host_data kvm_host_data_t;
struct vcpu_reset_state {
unsigned long pc;
@@ -223,6 +254,8 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
+ void *sve_state;
+ unsigned int sve_max_vl;
/* HYP configuration */
u64 hcr_el2;
@@ -255,7 +288,7 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch external_debug_state;
/* Pointer to host CPU context */
- kvm_cpu_context_t *host_cpu_context;
+ struct kvm_cpu_context *host_cpu_context;
struct thread_info *host_thread_info; /* hyp VA */
struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
@@ -318,12 +351,40 @@ struct kvm_vcpu_arch {
bool sysregs_loaded_on_cpu;
};
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
+ sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+
+#define vcpu_sve_state_size(vcpu) ({ \
+ size_t __size_ret; \
+ unsigned int __vcpu_vq; \
+ \
+ if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
+ __size_ret = 0; \
+ } else { \
+ __vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \
+ __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
+ } \
+ \
+ __size_ret; \
+})
+
/* vcpu_arch flags field values: */
#define KVM_ARM64_DEBUG_DIRTY (1 << 0)
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
+#define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */
+#define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */
+#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
+
+#define vcpu_has_sve(vcpu) (system_supports_sve() && \
+ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+
+#define vcpu_has_ptrauth(vcpu) ((system_supports_address_auth() || \
+ system_supports_generic_auth()) && \
+ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
@@ -432,9 +493,9 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
-static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
int cpu)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -452,8 +513,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
* kernel's mapping to the linear mapping, and store it in tpidr_el2
* so that we can use adr_l to access per-cpu variables in EL2.
*/
- u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
- (u64)kvm_ksym_ref(kvm_host_cpu_state));
+ u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
+ (u64)kvm_ksym_ref(kvm_host_data));
/*
* Call initialization code, and switch to the full blown HYP code.
@@ -491,9 +552,10 @@ static inline bool kvm_arch_requires_vhe(void)
return false;
}
+void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
+
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
@@ -516,11 +578,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
+{
+ return (!has_vhe() && attr->exclude_host);
+}
+
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
return kvm_arch_vcpu_run_map_fp(vcpu);
}
+
+void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u32 clr);
+
+void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt);
+bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt);
+
+void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+#else
+static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u32 clr) {}
#endif
static inline void kvm_arm_vhe_guest_enter(void)
@@ -594,4 +673,10 @@ void kvm_arch_free_vm(struct kvm *kvm);
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
+int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
+bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
+
+#define kvm_arm_vcpu_sve_finalized(vcpu) \
+ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c3060833b7a5..09fe8bd15f6e 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -149,7 +149,6 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-bool __fpsimd_enabled(void);
void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
void deactivate_traps_vhe_put(void);
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
new file mode 100644
index 000000000000..6301813dcace
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* arch/arm64/include/asm/kvm_ptrauth.h: Guest/host ptrauth save/restore
+ * Copyright 2019 Arm Limited
+ * Authors: Mark Rutland <mark.rutland@arm.com>
+ * Amit Daniel Kachhap <amit.kachhap@arm.com>
+ */
+
+#ifndef __ASM_KVM_PTRAUTH_H
+#define __ASM_KVM_PTRAUTH_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+
+#define PTRAUTH_REG_OFFSET(x) (x - CPU_APIAKEYLO_EL1)
+
+/*
+ * CPU_AP*_EL1 values exceed immediate offset range (512) for stp
+ * instruction so below macros takes CPU_APIAKEYLO_EL1 as base and
+ * calculates the offset of the keys from this base to avoid an extra add
+ * instruction. These macros assumes the keys offsets follow the order of
+ * the sysreg enum in kvm_host.h.
+ */
+.macro ptrauth_save_state base, reg1, reg2
+ mrs_s \reg1, SYS_APIAKEYLO_EL1
+ mrs_s \reg2, SYS_APIAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+ mrs_s \reg1, SYS_APIBKEYLO_EL1
+ mrs_s \reg2, SYS_APIBKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+ mrs_s \reg1, SYS_APDAKEYLO_EL1
+ mrs_s \reg2, SYS_APDAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+ mrs_s \reg1, SYS_APDBKEYLO_EL1
+ mrs_s \reg2, SYS_APDBKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+ mrs_s \reg1, SYS_APGAKEYLO_EL1
+ mrs_s \reg2, SYS_APGAKEYHI_EL1
+ stp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+.endm
+
+.macro ptrauth_restore_state base, reg1, reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+ msr_s SYS_APIAKEYLO_EL1, \reg1
+ msr_s SYS_APIAKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+ msr_s SYS_APIBKEYLO_EL1, \reg1
+ msr_s SYS_APIBKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+ msr_s SYS_APDAKEYLO_EL1, \reg1
+ msr_s SYS_APDAKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+ msr_s SYS_APDBKEYLO_EL1, \reg1
+ msr_s SYS_APDBKEYHI_EL1, \reg2
+ ldp \reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+ msr_s SYS_APGAKEYLO_EL1, \reg1
+ msr_s SYS_APGAKEYHI_EL1, \reg2
+.endm
+
+/*
+ * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
+ * check for the presence of one of the cpufeature flag
+ * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and
+ * then proceed ahead with the save/restore of Pointer Authentication
+ * key registers.
+ */
+.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
+ b 1000f
+alternative_else_nop_endif
+alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
+ b 1001f
+alternative_else_nop_endif
+1000:
+ ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ and \reg1, \reg1, #(HCR_API | HCR_APK)
+ cbz \reg1, 1001f
+ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_restore_state \reg1, \reg2, \reg3
+1001:
+.endm
+
+.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
+alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
+ b 2000f
+alternative_else_nop_endif
+alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
+ b 2001f
+alternative_else_nop_endif
+2000:
+ ldr \reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+ and \reg1, \reg1, #(HCR_API | HCR_APK)
+ cbz \reg1, 2001f
+ add \reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_save_state \reg1, \reg2, \reg3
+ add \reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
+ ptrauth_restore_state \reg1, \reg2, \reg3
+ isb
+2001:
+.endm
+
+#else /* !CONFIG_ARM64_PTR_AUTH */
+.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+.endm
+.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
+.endm
+#endif /* CONFIG_ARM64_PTR_AUTH */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_PTRAUTH_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 3f7b917e8f3a..902d75b60914 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -454,6 +454,9 @@
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
+/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_DSSBS (_BITUL(44))
#define SCTLR_ELx_ENIA (_BITUL(31))
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index f2a83ff6b73c..70e6882853c0 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 428
+#define __NR_compat_syscalls 434
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 23f1a44acada..c39e90600bb3 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -874,6 +874,18 @@ __SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
#define __NR_io_uring_register 427
__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
+#define __NR_open_tree 428
+__SYSCALL(__NR_open_tree, sys_open_tree)
+#define __NR_move_mount 429
+__SYSCALL(__NR_move_mount, sys_move_mount)
+#define __NR_fsopen 430
+__SYSCALL(__NR_fsopen, sys_fsopen)
+#define __NR_fsconfig 431
+__SYSCALL(__NR_fsconfig, sys_fsconfig)
+#define __NR_fsmount 432
+__SYSCALL(__NR_fsmount, sys_fsmount)
+#define __NR_fspick 433
+__SYSCALL(__NR_fspick, sys_fspick)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 97c3478ee6e7..7b7ac0f6cec9 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -35,6 +35,7 @@
#include <linux/psci.h>
#include <linux/types.h>
#include <asm/ptrace.h>
+#include <asm/sve_context.h>
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
@@ -102,6 +103,9 @@ struct kvm_regs {
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
+#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
+#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
+#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
struct kvm_vcpu_init {
__u32 target;
@@ -226,6 +230,45 @@ struct kvm_vcpu_events {
KVM_REG_ARM_FW | ((r) & 0xffff))
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+/* SVE registers */
+#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)
+
+/* Z- and P-regs occupy blocks at the following offsets within this range: */
+#define KVM_REG_ARM64_SVE_ZREG_BASE 0
+#define KVM_REG_ARM64_SVE_PREG_BASE 0x400
+#define KVM_REG_ARM64_SVE_FFR_BASE 0x600
+
+#define KVM_ARM64_SVE_NUM_ZREGS __SVE_NUM_ZREGS
+#define KVM_ARM64_SVE_NUM_PREGS __SVE_NUM_PREGS
+
+#define KVM_ARM64_SVE_MAX_SLICES 32
+
+#define KVM_REG_ARM64_SVE_ZREG(n, i) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_ZREG_BASE | \
+ KVM_REG_SIZE_U2048 | \
+ (((n) & (KVM_ARM64_SVE_NUM_ZREGS - 1)) << 5) | \
+ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_REG_ARM64_SVE_PREG(n, i) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_PREG_BASE | \
+ KVM_REG_SIZE_U256 | \
+ (((n) & (KVM_ARM64_SVE_NUM_PREGS - 1)) << 5) | \
+ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_REG_ARM64_SVE_FFR(i) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_FFR_BASE | \
+ KVM_REG_SIZE_U256 | \
+ ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
+#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
+
+/* Vector lengths pseudo-register: */
+#define KVM_REG_ARM64_SVE_VLS (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \
+ KVM_REG_SIZE_U512 | 0xffff)
+#define KVM_ARM64_SVE_VLS_WORDS \
+ ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e10e2a5d9ddc..947e39896e28 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -125,9 +125,16 @@ int main(void)
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
+ DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
+ DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
+ DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
+ DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
+ DEFINE(CPU_APDBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
+ DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
+ DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2b807f129e60..ca27e08e3d8a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1913,7 +1913,7 @@ static void verify_sve_features(void)
unsigned int len = zcr & ZCR_ELx_LEN_MASK;
if (len < safe_len || sve_verify_vq_map()) {
- pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+ pr_crit("CPU%d: SVE: vector length support mismatch\n",
smp_processor_id());
cpu_die_early();
}
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 735cf1f8b109..a38bf74bcca8 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -18,6 +18,7 @@
*/
#include <linux/bitmap.h>
+#include <linux/bitops.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/cache.h>
@@ -48,6 +49,7 @@
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
#include <asm/traps.h>
+#include <asm/virt.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -119,6 +121,8 @@
*/
struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
+ void *sve_state;
+ unsigned int sve_vl;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -130,14 +134,23 @@ static int sve_default_vl = -1;
/* Maximum supported vector length across all CPUs (initially poisoned) */
int __ro_after_init sve_max_vl = SVE_VL_MIN;
-/* Set of available vector lengths, as vq_to_bit(vq): */
-static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
+
+/*
+ * Set of available vector lengths,
+ * where length vq encoded as bit __vq_to_bit(vq):
+ */
+__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+/* Set of vector lengths present on at least one cpu: */
+static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
+
static void __percpu *efi_sve_state;
#else /* ! CONFIG_ARM64_SVE */
/* Dummy declaration for code that will be optimised out: */
extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
extern void __percpu *efi_sve_state;
#endif /* ! CONFIG_ARM64_SVE */
@@ -235,14 +248,15 @@ static void task_fpsimd_load(void)
*/
void fpsimd_save(void)
{
- struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+ struct fpsimd_last_state_struct const *last =
+ this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
WARN_ON(!in_softirq() && !irqs_disabled());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
- if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) {
+ if (WARN_ON(sve_get_vl() != last->sve_vl)) {
/*
* Can't save the user regs, so current would
* re-enter user with corrupt state.
@@ -252,32 +266,15 @@ void fpsimd_save(void)
return;
}
- sve_save_state(sve_pffr(&current->thread), &st->fpsr);
+ sve_save_state((char *)last->sve_state +
+ sve_ffr_offset(last->sve_vl),
+ &last->st->fpsr);
} else
- fpsimd_save_state(st);
+ fpsimd_save_state(last->st);
}
}
/*
- * Helpers to translate bit indices in sve_vq_map to VQ values (and
- * vice versa). This allows find_next_bit() to be used to find the
- * _maximum_ VQ not exceeding a certain value.
- */
-
-static unsigned int vq_to_bit(unsigned int vq)
-{
- return SVE_VQ_MAX - vq;
-}
-
-static unsigned int bit_to_vq(unsigned int bit)
-{
- if (WARN_ON(bit >= SVE_VQ_MAX))
- bit = SVE_VQ_MAX - 1;
-
- return SVE_VQ_MAX - bit;
-}
-
-/*
* All vector length selection from userspace comes through here.
* We're on a slow path, so some sanity-checks are included.
* If things go wrong there's a bug somewhere, but try to fall back to a
@@ -298,8 +295,8 @@ static unsigned int find_supported_vector_length(unsigned int vl)
vl = max_vl;
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
- vq_to_bit(sve_vq_from_vl(vl)));
- return sve_vl_from_vq(bit_to_vq(bit));
+ __vq_to_bit(sve_vq_from_vl(vl)));
+ return sve_vl_from_vq(__bit_to_vq(bit));
}
#ifdef CONFIG_SYSCTL
@@ -550,7 +547,6 @@ int sve_set_vector_length(struct task_struct *task,
local_bh_disable();
fpsimd_save();
- set_thread_flag(TIF_FOREIGN_FPSTATE);
}
fpsimd_flush_task_state(task);
@@ -624,12 +620,6 @@ int sve_get_current_vl(void)
return sve_prctl_status(0);
}
-/*
- * Bitmap for temporary storage of the per-CPU set of supported vector lengths
- * during secondary boot.
- */
-static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
-
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
unsigned int vq, vl;
@@ -644,40 +634,82 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */
vl = sve_get_vl();
vq = sve_vq_from_vl(vl); /* skip intervening lengths */
- set_bit(vq_to_bit(vq), map);
+ set_bit(__vq_to_bit(vq), map);
}
}
+/*
+ * Initialise the set of known supported VQs for the boot CPU.
+ * This is called during kernel boot, before secondary CPUs are brought up.
+ */
void __init sve_init_vq_map(void)
{
sve_probe_vqs(sve_vq_map);
+ bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX);
}
/*
* If we haven't committed to the set of supported VQs yet, filter out
* those not supported by the current CPU.
+ * This function is called during the bring-up of early secondary CPUs only.
*/
void sve_update_vq_map(void)
{
- sve_probe_vqs(sve_secondary_vq_map);
- bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX);
+ DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+
+ sve_probe_vqs(tmp_map);
+ bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX);
+ bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
}
-/* Check whether the current CPU supports all VQs in the committed set */
+/*
+ * Check whether the current CPU supports all VQs in the committed set.
+ * This function is called during the bring-up of late secondary CPUs only.
+ */
int sve_verify_vq_map(void)
{
- int ret = 0;
+ DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+ unsigned long b;
- sve_probe_vqs(sve_secondary_vq_map);
- bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
- SVE_VQ_MAX);
- if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
+ sve_probe_vqs(tmp_map);
+
+ bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
+ if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
smp_processor_id());
- ret = -EINVAL;
+ return -EINVAL;
}
- return ret;
+ if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
+ return 0;
+
+ /*
+ * For KVM, it is necessary to ensure that this CPU doesn't
+ * support any vector length that guests may have probed as
+ * unsupported.
+ */
+
+ /* Recover the set of supported VQs: */
+ bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
+ /* Find VQs supported that are not globally supported: */
+ bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
+
+ /* Find the lowest such VQ, if any: */
+ b = find_last_bit(tmp_map, SVE_VQ_MAX);
+ if (b >= SVE_VQ_MAX)
+ return 0; /* no mismatches */
+
+ /*
+ * Mismatches above sve_max_virtualisable_vl are fine, since
+ * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
+ */
+ if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) {
+ pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
+ smp_processor_id());
+ return -EINVAL;
+ }
+
+ return 0;
}
static void __init sve_efi_setup(void)
@@ -744,6 +776,8 @@ u64 read_zcr_features(void)
void __init sve_setup(void)
{
u64 zcr;
+ DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
+ unsigned long b;
if (!system_supports_sve())
return;
@@ -753,8 +787,8 @@ void __init sve_setup(void)
* so sve_vq_map must have at least SVE_VQ_MIN set.
* If something went wrong, at least try to patch it up:
*/
- if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
- set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
+ if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map)))
+ set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map);
zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);
@@ -772,11 +806,31 @@ void __init sve_setup(void)
*/
sve_default_vl = find_supported_vector_length(64);
+ bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
+ SVE_VQ_MAX);
+
+ b = find_last_bit(tmp_map, SVE_VQ_MAX);
+ if (b >= SVE_VQ_MAX)
+ /* No non-virtualisable VLs found */
+ sve_max_virtualisable_vl = SVE_VQ_MAX;
+ else if (WARN_ON(b == SVE_VQ_MAX - 1))
+ /* No virtualisable VLs? This is architecturally forbidden. */
+ sve_max_virtualisable_vl = SVE_VQ_MIN;
+ else /* b + 1 < SVE_VQ_MAX */
+ sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));
+
+ if (sve_max_virtualisable_vl > sve_max_vl)
+ sve_max_virtualisable_vl = sve_max_vl;
+
pr_info("SVE: maximum available vector length %u bytes per vector\n",
sve_max_vl);
pr_info("SVE: default vector length %u bytes per vector\n",
sve_default_vl);
+ /* KVM decides whether to support mismatched systems. Just warn here: */
+ if (sve_max_virtualisable_vl < sve_max_vl)
+ pr_warn("SVE: unvirtualisable vector lengths present\n");
+
sve_efi_setup();
}
@@ -816,12 +870,11 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
local_bh_disable();
fpsimd_save();
- fpsimd_to_sve(current);
/* Force ret_to_user to reload the registers: */
fpsimd_flush_task_state(current);
- set_thread_flag(TIF_FOREIGN_FPSTATE);
+ fpsimd_to_sve(current);
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
@@ -894,9 +947,9 @@ void fpsimd_flush_thread(void)
local_bh_disable();
+ fpsimd_flush_task_state(current);
memset(&current->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
- fpsimd_flush_task_state(current);
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
@@ -933,8 +986,6 @@ void fpsimd_flush_thread(void)
current->thread.sve_vl_onexec = 0;
}
- set_thread_flag(TIF_FOREIGN_FPSTATE);
-
local_bh_enable();
}
@@ -974,6 +1025,8 @@ void fpsimd_bind_task_to_cpu(void)
this_cpu_ptr(&fpsimd_last_state);
last->st = &current->thread.uw.fpsimd_state;
+ last->sve_state = current->thread.sve_state;
+ last->sve_vl = current->thread.sve_vl;
current->thread.fpsimd_cpu = smp_processor_id();
if (system_supports_sve()) {
@@ -987,7 +1040,8 @@ void fpsimd_bind_task_to_cpu(void)
}
}
-void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
+ unsigned int sve_vl)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -995,6 +1049,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
WARN_ON(!in_softirq() && !irqs_disabled());
last->st = st;
+ last->sve_state = sve_state;
+ last->sve_vl = sve_vl;
}
/*
@@ -1043,12 +1099,29 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
/*
* Invalidate live CPU copies of task t's FPSIMD state
+ *
+ * This function may be called with preemption enabled. The barrier()
+ * ensures that the assignment to fpsimd_cpu is visible to any
+ * preemption/softirq that could race with set_tsk_thread_flag(), so
+ * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared.
+ *
+ * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any
+ * subsequent code.
*/
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+
+ barrier();
+ set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);
+
+ barrier();
}
+/*
+ * Invalidate any task's FPSIMD state that is present on this cpu.
+ * This function must be called with softirqs disabled.
+ */
void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 6164d389eed6..348d12eec566 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -26,6 +26,7 @@
#include <linux/acpi.h>
#include <linux/clocksource.h>
+#include <linux/kvm_host.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -528,12 +529,21 @@ static inline int armv8pmu_enable_counter(int idx)
static inline void armv8pmu_enable_event_counter(struct perf_event *event)
{
+ struct perf_event_attr *attr = &event->attr;
int idx = event->hw.idx;
+ u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
- armv8pmu_enable_counter(idx);
if (armv8pmu_event_is_chained(event))
- armv8pmu_enable_counter(idx - 1);
- isb();
+ counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
+
+ kvm_set_pmu_events(counter_bits, attr);
+
+ /* We rely on the hypervisor switch code to enable guest counters */
+ if (!kvm_pmu_counter_deferred(attr)) {
+ armv8pmu_enable_counter(idx);
+ if (armv8pmu_event_is_chained(event))
+ armv8pmu_enable_counter(idx - 1);
+ }
}
static inline int armv8pmu_disable_counter(int idx)
@@ -546,11 +556,21 @@ static inline int armv8pmu_disable_counter(int idx)
static inline void armv8pmu_disable_event_counter(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
+ struct perf_event_attr *attr = &event->attr;
int idx = hwc->idx;
+ u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx));
if (armv8pmu_event_is_chained(event))
- armv8pmu_disable_counter(idx - 1);
- armv8pmu_disable_counter(idx);
+ counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1));
+
+ kvm_clr_pmu_events(counter_bits);
+
+ /* We rely on the hypervisor switch code to disable guest counters */
+ if (!kvm_pmu_counter_deferred(attr)) {
+ if (armv8pmu_event_is_chained(event))
+ armv8pmu_disable_counter(idx - 1);
+ armv8pmu_disable_counter(idx);
+ }
}
static inline int armv8pmu_enable_intens(int idx)
@@ -827,14 +847,23 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
* with other architectures (x86 and Power).
*/
if (is_kernel_in_hyp_mode()) {
- if (!attr->exclude_kernel)
+ if (!attr->exclude_kernel && !attr->exclude_host)
config_base |= ARMV8_PMU_INCLUDE_EL2;
- } else {
- if (attr->exclude_kernel)
+ if (attr->exclude_guest)
config_base |= ARMV8_PMU_EXCLUDE_EL1;
- if (!attr->exclude_hv)
+ if (attr->exclude_host)
+ config_base |= ARMV8_PMU_EXCLUDE_EL0;
+ } else {
+ if (!attr->exclude_hv && !attr->exclude_host)
config_base |= ARMV8_PMU_INCLUDE_EL2;
}
+
+ /*
+ * Filter out !VHE kernels and guest kernels
+ */
+ if (attr->exclude_kernel)
+ config_base |= ARMV8_PMU_EXCLUDE_EL1;
+
if (attr->exclude_user)
config_base |= ARMV8_PMU_EXCLUDE_EL0;
@@ -864,6 +893,9 @@ static void armv8pmu_reset(void *info)
armv8pmu_disable_intens(idx);
}
+ /* Clear the counters we flip at guest entry/exit */
+ kvm_clr_pmu_events(U32_MAX);
+
/*
* Initialize & Reset PMNC. Request overflow interrupt for
* 64 bit cycle counter but cheat in armv8pmu_write_counter().
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 867a7cea70e5..a9b0485df074 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -296,11 +296,6 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
*/
fpsimd_flush_task_state(current);
- barrier();
- /* From now, fpsimd_thread_switch() won't clear TIF_FOREIGN_FPSTATE */
-
- set_thread_flag(TIF_FOREIGN_FPSTATE);
- barrier();
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sve_alloc(current);
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 690e033a91c0..3ac1a64d2fb9 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index aac7808ce216..6e3c9c8b2df9 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/kvm_host.h>
+#include <asm/fpsimd.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
@@ -85,9 +86,12 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
- fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs);
+ fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
+ vcpu->arch.sve_state,
+ vcpu->arch.sve_max_vl);
+
clear_thread_flag(TIF_FOREIGN_FPSTATE);
- clear_thread_flag(TIF_SVE);
+ update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
}
}
@@ -100,14 +104,21 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ bool host_has_sve = system_supports_sve();
+ bool guest_has_sve = vcpu_has_sve(vcpu);
local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+ u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
+
/* Clean guest FP state to memory and invalidate cpu view */
fpsimd_save();
fpsimd_flush_cpu_state();
- } else if (system_supports_sve()) {
+
+ if (guest_has_sve)
+ *guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
+ } else if (host_has_sve) {
/*
* The FPSIMD/SVE state in the CPU has not been touched, and we
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dd436a50fce7..3ae2f82fca46 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -19,18 +19,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bits.h>
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/nospec.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <kvm/arm_psci.h>
#include <asm/cputype.h>
#include <linux/uaccess.h>
+#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_host.h>
+#include <asm/sigcontext.h>
#include "trace.h"
@@ -52,12 +59,19 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
+static bool core_reg_offset_is_vreg(u64 off)
+{
+ return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
+ off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
+}
+
static u64 core_reg_offset_from_id(u64 id)
{
return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
}
-static int validate_core_offset(const struct kvm_one_reg *reg)
+static int validate_core_offset(const struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
{
u64 off = core_reg_offset_from_id(reg->id);
int size;
@@ -89,11 +103,19 @@ static int validate_core_offset(const struct kvm_one_reg *reg)
return -EINVAL;
}
- if (KVM_REG_SIZE(reg->id) == size &&
- IS_ALIGNED(off, size / sizeof(__u32)))
- return 0;
+ if (KVM_REG_SIZE(reg->id) != size ||
+ !IS_ALIGNED(off, size / sizeof(__u32)))
+ return -EINVAL;
- return -EINVAL;
+ /*
+ * The KVM_REG_ARM64_SVE regs must be used instead of
+ * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
+ * SVE-enabled vcpus:
+ */
+ if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
+ return -EINVAL;
+
+ return 0;
}
static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -115,7 +137,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(reg))
+ if (validate_core_offset(vcpu, reg))
return -EINVAL;
if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
@@ -140,7 +162,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
(off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
return -ENOENT;
- if (validate_core_offset(reg))
+ if (validate_core_offset(vcpu, reg))
return -EINVAL;
if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
@@ -183,6 +205,239 @@ out:
return err;
}
+#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
+#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
+
+static bool vq_present(
+ const u64 (*const vqs)[KVM_ARM64_SVE_VLS_WORDS],
+ unsigned int vq)
+{
+ return (*vqs)[vq_word(vq)] & vq_mask(vq);
+}
+
+static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ unsigned int max_vq, vq;
+ u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
+
+ if (!vcpu_has_sve(vcpu))
+ return -ENOENT;
+
+ if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl)))
+ return -EINVAL;
+
+ memset(vqs, 0, sizeof(vqs));
+
+ max_vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
+ for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
+ if (sve_vq_available(vq))
+ vqs[vq_word(vq)] |= vq_mask(vq);
+
+ if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ unsigned int max_vq, vq;
+ u64 vqs[KVM_ARM64_SVE_VLS_WORDS];
+
+ if (!vcpu_has_sve(vcpu))
+ return -ENOENT;
+
+ if (kvm_arm_vcpu_sve_finalized(vcpu))
+ return -EPERM; /* too late! */
+
+ if (WARN_ON(vcpu->arch.sve_state))
+ return -EINVAL;
+
+ if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs)))
+ return -EFAULT;
+
+ max_vq = 0;
+ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq)
+ if (vq_present(&vqs, vq))
+ max_vq = vq;
+
+ if (max_vq > sve_vq_from_vl(kvm_sve_max_vl))
+ return -EINVAL;
+
+ /*
+ * Vector lengths supported by the host can't currently be
+ * hidden from the guest individually: instead we can only set a
+ * maxmium via ZCR_EL2.LEN. So, make sure the available vector
+ * lengths match the set requested exactly up to the requested
+ * maximum:
+ */
+ for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
+ if (vq_present(&vqs, vq) != sve_vq_available(vq))
+ return -EINVAL;
+
+ /* Can't run with no vector lengths at all: */
+ if (max_vq < SVE_VQ_MIN)
+ return -EINVAL;
+
+ /* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */
+ vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq);
+
+ return 0;
+}
+
+#define SVE_REG_SLICE_SHIFT 0
+#define SVE_REG_SLICE_BITS 5
+#define SVE_REG_ID_SHIFT (SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS)
+#define SVE_REG_ID_BITS 5
+
+#define SVE_REG_SLICE_MASK \
+ GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1, \
+ SVE_REG_SLICE_SHIFT)
+#define SVE_REG_ID_MASK \
+ GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT)
+
+#define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS)
+
+#define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0))
+#define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0))
+
+/*
+ * Number of register slices required to cover each whole SVE register.
+ * NOTE: Only the first slice every exists, for now.
+ * If you are tempted to modify this, you must also rework sve_reg_to_region()
+ * to match:
+ */
+#define vcpu_sve_slices(vcpu) 1
+
+/* Bounds of a single SVE register slice within vcpu->arch.sve_state */
+struct sve_state_reg_region {
+ unsigned int koffset; /* offset into sve_state in kernel memory */
+ unsigned int klen; /* length in kernel memory */
+ unsigned int upad; /* extra trailing padding in user memory */
+};
+
+/*
+ * Validate SVE register ID and get sanitised bounds for user/kernel SVE
+ * register copy
+ */
+static int sve_reg_to_region(struct sve_state_reg_region *region,
+ struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ /* reg ID ranges for Z- registers */
+ const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0);
+ const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1,
+ SVE_NUM_SLICES - 1);
+
+ /* reg ID ranges for P- registers and FFR (which are contiguous) */
+ const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0);
+ const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1);
+
+ unsigned int vq;
+ unsigned int reg_num;
+
+ unsigned int reqoffset, reqlen; /* User-requested offset and length */
+ unsigned int maxlen; /* Maxmimum permitted length */
+
+ size_t sve_state_size;
+
+ const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1,
+ SVE_NUM_SLICES - 1);
+
+ /* Verify that the P-regs and FFR really do have contiguous IDs: */
+ BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1);
+
+ /* Verify that we match the UAPI header: */
+ BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES);
+
+ reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT;
+
+ if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) {
+ if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
+ return -ENOENT;
+
+ vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
+
+ reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
+ SVE_SIG_REGS_OFFSET;
+ reqlen = KVM_SVE_ZREG_SIZE;
+ maxlen = SVE_SIG_ZREG_SIZE(vq);
+ } else if (reg->id >= preg_id_min && reg->id <= preg_id_max) {
+ if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
+ return -ENOENT;
+
+ vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
+
+ reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
+ SVE_SIG_REGS_OFFSET;
+ reqlen = KVM_SVE_PREG_SIZE;
+ maxlen = SVE_SIG_PREG_SIZE(vq);
+ } else {
+ return -EINVAL;
+ }
+
+ sve_state_size = vcpu_sve_state_size(vcpu);
+ if (WARN_ON(!sve_state_size))
+ return -EINVAL;
+
+ region->koffset = array_index_nospec(reqoffset, sve_state_size);
+ region->klen = min(maxlen, reqlen);
+ region->upad = reqlen - region->klen;
+
+ return 0;
+}
+
+static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret;
+ struct sve_state_reg_region region;
+ char __user *uptr = (char __user *)reg->addr;
+
+ /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
+ if (reg->id == KVM_REG_ARM64_SVE_VLS)
+ return get_sve_vls(vcpu, reg);
+
+ /* Try to interpret reg ID as an architectural SVE register... */
+ ret = sve_reg_to_region(&region, vcpu, reg);
+ if (ret)
+ return ret;
+
+ if (!kvm_arm_vcpu_sve_finalized(vcpu))
+ return -EPERM;
+
+ if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
+ region.klen) ||
+ clear_user(uptr + region.klen, region.upad))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret;
+ struct sve_state_reg_region region;
+ const char __user *uptr = (const char __user *)reg->addr;
+
+ /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */
+ if (reg->id == KVM_REG_ARM64_SVE_VLS)
+ return set_sve_vls(vcpu, reg);
+
+ /* Try to interpret reg ID as an architectural SVE register... */
+ ret = sve_reg_to_region(&region, vcpu, reg);
+ if (ret)
+ return ret;
+
+ if (!kvm_arm_vcpu_sve_finalized(vcpu))
+ return -EPERM;
+
+ if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
+ region.klen))
+ return -EFAULT;
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
return -EINVAL;
@@ -193,9 +448,37 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
-static unsigned long num_core_regs(void)
+static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ unsigned int i;
+ int n = 0;
+ const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+
+ for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+ /*
+ * The KVM_REG_ARM64_SVE regs must be used instead of
+ * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
+ * SVE-enabled vcpus:
+ */
+ if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(i))
+ continue;
+
+ if (uindices) {
+ if (put_user(core_reg | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ n++;
+ }
+
+ return n;
+}
+
+static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
{
- return sizeof(struct kvm_regs) / sizeof(__u32);
+ return copy_core_reg_indices(vcpu, NULL);
}
/**
@@ -251,6 +534,67 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
+static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
+{
+ const unsigned int slices = vcpu_sve_slices(vcpu);
+
+ if (!vcpu_has_sve(vcpu))
+ return 0;
+
+ /* Policed by KVM_GET_REG_LIST: */
+ WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
+
+ return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */)
+ + 1; /* KVM_REG_ARM64_SVE_VLS */
+}
+
+static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ const unsigned int slices = vcpu_sve_slices(vcpu);
+ u64 reg;
+ unsigned int i, n;
+ int num_regs = 0;
+
+ if (!vcpu_has_sve(vcpu))
+ return 0;
+
+ /* Policed by KVM_GET_REG_LIST: */
+ WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu));
+
+ /*
+ * Enumerate this first, so that userspace can save/restore in
+ * the order reported by KVM_GET_REG_LIST:
+ */
+ reg = KVM_REG_ARM64_SVE_VLS;
+ if (put_user(reg, uindices++))
+ return -EFAULT;
+ ++num_regs;
+
+ for (i = 0; i < slices; i++) {
+ for (n = 0; n < SVE_NUM_ZREGS; n++) {
+ reg = KVM_REG_ARM64_SVE_ZREG(n, i);
+ if (put_user(reg, uindices++))
+ return -EFAULT;
+ num_regs++;
+ }
+
+ for (n = 0; n < SVE_NUM_PREGS; n++) {
+ reg = KVM_REG_ARM64_SVE_PREG(n, i);
+ if (put_user(reg, uindices++))
+ return -EFAULT;
+ num_regs++;
+ }
+
+ reg = KVM_REG_ARM64_SVE_FFR(i);
+ if (put_user(reg, uindices++))
+ return -EFAULT;
+ num_regs++;
+ }
+
+ return num_regs;
+}
+
/**
* kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
*
@@ -258,8 +602,15 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
*/
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
- return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
- + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS;
+ unsigned long res = 0;
+
+ res += num_core_regs(vcpu);
+ res += num_sve_regs(vcpu);
+ res += kvm_arm_num_sys_reg_descs(vcpu);
+ res += kvm_arm_get_fw_num_regs(vcpu);
+ res += NUM_TIMER_REGS;
+
+ return res;
}
/**
@@ -269,23 +620,25 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
*/
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
- unsigned int i;
- const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
int ret;
- for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
- if (put_user(core_reg | i, uindices))
- return -EFAULT;
- uindices++;
- }
+ ret = copy_core_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_sve_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
- if (ret)
+ if (ret < 0)
return ret;
uindices += kvm_arm_get_fw_num_regs(vcpu);
ret = copy_timer_indices(vcpu, uindices);
- if (ret)
+ if (ret < 0)
return ret;
uindices += NUM_TIMER_REGS;
@@ -298,12 +651,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
return -EINVAL;
- /* Register group 16 means we want a core register. */
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
- return get_core_reg(vcpu, reg);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
- return kvm_arm_get_fw_reg(vcpu, reg);
+ switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg);
+ case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg);
+ case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg);
+ }
if (is_timer_reg(reg->id))
return get_timer_reg(vcpu, reg);
@@ -317,12 +669,11 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
return -EINVAL;
- /* Register group 16 means we set a core register. */
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
- return set_core_reg(vcpu, reg);
-
- if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
- return kvm_arm_set_fw_reg(vcpu, reg);
+ switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg);
+ case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg);
+ case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg);
+ }
if (is_timer_reg(reg->id))
return set_timer_reg(vcpu, reg);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 0b7983442071..516aead3c2a9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -173,20 +173,40 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+#define __ptrauth_save_key(regs, key) \
+({ \
+ regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
+ regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
+})
+
+/*
+ * Handle the guest trying to use a ptrauth instruction, or trying to access a
+ * ptrauth register.
+ */
+void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *ctxt;
+
+ if (vcpu_has_ptrauth(vcpu)) {
+ vcpu_ptrauth_enable(vcpu);
+ ctxt = vcpu->arch.host_cpu_context;
+ __ptrauth_save_key(ctxt->sys_regs, APIA);
+ __ptrauth_save_key(ctxt->sys_regs, APIB);
+ __ptrauth_save_key(ctxt->sys_regs, APDA);
+ __ptrauth_save_key(ctxt->sys_regs, APDB);
+ __ptrauth_save_key(ctxt->sys_regs, APGA);
+ } else {
+ kvm_inject_undefined(vcpu);
+ }
+}
+
/*
* Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
* a NOP).
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- /*
- * We don't currently support ptrauth in a guest, and we mask the ID
- * registers to prevent well-behaved guests from trying to make use of
- * it.
- *
- * Inject an UNDEF, as if the feature really isn't present.
- */
- kvm_inject_undefined(vcpu);
+ kvm_arm_vcpu_ptrauth_trap(vcpu);
return 1;
}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 675fdc186e3b..93ba3d7ef027 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -24,6 +24,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_ptrauth.h>
#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -64,6 +65,13 @@ ENTRY(__guest_enter)
add x18, x0, #VCPU_CONTEXT
+ // Macro ptrauth_switch_to_guest format:
+ // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
+ // The below macro to restore guest keys is not implemented in C code
+ // as it may cause Pointer Authentication key signing mismatch errors
+ // when this feature is enabled for kernel code.
+ ptrauth_switch_to_guest x18, x0, x1, x2
+
// Restore guest regs x0-x17
ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
@@ -118,6 +126,13 @@ ENTRY(__guest_exit)
get_host_ctxt x2, x3
+ // Macro ptrauth_switch_to_guest format:
+ // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3)
+ // The below macro to save/restore keys is not implemented in C code
+ // as it may cause Pointer Authentication key signing mismatch errors
+ // when this feature is enabled for kernel code.
+ ptrauth_switch_to_host x1, x2, x3, x4, x5
+
// Now restore the host regs
restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 3563fe655cd5..22b4c335e0b2 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -100,7 +100,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
val &= ~CPACR_EL1_ZEN;
- if (!update_fp_enabled(vcpu)) {
+ if (update_fp_enabled(vcpu)) {
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_EL1_ZEN;
+ } else {
val &= ~CPACR_EL1_FPEN;
__activate_traps_fpsimd32(vcpu);
}
@@ -317,16 +320,48 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
}
-static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
{
- struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+ bool vhe, sve_guest, sve_host;
+ u8 hsr_ec;
- if (has_vhe())
- write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
- cpacr_el1);
- else
+ if (!system_supports_fpsimd())
+ return false;
+
+ if (system_supports_sve()) {
+ sve_guest = vcpu_has_sve(vcpu);
+ sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+ vhe = true;
+ } else {
+ sve_guest = false;
+ sve_host = false;
+ vhe = has_vhe();
+ }
+
+ hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
+ hsr_ec != ESR_ELx_EC_SVE)
+ return false;
+
+ /* Don't handle SVE traps for non-SVE vcpus here: */
+ if (!sve_guest)
+ if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
+ return false;
+
+ /* Valid trap. Switch the context: */
+
+ if (vhe) {
+ u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+ if (sve_guest)
+ reg |= CPACR_EL1_ZEN;
+
+ write_sysreg(reg, cpacr_el1);
+ } else {
write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
cptr_el2);
+ }
isb();
@@ -335,21 +370,28 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
* In the SVE case, VHE is assumed: it is enforced by
* Kconfig and kvm_arch_init().
*/
- if (system_supports_sve() &&
- (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+ if (sve_host) {
struct thread_struct *thread = container_of(
- host_fpsimd,
+ vcpu->arch.host_fpsimd_state,
struct thread_struct, uw.fpsimd_state);
- sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+ sve_save_state(sve_pffr(thread),
+ &vcpu->arch.host_fpsimd_state->fpsr);
} else {
- __fpsimd_save_state(host_fpsimd);
+ __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
}
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
- __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+ if (sve_guest) {
+ sve_load_state(vcpu_sve_pffr(vcpu),
+ &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
+ sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
+ write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
+ } else {
+ __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+ }
/* Skip restoring fpexc32 for AArch64 guests */
if (!(read_sysreg(hcr_el2) & HCR_RW))
@@ -385,10 +427,10 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* and restore the guest context lazily.
* If FP/SIMD is not implemented, handle the trap and inject an
* undefined instruction exception to the guest.
+ * Similarly for trapped SVE accesses.
*/
- if (system_supports_fpsimd() &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
- return __hyp_switch_fpsimd(vcpu);
+ if (__hyp_handle_fpsimd(vcpu))
+ return true;
if (!__populate_fault_info(vcpu))
return true;
@@ -524,6 +566,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
+ bool pmu_switch_needed;
u64 exit_code;
/*
@@ -543,6 +586,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
+ pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+
__sysreg_save_state_nvhe(host_ctxt);
__activate_vm(kern_hyp_va(vcpu->kvm));
@@ -589,6 +634,9 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
*/
__debug_switch_to_host(vcpu);
+ if (pmu_switch_needed)
+ __pmu_switch_to_host(host_ctxt);
+
/* Returning to host will clear PSR.I, remask PMR if needed */
if (system_uses_irq_prio_masking())
gic_write_pmr(GIC_PRIO_IRQOFF);
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
new file mode 100644
index 000000000000..3da94a5bb6b7
--- /dev/null
+++ b/arch/arm64/kvm/pmu.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Arm Limited
+ * Author: Andrew Murray <Andrew.Murray@arm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include <asm/kvm_hyp.h>
+
+/*
+ * Given the perf event attributes and system type, determine
+ * if we are going to need to switch counters at guest entry/exit.
+ */
+static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
+{
+ /**
+ * With VHE the guest kernel runs at EL1 and the host at EL2,
+ * where user (EL0) is excluded then we have no reason to switch
+ * counters.
+ */
+ if (has_vhe() && attr->exclude_user)
+ return false;
+
+ /* Only switch if attributes are different */
+ return (attr->exclude_host != attr->exclude_guest);
+}
+
+/*
+ * Add events to track that we may want to switch at guest entry/exit
+ * time.
+ */
+void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
+{
+ struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
+
+ if (!kvm_pmu_switch_needed(attr))
+ return;
+
+ if (!attr->exclude_host)
+ ctx->pmu_events.events_host |= set;
+ if (!attr->exclude_guest)
+ ctx->pmu_events.events_guest |= set;
+}
+
+/*
+ * Stop tracking events
+ */
+void kvm_clr_pmu_events(u32 clr)
+{
+ struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data);
+
+ ctx->pmu_events.events_host &= ~clr;
+ ctx->pmu_events.events_guest &= ~clr;
+}
+
+/**
+ * Disable host events, enable guest events
+ */
+bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+ return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+ struct kvm_host_data *host;
+ struct kvm_pmu_events *pmu;
+
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ pmu = &host->pmu_events;
+
+ if (pmu->events_guest)
+ write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+ if (pmu->events_host)
+ write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
+#define PMEVTYPER_READ_CASE(idx) \
+ case idx: \
+ return read_sysreg(pmevtyper##idx##_el0)
+
+#define PMEVTYPER_WRITE_CASE(idx) \
+ case idx: \
+ write_sysreg(val, pmevtyper##idx##_el0); \
+ break
+
+#define PMEVTYPER_CASES(readwrite) \
+ PMEVTYPER_##readwrite##_CASE(0); \
+ PMEVTYPER_##readwrite##_CASE(1); \
+ PMEVTYPER_##readwrite##_CASE(2); \
+ PMEVTYPER_##readwrite##_CASE(3); \
+ PMEVTYPER_##readwrite##_CASE(4); \
+ PMEVTYPER_##readwrite##_CASE(5); \
+ PMEVTYPER_##readwrite##_CASE(6); \
+ PMEVTYPER_##readwrite##_CASE(7); \
+ PMEVTYPER_##readwrite##_CASE(8); \
+ PMEVTYPER_##readwrite##_CASE(9); \
+ PMEVTYPER_##readwrite##_CASE(10); \
+ PMEVTYPER_##readwrite##_CASE(11); \
+ PMEVTYPER_##readwrite##_CASE(12); \
+ PMEVTYPER_##readwrite##_CASE(13); \
+ PMEVTYPER_##readwrite##_CASE(14); \
+ PMEVTYPER_##readwrite##_CASE(15); \
+ PMEVTYPER_##readwrite##_CASE(16); \
+ PMEVTYPER_##readwrite##_CASE(17); \
+ PMEVTYPER_##readwrite##_CASE(18); \
+ PMEVTYPER_##readwrite##_CASE(19); \
+ PMEVTYPER_##readwrite##_CASE(20); \
+ PMEVTYPER_##readwrite##_CASE(21); \
+ PMEVTYPER_##readwrite##_CASE(22); \
+ PMEVTYPER_##readwrite##_CASE(23); \
+ PMEVTYPER_##readwrite##_CASE(24); \
+ PMEVTYPER_##readwrite##_CASE(25); \
+ PMEVTYPER_##readwrite##_CASE(26); \
+ PMEVTYPER_##readwrite##_CASE(27); \
+ PMEVTYPER_##readwrite##_CASE(28); \
+ PMEVTYPER_##readwrite##_CASE(29); \
+ PMEVTYPER_##readwrite##_CASE(30)
+
+/*
+ * Read a value direct from PMEVTYPER<idx> where idx is 0-30
+ * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ */
+static u64 kvm_vcpu_pmu_read_evtype_direct(int idx)
+{
+ switch (idx) {
+ PMEVTYPER_CASES(READ);
+ case ARMV8_PMU_CYCLE_IDX:
+ return read_sysreg(pmccfiltr_el0);
+ default:
+ WARN_ON(1);
+ }
+
+ return 0;
+}
+
+/*
+ * Write a value direct to PMEVTYPER<idx> where idx is 0-30
+ * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ */
+static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val)
+{
+ switch (idx) {
+ PMEVTYPER_CASES(WRITE);
+ case ARMV8_PMU_CYCLE_IDX:
+ write_sysreg(val, pmccfiltr_el0);
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+/*
+ * Modify ARMv8 PMU events to include EL0 counting
+ */
+static void kvm_vcpu_pmu_enable_el0(unsigned long events)
+{
+ u64 typer;
+ u32 counter;
+
+ for_each_set_bit(counter, &events, 32) {
+ typer = kvm_vcpu_pmu_read_evtype_direct(counter);
+ typer &= ~ARMV8_PMU_EXCLUDE_EL0;
+ kvm_vcpu_pmu_write_evtype_direct(counter, typer);
+ }
+}
+
+/*
+ * Modify ARMv8 PMU events to exclude EL0 counting
+ */
+static void kvm_vcpu_pmu_disable_el0(unsigned long events)
+{
+ u64 typer;
+ u32 counter;
+
+ for_each_set_bit(counter, &events, 32) {
+ typer = kvm_vcpu_pmu_read_evtype_direct(counter);
+ typer |= ARMV8_PMU_EXCLUDE_EL0;
+ kvm_vcpu_pmu_write_evtype_direct(counter, typer);
+ }
+}
+
+/*
+ * On VHE ensure that only guest events have EL0 counting enabled
+ */
+void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_host_data *host;
+ u32 events_guest, events_host;
+
+ if (!has_vhe())
+ return;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ events_guest = host->pmu_events.events_guest;
+ events_host = host->pmu_events.events_host;
+
+ kvm_vcpu_pmu_enable_el0(events_guest);
+ kvm_vcpu_pmu_disable_el0(events_host);
+}
+
+/*
+ * On VHE ensure that only host events have EL0 counting enabled
+ */
+void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_host_data *host;
+ u32 events_guest, events_host;
+
+ if (!has_vhe())
+ return;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+ events_guest = host->pmu_events.events_guest;
+ events_host = host->pmu_events.events_host;
+
+ kvm_vcpu_pmu_enable_el0(events_host);
+ kvm_vcpu_pmu_disable_el0(events_guest);
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e2a0500cd7a2..1140b4485575 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -20,20 +20,26 @@
*/
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
#include <kvm/arm_arch_timer.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
+#include <asm/virt.h>
/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;
@@ -92,6 +98,14 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_VM_IPA_SIZE:
r = kvm_ipa_limit;
break;
+ case KVM_CAP_ARM_SVE:
+ r = system_supports_sve();
+ break;
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ r = has_vhe() && system_supports_address_auth() &&
+ system_supports_generic_auth();
+ break;
default:
r = 0;
}
@@ -99,13 +113,148 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
+unsigned int kvm_sve_max_vl;
+
+int kvm_arm_init_sve(void)
+{
+ if (system_supports_sve()) {
+ kvm_sve_max_vl = sve_max_virtualisable_vl;
+
+ /*
+ * The get_sve_reg()/set_sve_reg() ioctl interface will need
+ * to be extended with multiple register slice support in
+ * order to support vector lengths greater than
+ * SVE_VL_ARCH_MAX:
+ */
+ if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
+ kvm_sve_max_vl = SVE_VL_ARCH_MAX;
+
+ /*
+ * Don't even try to make use of vector lengths that
+ * aren't available on all CPUs, for now:
+ */
+ if (kvm_sve_max_vl < sve_max_vl)
+ pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
+ kvm_sve_max_vl);
+ }
+
+ return 0;
+}
+
+static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
+{
+ if (!system_supports_sve())
+ return -EINVAL;
+
+ /* Verify that KVM startup enforced this when SVE was detected: */
+ if (WARN_ON(!has_vhe()))
+ return -EINVAL;
+
+ vcpu->arch.sve_max_vl = kvm_sve_max_vl;
+
+ /*
+ * Userspace can still customize the vector lengths by writing
+ * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
+ * kvm_arm_vcpu_finalize(), which freezes the configuration.
+ */
+ vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
+
+ return 0;
+}
+
+/*
+ * Finalize vcpu's maximum SVE vector length, allocating
+ * vcpu->arch.sve_state as necessary.
+ */
+static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
+{
+ void *buf;
+ unsigned int vl;
+
+ vl = vcpu->arch.sve_max_vl;
+
+ /*
+ * Resposibility for these properties is shared between
+ * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
+ * set_sve_vls(). Double-check here just to be sure:
+ */
+ if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
+ vl > SVE_VL_ARCH_MAX))
+ return -EIO;
+
+ buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ vcpu->arch.sve_state = buf;
+ vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
+ return 0;
+}
+
+int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
+{
+ switch (feature) {
+ case KVM_ARM_VCPU_SVE:
+ if (!vcpu_has_sve(vcpu))
+ return -EINVAL;
+
+ if (kvm_arm_vcpu_sve_finalized(vcpu))
+ return -EPERM;
+
+ return kvm_vcpu_finalize_sve(vcpu);
+ }
+
+ return -EINVAL;
+}
+
+bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
+ return false;
+
+ return true;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ kfree(vcpu->arch.sve_state);
+}
+
+static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_sve(vcpu))
+ memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
+}
+
+static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
+{
+ /* Support ptrauth only if the system supports these capabilities. */
+ if (!has_vhe())
+ return -EINVAL;
+
+ if (!system_supports_address_auth() ||
+ !system_supports_generic_auth())
+ return -EINVAL;
+ /*
+ * For now make sure that both address/generic pointer authentication
+ * features are requested by the userspace together.
+ */
+ if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
+ !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
+ return -EINVAL;
+
+ vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
+ return 0;
+}
+
/**
* kvm_reset_vcpu - sets core registers and sys_regs to reset value
* @vcpu: The VCPU pointer
*
* This function finds the right table above and sets the registers on
* the virtual CPU struct to their architecturally defined reset
- * values.
+ * values, except for registers whose reset is deferred until
+ * kvm_arm_vcpu_finalize().
*
* Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
* ioctl or as part of handling a request issued by another VCPU in the PSCI
@@ -131,6 +280,22 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
if (loaded)
kvm_arch_vcpu_put(vcpu);
+ if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
+ if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
+ ret = kvm_vcpu_enable_sve(vcpu);
+ if (ret)
+ goto out;
+ }
+ } else {
+ kvm_vcpu_reset_sve(vcpu);
+ }
+
+ if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
+ test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
+ if (kvm_vcpu_enable_ptrauth(vcpu))
+ goto out;
+ }
+
switch (vcpu->arch.target) {
default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 539feecda5b8..857b226bcdde 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -695,6 +695,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val |= p->regval & ARMV8_PMU_PMCR_MASK;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
+ kvm_vcpu_pmu_restore_guest(vcpu);
} else {
/* PMCR.P & PMCR.C are RAZ */
val = __vcpu_sys_reg(vcpu, PMCR_EL0)
@@ -850,6 +851,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
__vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ kvm_vcpu_pmu_restore_guest(vcpu);
} else {
p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
}
@@ -875,6 +877,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* accessing PMCNTENSET_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
+ kvm_vcpu_pmu_restore_guest(vcpu);
} else {
/* accessing PMCNTENCLR_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
@@ -1007,6 +1010,37 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \
access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
+static bool trap_ptrauth(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ kvm_arm_vcpu_ptrauth_trap(vcpu);
+
+ /*
+ * Return false for both cases as we never skip the trapped
+ * instruction:
+ *
+ * - Either we re-execute the same key register access instruction
+ * after enabling ptrauth.
+ * - Or an UNDEF is injected as ptrauth is not supported/enabled.
+ */
+ return false;
+}
+
+static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+}
+
+#define __PTRAUTH_KEY(k) \
+ { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \
+ .visibility = ptrauth_visibility}
+
+#define PTRAUTH_KEY(k) \
+ __PTRAUTH_KEY(k ## KEYLO_EL1), \
+ __PTRAUTH_KEY(k ## KEYHI_EL1)
+
static bool access_arch_timer(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -1044,25 +1078,20 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
}
/* Read a sanitised cpufeature ID register by sys_reg_desc */
-static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
+static u64 read_id_reg(const struct kvm_vcpu *vcpu,
+ struct sys_reg_desc const *r, bool raz)
{
u32 id = sys_reg((u32)r->Op0, (u32)r->Op1,
(u32)r->CRn, (u32)r->CRm, (u32)r->Op2);
u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
- if (id == SYS_ID_AA64PFR0_EL1) {
- if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT))
- kvm_debug("SVE unsupported for guests, suppressing\n");
-
+ if (id == SYS_ID_AA64PFR0_EL1 && !vcpu_has_sve(vcpu)) {
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
- } else if (id == SYS_ID_AA64ISAR1_EL1) {
- const u64 ptrauth_mask = (0xfUL << ID_AA64ISAR1_APA_SHIFT) |
- (0xfUL << ID_AA64ISAR1_API_SHIFT) |
- (0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
- (0xfUL << ID_AA64ISAR1_GPI_SHIFT);
- if (val & ptrauth_mask)
- kvm_debug("ptrauth unsupported for guests, suppressing\n");
- val &= ~ptrauth_mask;
+ } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
+ val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
+ (0xfUL << ID_AA64ISAR1_API_SHIFT) |
+ (0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
+ (0xfUL << ID_AA64ISAR1_GPI_SHIFT));
}
return val;
@@ -1078,7 +1107,7 @@ static bool __access_id_reg(struct kvm_vcpu *vcpu,
if (p->is_write)
return write_to_read_only(vcpu, p, r);
- p->regval = read_id_reg(r, raz);
+ p->regval = read_id_reg(vcpu, r, raz);
return true;
}
@@ -1100,6 +1129,81 @@ static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+/* Visibility overrides for SVE-specific control registers */
+static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (vcpu_has_sve(vcpu))
+ return 0;
+
+ return REG_HIDDEN_USER | REG_HIDDEN_GUEST;
+}
+
+/* Visibility overrides for SVE-specific ID registers */
+static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (vcpu_has_sve(vcpu))
+ return 0;
+
+ return REG_HIDDEN_USER;
+}
+
+/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
+static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
+{
+ if (!vcpu_has_sve(vcpu))
+ return 0;
+
+ return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
+}
+
+static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *rd)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, rd);
+
+ p->regval = guest_id_aa64zfr0_el1(vcpu);
+ return true;
+}
+
+static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ u64 val;
+
+ if (WARN_ON(!vcpu_has_sve(vcpu)))
+ return -ENOENT;
+
+ val = guest_id_aa64zfr0_el1(vcpu);
+ return reg_to_user(uaddr, &val, reg->id);
+}
+
+static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ const u64 id = sys_reg_to_index(rd);
+ int err;
+ u64 val;
+
+ if (WARN_ON(!vcpu_has_sve(vcpu)))
+ return -ENOENT;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (val != guest_id_aa64zfr0_el1(vcpu))
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* cpufeature ID register user accessors
*
@@ -1107,16 +1211,18 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
* are stored, and for set_id_reg() we don't allow the effective value
* to be changed.
*/
-static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+static int __get_id_reg(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, void __user *uaddr,
bool raz)
{
const u64 id = sys_reg_to_index(rd);
- const u64 val = read_id_reg(rd, raz);
+ const u64 val = read_id_reg(vcpu, rd, raz);
return reg_to_user(uaddr, &val, id);
}
-static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
+static int __set_id_reg(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, void __user *uaddr,
bool raz)
{
const u64 id = sys_reg_to_index(rd);
@@ -1128,7 +1234,7 @@ static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
return err;
/* This is what we mean by invariant: you can't change it. */
- if (val != read_id_reg(rd, raz))
+ if (val != read_id_reg(vcpu, rd, raz))
return -EINVAL;
return 0;
@@ -1137,25 +1243,25 @@ static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr,
static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __get_id_reg(rd, uaddr, false);
+ return __get_id_reg(vcpu, rd, uaddr, false);
}
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __set_id_reg(rd, uaddr, false);
+ return __set_id_reg(vcpu, rd, uaddr, false);
}
static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __get_id_reg(rd, uaddr, true);
+ return __get_id_reg(vcpu, rd, uaddr, true);
}
static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __set_id_reg(rd, uaddr, true);
+ return __set_id_reg(vcpu, rd, uaddr, true);
}
static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
@@ -1343,7 +1449,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_SANITISED(ID_AA64PFR1_EL1),
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
- ID_UNALLOCATED(4,4),
+ { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility },
ID_UNALLOCATED(4,5),
ID_UNALLOCATED(4,6),
ID_UNALLOCATED(4,7),
@@ -1380,10 +1486,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+ { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
+ PTRAUTH_KEY(APIA),
+ PTRAUTH_KEY(APIB),
+ PTRAUTH_KEY(APDA),
+ PTRAUTH_KEY(APDB),
+ PTRAUTH_KEY(APGA),
+
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -1924,6 +2037,12 @@ static void perform_access(struct kvm_vcpu *vcpu,
{
trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
+ /* Check for regs disabled by runtime config */
+ if (sysreg_hidden_from_guest(vcpu, r)) {
+ kvm_inject_undefined(vcpu);
+ return;
+ }
+
/*
* Not having an accessor means that we have configured a trap
* that we don't know how to handle. This certainly qualifies
@@ -2435,6 +2554,10 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (!r)
return get_invariant_sys_reg(reg->id, uaddr);
+ /* Check for regs disabled by runtime config */
+ if (sysreg_hidden_from_user(vcpu, r))
+ return -ENOENT;
+
if (r->get_user)
return (r->get_user)(vcpu, r, reg, uaddr);
@@ -2456,6 +2579,10 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (!r)
return set_invariant_sys_reg(reg->id, uaddr);
+ /* Check for regs disabled by runtime config */
+ if (sysreg_hidden_from_user(vcpu, r))
+ return -ENOENT;
+
if (r->set_user)
return (r->set_user)(vcpu, r, reg, uaddr);
@@ -2512,7 +2639,8 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
return true;
}
-static int walk_one_sys_reg(const struct sys_reg_desc *rd,
+static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
u64 __user **uind,
unsigned int *total)
{
@@ -2523,6 +2651,9 @@ static int walk_one_sys_reg(const struct sys_reg_desc *rd,
if (!(rd->reg || rd->get_user))
return 0;
+ if (sysreg_hidden_from_user(vcpu, rd))
+ return 0;
+
if (!copy_reg_to_user(rd, uind))
return -EFAULT;
@@ -2551,9 +2682,9 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
int cmp = cmp_sys_reg(i1, i2);
/* target-specific overrides generic entry. */
if (cmp <= 0)
- err = walk_one_sys_reg(i1, &uind, &total);
+ err = walk_one_sys_reg(vcpu, i1, &uind, &total);
else
- err = walk_one_sys_reg(i2, &uind, &total);
+ err = walk_one_sys_reg(vcpu, i2, &uind, &total);
if (err)
return err;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 3b1bc7f01d0b..2be99508dcb9 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -64,8 +64,15 @@ struct sys_reg_desc {
const struct kvm_one_reg *reg, void __user *uaddr);
int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr);
+
+ /* Return mask of REG_* runtime visibility overrides */
+ unsigned int (*visibility)(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd);
};
+#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
+#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
+
static inline void print_sys_reg_instr(const struct sys_reg_params *p)
{
/* Look, we even formatted it for you to paste into the table! */
@@ -102,6 +109,24 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
__vcpu_sys_reg(vcpu, r->reg) = r->val;
}
+static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ if (likely(!r->visibility))
+ return false;
+
+ return r->visibility(vcpu, r) & REG_HIDDEN_GUEST;
+}
+
+static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ if (likely(!r->visibility))
+ return false;
+
+ return r->visibility(vcpu, r) & REG_HIDDEN_USER;
+}
+
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
const struct sys_reg_desc *i2)
{
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index b32bfa1fe99e..23a979a85f14 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/console.h>
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 56e3d0b685e1..e01df3f2f80d 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -348,3 +348,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index df4ec3ec71d1..7e3d0734b2f3 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -427,3 +427,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 4964947732af..26339e417695 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -433,3 +433,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 677e5bfeff47..70d3200476bf 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -674,7 +674,10 @@ config SGI_IP27
select SYS_HAS_EARLY_PRINTK
select HAVE_PCI
select IRQ_MIPS_CPU
+ select IRQ_DOMAIN_HIERARCHY
select NR_CPUS_DEFAULT_64
+ select PCI_DRIVERS_GENERIC
+ select PCI_XTALK_BRIDGE
select SYS_HAS_CPU_R10000
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
@@ -1241,6 +1244,9 @@ config IRQ_GT641XX
config PCI_GT64XXX_PCI0
bool
+config PCI_XTALK_BRIDGE
+ bool
+
config NO_EXCEPT_FILL
bool
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 1454d9f6ab2d..b8f3397c59c9 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -131,9 +131,7 @@ static void __init alchemy_setup_uarts(int ctype)
}
-/* The dmamask must be set for OHCI/EHCI to work */
-static u64 alchemy_ohci_dmamask = DMA_BIT_MASK(32);
-static u64 __maybe_unused alchemy_ehci_dmamask = DMA_BIT_MASK(32);
+static u64 alchemy_all_dmamask = DMA_BIT_MASK(32);
/* Power on callback for the ehci platform driver */
static int alchemy_ehci_power_on(struct platform_device *pdev)
@@ -231,7 +229,7 @@ static void __init alchemy_setup_usb(int ctype)
res[1].flags = IORESOURCE_IRQ;
pdev->name = "ohci-platform";
pdev->id = 0;
- pdev->dev.dma_mask = &alchemy_ohci_dmamask;
+ pdev->dev.dma_mask = &alchemy_all_dmamask;
pdev->dev.platform_data = &alchemy_ohci_pdata;
if (platform_device_register(pdev))
@@ -251,7 +249,7 @@ static void __init alchemy_setup_usb(int ctype)
res[1].flags = IORESOURCE_IRQ;
pdev->name = "ehci-platform";
pdev->id = 0;
- pdev->dev.dma_mask = &alchemy_ehci_dmamask;
+ pdev->dev.dma_mask = &alchemy_all_dmamask;
pdev->dev.platform_data = &alchemy_ehci_pdata;
if (platform_device_register(pdev))
@@ -271,7 +269,7 @@ static void __init alchemy_setup_usb(int ctype)
res[1].flags = IORESOURCE_IRQ;
pdev->name = "ohci-platform";
pdev->id = 1;
- pdev->dev.dma_mask = &alchemy_ohci_dmamask;
+ pdev->dev.dma_mask = &alchemy_all_dmamask;
pdev->dev.platform_data = &alchemy_ohci_pdata;
if (platform_device_register(pdev))
@@ -338,7 +336,11 @@ static struct platform_device au1xxx_eth0_device = {
.name = "au1000-eth",
.id = 0,
.num_resources = MAC_RES_COUNT,
- .dev.platform_data = &au1xxx_eth0_platform_data,
+ .dev = {
+ .dma_mask = &alchemy_all_dmamask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = &au1xxx_eth0_platform_data,
+ },
};
static struct resource au1xxx_eth1_resources[][MAC_RES_COUNT] __initdata = {
@@ -370,7 +372,11 @@ static struct platform_device au1xxx_eth1_device = {
.name = "au1000-eth",
.id = 1,
.num_resources = MAC_RES_COUNT,
- .dev.platform_data = &au1xxx_eth1_platform_data,
+ .dev = {
+ .dma_mask = &alchemy_all_dmamask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = &au1xxx_eth1_platform_data,
+ },
};
void __init au1xxx_override_eth_cfg(unsigned int port,
diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index d4ca97e2ec6c..228cdc736db7 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/clkdev.h>
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 25a57895a3a3..298b46b4e9cb 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/clk.h>
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index ff40fbc2f439..21a1168ae301 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -228,7 +228,7 @@ CONFIG_SERIAL_IP22_ZILOG=m
# CONFIG_HW_RANDOM is not set
CONFIG_RAW_DRIVER=m
# CONFIG_HWMON is not set
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_INDYDOG=m
# CONFIG_VGA_CONSOLE is not set
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 81c47e18131b..54db5dedf776 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -271,7 +271,7 @@ CONFIG_I2C_PARPORT_LIGHT=m
CONFIG_I2C_TAOS_EVM=m
CONFIG_I2C_STUB=m
# CONFIG_HWMON is not set
-CONFIG_THERMAL=m
+CONFIG_THERMAL=y
CONFIG_MFD_PCF50633=m
CONFIG_PCF50633_ADC=m
CONFIG_PCF50633_GPIO=m
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
index a106f8113842..a84475f1924f 100644
--- a/arch/mips/generic/init.c
+++ b/arch/mips/generic/init.c
@@ -43,14 +43,14 @@ void __init *plat_get_fdt(void)
/* Already set up */
return (void *)fdt;
- if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) {
+ if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_passed_dtb)) {
/*
* We booted using the UHI boot protocol, so we have been
* provided with the appropriate device tree for the board.
* Make use of it & search for any machine struct based upon
* the root compatible string.
*/
- fdt = (void *)fw_arg1;
+ fdt = (void *)fw_passed_dtb;
for_each_mips_machine(check_mach) {
match = mips_machine_is_compatible(check_mach, fdt);
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 42ea1313626c..965f0793a5f9 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -7,18 +7,9 @@
#include <asm/mmzone.h>
struct cpuinfo_ip27 {
-// cpuid_t p_cpuid; /* PROM assigned cpuid */
cnodeid_t p_nodeid; /* my node ID in compact-id-space */
nasid_t p_nasid; /* my node ID in numa-as-id-space */
unsigned char p_slice; /* Physical position on node board */
-#if 0
- unsigned long loops_per_sec;
- unsigned long ipi_count;
- unsigned long irq_attempt[NR_IRQS];
- unsigned long smp_local_irq_count;
- unsigned long prof_multiplier;
- unsigned long prof_counter;
-#endif
};
extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
@@ -30,7 +21,7 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
struct pci_bus;
extern int pcibus_to_node(struct pci_bus *);
-#define cpumask_of_pcibus(bus) (cpu_online_mask)
+#define cpumask_of_pcibus(bus) (cpumask_of_node(pcibus_to_node(bus)))
extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
diff --git a/arch/mips/include/asm/pci/bridge.h b/arch/mips/include/asm/pci/bridge.h
index 23574c27eb40..a92cd30b48c9 100644
--- a/arch/mips/include/asm/pci/bridge.h
+++ b/arch/mips/include/asm/pci/bridge.h
@@ -801,15 +801,13 @@ struct bridge_err_cmdword {
#define PCI64_ATTR_RMF_SHFT 48
struct bridge_controller {
- struct pci_controller pc;
- struct resource mem;
- struct resource io;
struct resource busn;
struct bridge_regs *base;
- nasid_t nasid;
- unsigned int widget_id;
- u64 baddr;
+ unsigned long baddr;
+ unsigned long intr_addr;
+ struct irq_domain *domain;
unsigned int pci_int[8];
+ nasid_t nasid;
};
#define BRIDGE_CONTROLLER(bus) \
@@ -822,8 +820,4 @@ struct bridge_controller {
#define bridge_clr(bc, reg, val) \
__raw_writel(__raw_readl(&bc->base->reg) & ~(val), &bc->base->reg)
-extern int request_bridge_irq(struct bridge_controller *bc, int pin);
-
-extern struct pci_ops bridge_pci_ops;
-
#endif /* _ASM_PCI_BRIDGE_H */
diff --git a/arch/mips/include/asm/sn/irq_alloc.h b/arch/mips/include/asm/sn/irq_alloc.h
new file mode 100644
index 000000000000..09b89cecff56
--- /dev/null
+++ b/arch/mips/include/asm/sn/irq_alloc.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SN_IRQ_ALLOC_H
+#define __ASM_SN_IRQ_ALLOC_H
+
+struct irq_alloc_info {
+ void *ctrl;
+ nasid_t nasid;
+ int pin;
+};
+
+#endif /* __ASM_SN_IRQ_ALLOC_H */
diff --git a/arch/mips/include/asm/xtalk/xtalk.h b/arch/mips/include/asm/xtalk/xtalk.h
index 26d2ed1fa917..680e7efebbaf 100644
--- a/arch/mips/include/asm/xtalk/xtalk.h
+++ b/arch/mips/include/asm/xtalk/xtalk.h
@@ -47,15 +47,6 @@ typedef struct xtalk_piomap_s *xtalk_piomap_t;
#define XIO_PORT(x) ((xwidgetnum_t)(((x)&XIO_PORT_BITS) >> XIO_PORT_SHIFT))
#define XIO_PACK(p, o) ((((uint64_t)(p))<<XIO_PORT_SHIFT) | ((o)&XIO_ADDR_BITS))
-#ifdef CONFIG_PCI
-extern int bridge_probe(nasid_t nasid, int widget, int masterwid);
-#else
-static inline int bridge_probe(nasid_t nasid, int widget, int masterwid)
-{
- return 0;
-}
-#endif
-
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_XTALK_XTALK_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index d5e335e6846a..6126b77d5a62 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1973,6 +1973,14 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu)
panic("Unknown Ingenic Processor ID!");
break;
}
+
+ /*
+ * The config0 register in the Xburst CPUs with a processor ID of
+ * PRID_COMP_INGENIC_D0 report themselves as MIPS32r2 compatible,
+ * but they don't actually support this ISA.
+ */
+ if ((c->processor_id & PRID_COMP_MASK) == PRID_COMP_INGENIC_D0)
+ c->isa_level &= ~MIPS_CPU_ISA_M32R2;
}
static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 413863508f6f..d67fb64e908c 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -64,17 +64,11 @@ struct mips_perf_event {
#define CNTR_EVEN 0x55555555
#define CNTR_ODD 0xaaaaaaaa
#define CNTR_ALL 0xffffffff
-#ifdef CONFIG_MIPS_MT_SMP
enum {
T = 0,
V = 1,
P = 2,
} range;
-#else
- #define T
- #define V
- #define P
-#endif
};
static struct mips_perf_event raw_event;
@@ -325,9 +319,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
{
struct perf_event *event = container_of(evt, struct perf_event, hw);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-#ifdef CONFIG_MIPS_MT_SMP
unsigned int range = evt->event_base >> 24;
-#endif /* CONFIG_MIPS_MT_SMP */
WARN_ON(idx < 0 || idx >= mipspmu.num_counters);
@@ -336,21 +328,15 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
/* Make sure interrupt enabled. */
MIPS_PERFCTRL_IE;
-#ifdef CONFIG_CPU_BMIPS5000
- {
+ if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) {
/* enable the counter for the calling thread */
cpuc->saved_ctrl[idx] |=
(1 << (12 + vpe_id())) | BRCM_PERFCTRL_TC;
- }
-#else
-#ifdef CONFIG_MIPS_MT_SMP
- if (range > V) {
+ } else if (IS_ENABLED(CONFIG_MIPS_MT_SMP) && range > V) {
/* The counter is processor wide. Set it up to count all TCs. */
pr_debug("Enabling perf counter for all TCs\n");
cpuc->saved_ctrl[idx] |= M_TC_EN_ALL;
- } else
-#endif /* CONFIG_MIPS_MT_SMP */
- {
+ } else {
unsigned int cpu, ctrl;
/*
@@ -365,7 +351,6 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx)
cpuc->saved_ctrl[idx] |= ctrl;
pr_debug("Enabling perf counter for CPU%d\n", cpu);
}
-#endif /* CONFIG_CPU_BMIPS5000 */
/*
* We do not actually let the counter run. Leave it until start().
*/
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 9392dfe33f97..0e2dd68ade57 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -366,3 +366,9 @@
425 n32 io_uring_setup sys_io_uring_setup
426 n32 io_uring_enter sys_io_uring_enter
427 n32 io_uring_register sys_io_uring_register
+428 n32 open_tree sys_open_tree
+429 n32 move_mount sys_move_mount
+430 n32 fsopen sys_fsopen
+431 n32 fsconfig sys_fsconfig
+432 n32 fsmount sys_fsmount
+433 n32 fspick sys_fspick
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index cd0c8aa21fba..5eebfa0d155c 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -342,3 +342,9 @@
425 n64 io_uring_setup sys_io_uring_setup
426 n64 io_uring_enter sys_io_uring_enter
427 n64 io_uring_register sys_io_uring_register
+428 n64 open_tree sys_open_tree
+429 n64 move_mount sys_move_mount
+430 n64 fsopen sys_fsopen
+431 n64 fsconfig sys_fsconfig
+432 n64 fsmount sys_fsmount
+433 n64 fspick sys_fspick
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index e849e8ffe4a2..3cc1374e02d0 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -415,3 +415,9 @@
425 o32 io_uring_setup sys_io_uring_setup
426 o32 io_uring_enter sys_io_uring_enter
427 o32 io_uring_register sys_io_uring_register
+428 o32 open_tree sys_open_tree
+429 o32 move_mount sys_move_mount
+430 o32 fsopen sys_fsopen
+431 o32 fsconfig sys_fsconfig
+432 o32 fsmount sys_fsmount
+433 o32 fspick sys_fspick
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index c4f976593061..d6de4cb2e31c 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PCI_AR2315) += pci-ar2315.o
obj-$(CONFIG_SOC_AR71XX) += pci-ar71xx.o
obj-$(CONFIG_PCI_AR724X) += pci-ar724x.o
obj-$(CONFIG_MIPS_PCI_VIRTIO) += pci-virtio-guest.o
+obj-$(CONFIG_PCI_XTALK_BRIDGE) += pci-xtalk-bridge.o
#
# These are still pretty much in the old state, watch, go blind.
#
@@ -39,7 +40,7 @@ obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o
obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o
obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o
obj-$(CONFIG_PMC_MSP7120_FPGA) += fixup-pmcmsp.o ops-pmcmsp.o
-obj-$(CONFIG_SGI_IP27) += ops-bridge.o pci-ip27.o
+obj-$(CONFIG_SGI_IP27) += pci-ip27.o
obj-$(CONFIG_SGI_IP32) += fixup-ip32.o ops-mace.o pci-ip32.o
obj-$(CONFIG_SIBYTE_SB1250) += fixup-sb1250.o pci-sb1250.o
obj-$(CONFIG_SIBYTE_BCM112X) += fixup-sb1250.o pci-sb1250.o
diff --git a/arch/mips/pci/ops-bridge.c b/arch/mips/pci/ops-bridge.c
deleted file mode 100644
index df95b0da08f2..000000000000
--- a/arch/mips/pci/ops-bridge.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1999, 2000, 04, 06 Ralf Baechle (ralf@linux-mips.org)
- * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- */
-#include <linux/pci.h>
-#include <asm/paccess.h>
-#include <asm/pci/bridge.h>
-#include <asm/sn/arch.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn0/hub.h>
-
-/*
- * Most of the IOC3 PCI config register aren't present
- * we emulate what is needed for a normal PCI enumeration
- */
-static u32 emulate_ioc3_cfg(int where, int size)
-{
- if (size == 1 && where == 0x3d)
- return 0x01;
- else if (size == 2 && where == 0x3c)
- return 0x0100;
- else if (size == 4 && where == 0x3c)
- return 0x00000100;
-
- return 0;
-}
-
-/*
- * The Bridge ASIC supports both type 0 and type 1 access. Type 1 is
- * not really documented, so right now I can't write code which uses it.
- * Therefore we use type 0 accesses for now even though they won't work
- * correctly for PCI-to-PCI bridges.
- *
- * The function is complicated by the ultimate brokenness of the IOC3 chip
- * which is used in SGI systems. The IOC3 can only handle 32-bit PCI
- * accesses and does only decode parts of it's address space.
- */
-
-static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 * value)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
- struct bridge_regs *bridge = bc->base;
- int slot = PCI_SLOT(devfn);
- int fn = PCI_FUNC(devfn);
- volatile void *addr;
- u32 cf, shift, mask;
- int res;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /*
- * IOC3 is broken beyond belief ... Don't even give the
- * generic PCI code a chance to look at it for real ...
- */
- if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
- goto is_ioc3;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
-
- if (size == 1)
- res = get_dbe(*value, (u8 *) addr);
- else if (size == 2)
- res = get_dbe(*value, (u16 *) addr);
- else
- res = get_dbe(*value, (u32 *) addr);
-
- return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
- /*
- * IOC3 special handling
- */
- if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
- *value = emulate_ioc3_cfg(where, size);
- return PCIBIOS_SUCCESSFUL;
- }
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- shift = ((where & 3) << 3);
- mask = (0xffffffffU >> ((4 - size) << 3));
- *value = (cf >> shift) & mask;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 * value)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
- struct bridge_regs *bridge = bc->base;
- int busno = bus->number;
- int slot = PCI_SLOT(devfn);
- int fn = PCI_FUNC(devfn);
- volatile void *addr;
- u32 cf, shift, mask;
- int res;
-
- bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
- addr = &bridge->b_type1_cfg.c[(fn << 8) | PCI_VENDOR_ID];
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /*
- * IOC3 is broken beyond belief ... Don't even give the
- * generic PCI code a chance to look at it for real ...
- */
- if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
- goto is_ioc3;
-
- bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
- addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
-
- if (size == 1)
- res = get_dbe(*value, (u8 *) addr);
- else if (size == 2)
- res = get_dbe(*value, (u16 *) addr);
- else
- res = get_dbe(*value, (u32 *) addr);
-
- return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
- /*
- * IOC3 special handling
- */
- if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
- *value = emulate_ioc3_cfg(where, size);
- return PCIBIOS_SUCCESSFUL;
- }
-
- bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
- addr = &bridge->b_type1_cfg.c[(fn << 8) | where];
-
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- shift = ((where & 3) << 3);
- mask = (0xffffffffU >> ((4 - size) << 3));
- *value = (cf >> shift) & mask;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int pci_read_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 * value)
-{
- if (!pci_is_root_bus(bus))
- return pci_conf1_read_config(bus, devfn, where, size, value);
-
- return pci_conf0_read_config(bus, devfn, where, size, value);
-}
-
-static int pci_conf0_write_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 value)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
- struct bridge_regs *bridge = bc->base;
- int slot = PCI_SLOT(devfn);
- int fn = PCI_FUNC(devfn);
- volatile void *addr;
- u32 cf, shift, mask, smask;
- int res;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /*
- * IOC3 is broken beyond belief ... Don't even give the
- * generic PCI code a chance to look at it for real ...
- */
- if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
- goto is_ioc3;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
-
- if (size == 1) {
- res = put_dbe(value, (u8 *) addr);
- } else if (size == 2) {
- res = put_dbe(value, (u16 *) addr);
- } else {
- res = put_dbe(value, (u32 *) addr);
- }
-
- if (res)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
- /*
- * IOC3 special handling
- */
- if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
- return PCIBIOS_SUCCESSFUL;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- shift = ((where & 3) << 3);
- mask = (0xffffffffU >> ((4 - size) << 3));
- smask = mask << shift;
-
- cf = (cf & ~smask) | ((value & mask) << shift);
- if (put_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 value)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
- struct bridge_regs *bridge = bc->base;
- int slot = PCI_SLOT(devfn);
- int fn = PCI_FUNC(devfn);
- int busno = bus->number;
- volatile void *addr;
- u32 cf, shift, mask, smask;
- int res;
-
- bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
- addr = &bridge->b_type1_cfg.c[(fn << 8) | PCI_VENDOR_ID];
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- /*
- * IOC3 is broken beyond belief ... Don't even give the
- * generic PCI code a chance to look at it for real ...
- */
- if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
- goto is_ioc3;
-
- addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
-
- if (size == 1) {
- res = put_dbe(value, (u8 *) addr);
- } else if (size == 2) {
- res = put_dbe(value, (u16 *) addr);
- } else {
- res = put_dbe(value, (u32 *) addr);
- }
-
- if (res)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-
-is_ioc3:
-
- /*
- * IOC3 special handling
- */
- if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
- return PCIBIOS_SUCCESSFUL;
-
- addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
-
- if (get_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- shift = ((where & 3) << 3);
- mask = (0xffffffffU >> ((4 - size) << 3));
- smask = mask << shift;
-
- cf = (cf & ~smask) | ((value & mask) << shift);
- if (put_dbe(cf, (u32 *) addr))
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static int pci_write_config(struct pci_bus *bus, unsigned int devfn,
- int where, int size, u32 value)
-{
- if (!pci_is_root_bus(bus))
- return pci_conf1_write_config(bus, devfn, where, size, value);
-
- return pci_conf0_write_config(bus, devfn, where, size, value);
-}
-
-struct pci_ops bridge_pci_ops = {
- .read = pci_read_config,
- .write = pci_write_config,
-};
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 3c177b4d0609..441eb9383b20 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -7,162 +7,7 @@
* Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
*/
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/dma-direct.h>
-#include <asm/sn/arch.h>
#include <asm/pci/bridge.h>
-#include <asm/paccess.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn0/hub.h>
-
-/*
- * Max #PCI busses we can handle; ie, max #PCI bridges.
- */
-#define MAX_PCI_BUSSES 40
-
-/*
- * XXX: No kmalloc available when we do our crosstalk scan,
- * we should try to move it later in the boot process.
- */
-static struct bridge_controller bridges[MAX_PCI_BUSSES];
-
-extern struct pci_ops bridge_pci_ops;
-
-int bridge_probe(nasid_t nasid, int widget_id, int masterwid)
-{
- unsigned long offset = NODE_OFFSET(nasid);
- struct bridge_controller *bc;
- static int num_bridges = 0;
- int slot;
-
- pci_set_flags(PCI_PROBE_ONLY);
-
- printk("a bridge\n");
-
- /* XXX: kludge alert.. */
- if (!num_bridges)
- ioport_resource.end = ~0UL;
-
- bc = &bridges[num_bridges];
-
- bc->pc.pci_ops = &bridge_pci_ops;
- bc->pc.mem_resource = &bc->mem;
- bc->pc.io_resource = &bc->io;
-
- bc->pc.index = num_bridges;
-
- bc->mem.name = "Bridge PCI MEM";
- bc->pc.mem_offset = offset;
- bc->mem.start = 0;
- bc->mem.end = ~0UL;
- bc->mem.flags = IORESOURCE_MEM;
-
- bc->io.name = "Bridge IO MEM";
- bc->pc.io_offset = offset;
- bc->io.start = 0UL;
- bc->io.end = ~0UL;
- bc->io.flags = IORESOURCE_IO;
-
- bc->widget_id = widget_id;
- bc->nasid = nasid;
-
- bc->baddr = (u64)masterwid << 60 | PCI64_ATTR_BAR;
-
- /*
- * point to this bridge
- */
- bc->base = (struct bridge_regs *)RAW_NODE_SWIN_BASE(nasid, widget_id);
-
- /*
- * Clear all pending interrupts.
- */
- bridge_write(bc, b_int_rst_stat, BRIDGE_IRR_ALL_CLR);
-
- /*
- * Until otherwise set up, assume all interrupts are from slot 0
- */
- bridge_write(bc, b_int_device, 0x0);
-
- /*
- * swap pio's to pci mem and io space (big windows)
- */
- bridge_set(bc, b_wid_control, BRIDGE_CTRL_IO_SWAP |
- BRIDGE_CTRL_MEM_SWAP);
-#ifdef CONFIG_PAGE_SIZE_4KB
- bridge_clr(bc, b_wid_control, BRIDGE_CTRL_PAGE_SIZE);
-#else /* 16kB or larger */
- bridge_set(bc, b_wid_control, BRIDGE_CTRL_PAGE_SIZE);
-#endif
-
- /*
- * Hmm... IRIX sets additional bits in the address which
- * are documented as reserved in the bridge docs.
- */
- bridge_write(bc, b_wid_int_upper, 0x8000 | (masterwid << 16));
- bridge_write(bc, b_wid_int_lower, 0x01800090); /* PI_INT_PEND_MOD off*/
- bridge_write(bc, b_dir_map, (masterwid << 20)); /* DMA */
- bridge_write(bc, b_int_enable, 0);
-
- for (slot = 0; slot < 8; slot ++) {
- bridge_set(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR);
- bc->pci_int[slot] = -1;
- }
- bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */
-
- register_pci_controller(&bc->pc);
-
- num_bridges++;
-
- return 0;
-}
-
-/*
- * All observed requests have pin == 1. We could have a global here, that
- * gets incremented and returned every time - unfortunately, pci_map_irq
- * may be called on the same device over and over, and need to return the
- * same value. On O2000, pin can be 0 or 1, and PCI slots can be [0..7].
- *
- * A given PCI device, in general, should be able to intr any of the cpus
- * on any one of the hubs connected to its xbow.
- */
-int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- return 0;
-}
-
-static inline struct pci_dev *bridge_root_dev(struct pci_dev *dev)
-{
- while (dev->bus->parent) {
- /* Move up the chain of bridges. */
- dev = dev->bus->self;
- }
-
- return dev;
-}
-
-/* Do platform specific device initialization at pci_enable_device() time */
-int pcibios_plat_dev_init(struct pci_dev *dev)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
- struct pci_dev *rdev = bridge_root_dev(dev);
- int slot = PCI_SLOT(rdev->devfn);
- int irq;
-
- irq = bc->pci_int[slot];
- if (irq == -1) {
- irq = request_bridge_irq(bc, slot);
- if (irq < 0)
- return irq;
-
- bc->pci_int[slot] = irq;
- }
- dev->irq = irq;
-
- return 0;
-}
dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
@@ -177,29 +22,6 @@ phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
return dma_addr & ~(0xffUL << 56);
}
-/*
- * Device might live on a subordinate PCI bus. XXX Walk up the chain of buses
- * to find the slot number in sense of the bridge device register.
- * XXX This also means multiple devices might rely on conflicting bridge
- * settings.
- */
-
-static inline void pci_disable_swapping(struct pci_dev *dev)
-{
- struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
- struct bridge_regs *bridge = bc->base;
- int slot = PCI_SLOT(dev->devfn);
-
- /* Turn off byte swapping */
- bridge->b_device[slot].reg &= ~BRIDGE_DEV_SWAP_DIR;
- bridge->b_widget.w_tflush; /* Flush */
-}
-
-static void pci_fixup_ioc3(struct pci_dev *d)
-{
- pci_disable_swapping(d);
-}
-
#ifdef CONFIG_NUMA
int pcibus_to_node(struct pci_bus *bus)
{
@@ -209,6 +31,3 @@ int pcibus_to_node(struct pci_bus *bus)
}
EXPORT_SYMBOL(pcibus_to_node);
#endif /* CONFIG_NUMA */
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3,
- pci_fixup_ioc3);
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
new file mode 100644
index 000000000000..bcf7f559789a
--- /dev/null
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2003 Christoph Hellwig (hch@lst.de)
+ * Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/dma-direct.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/xtalk-bridge.h>
+
+#include <asm/pci/bridge.h>
+#include <asm/paccess.h>
+#include <asm/sn/irq_alloc.h>
+
+/*
+ * Most of the IOC3 PCI config register aren't present
+ * we emulate what is needed for a normal PCI enumeration
+ */
+static u32 emulate_ioc3_cfg(int where, int size)
+{
+ if (size == 1 && where == 0x3d)
+ return 0x01;
+ else if (size == 2 && where == 0x3c)
+ return 0x0100;
+ else if (size == 4 && where == 0x3c)
+ return 0x00000100;
+
+ return 0;
+}
+
+static void bridge_disable_swapping(struct pci_dev *dev)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
+ int slot = PCI_SLOT(dev->devfn);
+
+ /* Turn off byte swapping */
+ bridge_clr(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR);
+ bridge_read(bc, b_widget.w_tflush); /* Flush */
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC3,
+ bridge_disable_swapping);
+
+
+/*
+ * The Bridge ASIC supports both type 0 and type 1 access. Type 1 is
+ * not really documented, so right now I can't write code which uses it.
+ * Therefore we use type 0 accesses for now even though they won't work
+ * correctly for PCI-to-PCI bridges.
+ *
+ * The function is complicated by the ultimate brokenness of the IOC3 chip
+ * which is used in SGI systems. The IOC3 can only handle 32-bit PCI
+ * accesses and does only decode parts of it's address space.
+ */
+static int pci_conf0_read_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+ struct bridge_regs *bridge = bc->base;
+ int slot = PCI_SLOT(devfn);
+ int fn = PCI_FUNC(devfn);
+ void *addr;
+ u32 cf, shift, mask;
+ int res;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /*
+ * IOC3 is broken beyond belief ... Don't even give the
+ * generic PCI code a chance to look at it for real ...
+ */
+ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
+ goto is_ioc3;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
+
+ if (size == 1)
+ res = get_dbe(*value, (u8 *)addr);
+ else if (size == 2)
+ res = get_dbe(*value, (u16 *)addr);
+ else
+ res = get_dbe(*value, (u32 *)addr);
+
+ return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+
+is_ioc3:
+
+ /*
+ * IOC3 special handling
+ */
+ if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
+ *value = emulate_ioc3_cfg(where, size);
+ return PCIBIOS_SUCCESSFUL;
+ }
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ shift = ((where & 3) << 3);
+ mask = (0xffffffffU >> ((4 - size) << 3));
+ *value = (cf >> shift) & mask;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf1_read_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+ struct bridge_regs *bridge = bc->base;
+ int busno = bus->number;
+ int slot = PCI_SLOT(devfn);
+ int fn = PCI_FUNC(devfn);
+ void *addr;
+ u32 cf, shift, mask;
+ int res;
+
+ bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
+ addr = &bridge->b_type1_cfg.c[(fn << 8) | PCI_VENDOR_ID];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /*
+ * IOC3 is broken beyond belief ... Don't even give the
+ * generic PCI code a chance to look at it for real ...
+ */
+ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
+ goto is_ioc3;
+
+ addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
+
+ if (size == 1)
+ res = get_dbe(*value, (u8 *)addr);
+ else if (size == 2)
+ res = get_dbe(*value, (u16 *)addr);
+ else
+ res = get_dbe(*value, (u32 *)addr);
+
+ return res ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+
+is_ioc3:
+
+ /*
+ * IOC3 special handling
+ */
+ if ((where >= 0x14 && where < 0x40) || (where >= 0x48)) {
+ *value = emulate_ioc3_cfg(where, size);
+ return PCIBIOS_SUCCESSFUL;
+ }
+
+ addr = &bridge->b_type1_cfg.c[(fn << 8) | where];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ shift = ((where & 3) << 3);
+ mask = (0xffffffffU >> ((4 - size) << 3));
+ *value = (cf >> shift) & mask;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_read_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 *value)
+{
+ if (!pci_is_root_bus(bus))
+ return pci_conf1_read_config(bus, devfn, where, size, value);
+
+ return pci_conf0_read_config(bus, devfn, where, size, value);
+}
+
+static int pci_conf0_write_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+ struct bridge_regs *bridge = bc->base;
+ int slot = PCI_SLOT(devfn);
+ int fn = PCI_FUNC(devfn);
+ void *addr;
+ u32 cf, shift, mask, smask;
+ int res;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[PCI_VENDOR_ID];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /*
+ * IOC3 is broken beyond belief ... Don't even give the
+ * generic PCI code a chance to look at it for real ...
+ */
+ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
+ goto is_ioc3;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].c[where ^ (4 - size)];
+
+ if (size == 1)
+ res = put_dbe(value, (u8 *)addr);
+ else if (size == 2)
+ res = put_dbe(value, (u16 *)addr);
+ else
+ res = put_dbe(value, (u32 *)addr);
+
+ if (res)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+
+is_ioc3:
+
+ /*
+ * IOC3 special handling
+ */
+ if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
+ return PCIBIOS_SUCCESSFUL;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
+
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ shift = ((where & 3) << 3);
+ mask = (0xffffffffU >> ((4 - size) << 3));
+ smask = mask << shift;
+
+ cf = (cf & ~smask) | ((value & mask) << shift);
+ if (put_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf1_write_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+ struct bridge_regs *bridge = bc->base;
+ int slot = PCI_SLOT(devfn);
+ int fn = PCI_FUNC(devfn);
+ int busno = bus->number;
+ void *addr;
+ u32 cf, shift, mask, smask;
+ int res;
+
+ bridge_write(bc, b_pci_cfg, (busno << 16) | (slot << 11));
+ addr = &bridge->b_type1_cfg.c[(fn << 8) | PCI_VENDOR_ID];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /*
+ * IOC3 is broken beyond belief ... Don't even give the
+ * generic PCI code a chance to look at it for real ...
+ */
+ if (cf == (PCI_VENDOR_ID_SGI | (PCI_DEVICE_ID_SGI_IOC3 << 16)))
+ goto is_ioc3;
+
+ addr = &bridge->b_type1_cfg.c[(fn << 8) | (where ^ (4 - size))];
+
+ if (size == 1)
+ res = put_dbe(value, (u8 *)addr);
+ else if (size == 2)
+ res = put_dbe(value, (u16 *)addr);
+ else
+ res = put_dbe(value, (u32 *)addr);
+
+ if (res)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+
+is_ioc3:
+
+ /*
+ * IOC3 special handling
+ */
+ if ((where >= 0x14 && where < 0x40) || (where >= 0x48))
+ return PCIBIOS_SUCCESSFUL;
+
+ addr = &bridge->b_type0_cfg_dev[slot].f[fn].l[where >> 2];
+ if (get_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ shift = ((where & 3) << 3);
+ mask = (0xffffffffU >> ((4 - size) << 3));
+ smask = mask << shift;
+
+ cf = (cf & ~smask) | ((value & mask) << shift);
+ if (put_dbe(cf, (u32 *)addr))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_write_config(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 value)
+{
+ if (!pci_is_root_bus(bus))
+ return pci_conf1_write_config(bus, devfn, where, size, value);
+
+ return pci_conf0_write_config(bus, devfn, where, size, value);
+}
+
+static struct pci_ops bridge_pci_ops = {
+ .read = pci_read_config,
+ .write = pci_write_config,
+};
+
+struct bridge_irq_chip_data {
+ struct bridge_controller *bc;
+ nasid_t nasid;
+};
+
+static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask,
+ bool force)
+{
+#ifdef CONFIG_NUMA
+ struct bridge_irq_chip_data *data = d->chip_data;
+ int bit = d->parent_data->hwirq;
+ int pin = d->hwirq;
+ nasid_t nasid;
+ int ret, cpu;
+
+ ret = irq_chip_set_affinity_parent(d, mask, force);
+ if (ret >= 0) {
+ cpu = cpumask_first_and(mask, cpu_online_mask);
+ nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+ bridge_write(data->bc, b_int_addr[pin].addr,
+ (((data->bc->intr_addr >> 30) & 0x30000) |
+ bit | (nasid << 8)));
+ bridge_read(data->bc, b_wid_tflush);
+ }
+ return ret;
+#else
+ return irq_chip_set_affinity_parent(d, mask, force);
+#endif
+}
+
+struct irq_chip bridge_irq_chip = {
+ .name = "BRIDGE",
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_set_affinity = bridge_set_affinity
+};
+
+static int bridge_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct bridge_irq_chip_data *data;
+ struct irq_alloc_info *info = arg;
+ int ret;
+
+ if (nr_irqs > 1 || !info)
+ return -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret >= 0) {
+ data->bc = info->ctrl;
+ data->nasid = info->nasid;
+ irq_domain_set_info(domain, virq, info->pin, &bridge_irq_chip,
+ data, handle_level_irq, NULL, NULL);
+ } else {
+ kfree(data);
+ }
+
+ return ret;
+}
+
+static void bridge_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
+
+ if (nr_irqs)
+ return;
+
+ kfree(irqd->chip_data);
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
+}
+
+static int bridge_domain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+{
+ struct bridge_irq_chip_data *data = irqd->chip_data;
+ struct bridge_controller *bc = data->bc;
+ int bit = irqd->parent_data->hwirq;
+ int pin = irqd->hwirq;
+ u32 device;
+
+ bridge_write(bc, b_int_addr[pin].addr,
+ (((bc->intr_addr >> 30) & 0x30000) |
+ bit | (data->nasid << 8)));
+ bridge_set(bc, b_int_enable, (1 << pin));
+ bridge_set(bc, b_int_enable, 0x7ffffe00); /* more stuff in int_enable */
+
+ /*
+ * Enable sending of an interrupt clear packt to the hub on a high to
+ * low transition of the interrupt pin.
+ *
+ * IRIX sets additional bits in the address which are documented as
+ * reserved in the bridge docs.
+ */
+ bridge_set(bc, b_int_mode, (1UL << pin));
+
+ /*
+ * We assume the bridge to have a 1:1 mapping between devices
+ * (slots) and intr pins.
+ */
+ device = bridge_read(bc, b_int_device);
+ device &= ~(7 << (pin*3));
+ device |= (pin << (pin*3));
+ bridge_write(bc, b_int_device, device);
+
+ bridge_read(bc, b_wid_tflush);
+ return 0;
+}
+
+static void bridge_domain_deactivate(struct irq_domain *domain,
+ struct irq_data *irqd)
+{
+ struct bridge_irq_chip_data *data = irqd->chip_data;
+
+ bridge_clr(data->bc, b_int_enable, (1 << irqd->hwirq));
+ bridge_read(data->bc, b_wid_tflush);
+}
+
+static const struct irq_domain_ops bridge_domain_ops = {
+ .alloc = bridge_domain_alloc,
+ .free = bridge_domain_free,
+ .activate = bridge_domain_activate,
+ .deactivate = bridge_domain_deactivate
+};
+
+/*
+ * All observed requests have pin == 1. We could have a global here, that
+ * gets incremented and returned every time - unfortunately, pci_map_irq
+ * may be called on the same device over and over, and need to return the
+ * same value. On O2000, pin can be 0 or 1, and PCI slots can be [0..7].
+ *
+ * A given PCI device, in general, should be able to intr any of the cpus
+ * on any one of the hubs connected to its xbow.
+ */
+static int bridge_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(dev->bus);
+ struct irq_alloc_info info;
+ int irq;
+
+ irq = bc->pci_int[slot];
+ if (irq == -1) {
+ info.ctrl = bc;
+ info.nasid = bc->nasid;
+ info.pin = slot;
+
+ irq = irq_domain_alloc_irqs(bc->domain, 1, bc->nasid, &info);
+ if (irq < 0)
+ return irq;
+
+ bc->pci_int[slot] = irq;
+ }
+ return irq;
+}
+
+static int bridge_probe(struct platform_device *pdev)
+{
+ struct xtalk_bridge_platform_data *bd = dev_get_platdata(&pdev->dev);
+ struct device *dev = &pdev->dev;
+ struct bridge_controller *bc;
+ struct pci_host_bridge *host;
+ struct irq_domain *domain, *parent;
+ struct fwnode_handle *fn;
+ int slot;
+ int err;
+
+ parent = irq_get_default_host();
+ if (!parent)
+ return -ENODEV;
+ fn = irq_domain_alloc_named_fwnode("BRIDGE");
+ if (!fn)
+ return -ENOMEM;
+ domain = irq_domain_create_hierarchy(parent, 0, 8, fn,
+ &bridge_domain_ops, NULL);
+ irq_domain_free_fwnode(fn);
+ if (!domain)
+ return -ENOMEM;
+
+ pci_set_flags(PCI_PROBE_ONLY);
+
+ host = devm_pci_alloc_host_bridge(dev, sizeof(*bc));
+ if (!host) {
+ err = -ENOMEM;
+ goto err_remove_domain;
+ }
+
+ bc = pci_host_bridge_priv(host);
+
+ bc->busn.name = "Bridge PCI busn";
+ bc->busn.start = 0;
+ bc->busn.end = 0xff;
+ bc->busn.flags = IORESOURCE_BUS;
+
+ bc->domain = domain;
+
+ pci_add_resource_offset(&host->windows, &bd->mem, bd->mem_offset);
+ pci_add_resource_offset(&host->windows, &bd->io, bd->io_offset);
+ pci_add_resource(&host->windows, &bc->busn);
+
+ err = devm_request_pci_bus_resources(dev, &host->windows);
+ if (err < 0)
+ goto err_free_resource;
+
+ bc->nasid = bd->nasid;
+
+ bc->baddr = (u64)bd->masterwid << 60 | PCI64_ATTR_BAR;
+ bc->base = (struct bridge_regs *)bd->bridge_addr;
+ bc->intr_addr = bd->intr_addr;
+
+ /*
+ * Clear all pending interrupts.
+ */
+ bridge_write(bc, b_int_rst_stat, BRIDGE_IRR_ALL_CLR);
+
+ /*
+ * Until otherwise set up, assume all interrupts are from slot 0
+ */
+ bridge_write(bc, b_int_device, 0x0);
+
+ /*
+ * disable swapping for big windows
+ */
+ bridge_clr(bc, b_wid_control,
+ BRIDGE_CTRL_IO_SWAP | BRIDGE_CTRL_MEM_SWAP);
+#ifdef CONFIG_PAGE_SIZE_4KB
+ bridge_clr(bc, b_wid_control, BRIDGE_CTRL_PAGE_SIZE);
+#else /* 16kB or larger */
+ bridge_set(bc, b_wid_control, BRIDGE_CTRL_PAGE_SIZE);
+#endif
+
+ /*
+ * Hmm... IRIX sets additional bits in the address which
+ * are documented as reserved in the bridge docs.
+ */
+ bridge_write(bc, b_wid_int_upper,
+ ((bc->intr_addr >> 32) & 0xffff) | (bd->masterwid << 16));
+ bridge_write(bc, b_wid_int_lower, bc->intr_addr & 0xffffffff);
+ bridge_write(bc, b_dir_map, (bd->masterwid << 20)); /* DMA */
+ bridge_write(bc, b_int_enable, 0);
+
+ for (slot = 0; slot < 8; slot++) {
+ bridge_set(bc, b_device[slot].reg, BRIDGE_DEV_SWAP_DIR);
+ bc->pci_int[slot] = -1;
+ }
+ bridge_read(bc, b_wid_tflush); /* wait until Bridge PIO complete */
+
+ host->dev.parent = dev;
+ host->sysdata = bc;
+ host->busnr = 0;
+ host->ops = &bridge_pci_ops;
+ host->map_irq = bridge_map_irq;
+ host->swizzle_irq = pci_common_swizzle;
+
+ err = pci_scan_root_bus_bridge(host);
+ if (err < 0)
+ goto err_free_resource;
+
+ pci_bus_claim_resources(host->bus);
+ pci_bus_add_devices(host->bus);
+
+ platform_set_drvdata(pdev, host->bus);
+
+ return 0;
+
+err_free_resource:
+ pci_free_resource_list(&host->windows);
+err_remove_domain:
+ irq_domain_remove(domain);
+ return err;
+}
+
+static int bridge_remove(struct platform_device *pdev)
+{
+ struct pci_bus *bus = platform_get_drvdata(pdev);
+ struct bridge_controller *bc = BRIDGE_CONTROLLER(bus);
+
+ irq_domain_remove(bc->domain);
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(bus);
+ pci_remove_root_bus(bus);
+ pci_unlock_rescan_remove();
+
+ return 0;
+}
+
+static struct platform_driver bridge_driver = {
+ .probe = bridge_probe,
+ .remove = bridge_remove,
+ .driver = {
+ .name = "xtalk-bridge",
+ }
+};
+
+builtin_platform_driver(bridge_driver);
diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index 37ad26716579..0b2002e02a47 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -3,6 +3,7 @@
#include <linux/if_ether.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
#include <asm/paccess.h>
#include <asm/sgi/ip22.h>
@@ -25,6 +26,8 @@ static struct sgiwd93_platform_data sgiwd93_0_pd = {
.irq = SGI_WD93_0_IRQ,
};
+static u64 sgiwd93_0_dma_mask = DMA_BIT_MASK(32);
+
static struct platform_device sgiwd93_0_device = {
.name = "sgiwd93",
.id = 0,
@@ -32,6 +35,8 @@ static struct platform_device sgiwd93_0_device = {
.resource = sgiwd93_0_resources,
.dev = {
.platform_data = &sgiwd93_0_pd,
+ .dma_mask = &sgiwd93_0_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -49,6 +54,8 @@ static struct sgiwd93_platform_data sgiwd93_1_pd = {
.irq = SGI_WD93_1_IRQ,
};
+static u64 sgiwd93_1_dma_mask = DMA_BIT_MASK(32);
+
static struct platform_device sgiwd93_1_device = {
.name = "sgiwd93",
.id = 1,
@@ -56,6 +63,8 @@ static struct platform_device sgiwd93_1_device = {
.resource = sgiwd93_1_resources,
.dev = {
.platform_data = &sgiwd93_1_pd,
+ .dma_mask = &sgiwd93_1_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
@@ -96,6 +105,8 @@ static struct resource sgiseeq_0_resources[] = {
static struct sgiseeq_platform_data eth0_pd;
+static u64 sgiseeq_dma_mask = DMA_BIT_MASK(32);
+
static struct platform_device eth0_device = {
.name = "sgiseeq",
.id = 0,
@@ -103,6 +114,8 @@ static struct platform_device eth0_device = {
.resource = sgiseeq_0_resources,
.dev = {
.platform_data = &eth0_pd,
+ .dma_mask = &sgiseeq_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 6074efeff894..066b33f50bcc 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -184,5 +184,7 @@ void __init plat_mem_setup(void)
ioc3_eth_init();
+ ioport_resource.start = 0;
+ ioport_resource.end = ~0UL;
set_io_port_base(IO_BASE);
}
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index a32f843cdbe0..37be04975831 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -12,22 +12,20 @@
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
+#include <linux/sched.h>
#include <asm/io.h>
#include <asm/irq_cpu.h>
-#include <asm/pci/bridge.h>
#include <asm/sn/addrs.h>
#include <asm/sn/agent.h>
#include <asm/sn/arch.h>
#include <asm/sn/hub.h>
#include <asm/sn/intr.h>
+#include <asm/sn/irq_alloc.h>
struct hub_irq_data {
- struct bridge_controller *bc;
u64 *irq_mask[2];
cpuid_t cpu;
- int bit;
- int pin;
};
static DECLARE_BITMAP(hub_irq_map, IP27_HUB_IRQ_COUNT);
@@ -54,7 +52,7 @@ static void enable_hub_irq(struct irq_data *d)
struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
unsigned long *mask = per_cpu(irq_enable_mask, hd->cpu);
- set_bit(hd->bit, mask);
+ set_bit(d->hwirq, mask);
__raw_writeq(mask[0], hd->irq_mask[0]);
__raw_writeq(mask[1], hd->irq_mask[1]);
}
@@ -64,71 +62,11 @@ static void disable_hub_irq(struct irq_data *d)
struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
unsigned long *mask = per_cpu(irq_enable_mask, hd->cpu);
- clear_bit(hd->bit, mask);
+ clear_bit(d->hwirq, mask);
__raw_writeq(mask[0], hd->irq_mask[0]);
__raw_writeq(mask[1], hd->irq_mask[1]);
}
-static unsigned int startup_bridge_irq(struct irq_data *d)
-{
- struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
- struct bridge_controller *bc;
- nasid_t nasid;
- u32 device;
- int pin;
-
- if (!hd)
- return -EINVAL;
-
- pin = hd->pin;
- bc = hd->bc;
-
- nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(hd->cpu));
- bridge_write(bc, b_int_addr[pin].addr,
- (0x20000 | hd->bit | (nasid << 8)));
- bridge_set(bc, b_int_enable, (1 << pin));
- bridge_set(bc, b_int_enable, 0x7ffffe00); /* more stuff in int_enable */
-
- /*
- * Enable sending of an interrupt clear packt to the hub on a high to
- * low transition of the interrupt pin.
- *
- * IRIX sets additional bits in the address which are documented as
- * reserved in the bridge docs.
- */
- bridge_set(bc, b_int_mode, (1UL << pin));
-
- /*
- * We assume the bridge to have a 1:1 mapping between devices
- * (slots) and intr pins.
- */
- device = bridge_read(bc, b_int_device);
- device &= ~(7 << (pin*3));
- device |= (pin << (pin*3));
- bridge_write(bc, b_int_device, device);
-
- bridge_read(bc, b_wid_tflush);
-
- enable_hub_irq(d);
-
- return 0; /* Never anything pending. */
-}
-
-static void shutdown_bridge_irq(struct irq_data *d)
-{
- struct hub_irq_data *hd = irq_data_get_irq_chip_data(d);
- struct bridge_controller *bc;
-
- if (!hd)
- return;
-
- disable_hub_irq(d);
-
- bc = hd->bc;
- bridge_clr(bc, b_int_enable, (1 << hd->pin));
- bridge_read(bc, b_wid_tflush);
-}
-
static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask)
{
nasid_t nasid;
@@ -144,9 +82,6 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask)
hd->irq_mask[0] = REMOTE_HUB_PTR(nasid, PI_INT_MASK0_B);
hd->irq_mask[1] = REMOTE_HUB_PTR(nasid, PI_INT_MASK1_B);
}
-
- /* Make sure it's not already pending when we connect it. */
- REMOTE_HUB_CLR_INTR(nasid, hd->bit);
}
static int set_affinity_hub_irq(struct irq_data *d, const struct cpumask *mask,
@@ -163,7 +98,7 @@ static int set_affinity_hub_irq(struct irq_data *d, const struct cpumask *mask,
setup_hub_mask(hd, mask);
if (irqd_is_started(d))
- startup_bridge_irq(d);
+ enable_hub_irq(d);
irq_data_update_effective_affinity(d, cpumask_of(hd->cpu));
@@ -172,20 +107,22 @@ static int set_affinity_hub_irq(struct irq_data *d, const struct cpumask *mask,
static struct irq_chip hub_irq_type = {
.name = "HUB",
- .irq_startup = startup_bridge_irq,
- .irq_shutdown = shutdown_bridge_irq,
.irq_mask = disable_hub_irq,
.irq_unmask = enable_hub_irq,
.irq_set_affinity = set_affinity_hub_irq,
};
-int request_bridge_irq(struct bridge_controller *bc, int pin)
+static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
+ struct irq_alloc_info *info = arg;
struct hub_irq_data *hd;
struct hub_data *hub;
struct irq_desc *desc;
int swlevel;
- int irq;
+
+ if (nr_irqs > 1 || !info)
+ return -EINVAL;
hd = kzalloc(sizeof(*hd), GFP_KERNEL);
if (!hd)
@@ -196,46 +133,41 @@ int request_bridge_irq(struct bridge_controller *bc, int pin)
kfree(hd);
return -EAGAIN;
}
- irq = swlevel + IP27_HUB_IRQ_BASE;
-
- hd->bc = bc;
- hd->bit = swlevel;
- hd->pin = pin;
- irq_set_chip_data(irq, hd);
+ irq_domain_set_info(domain, virq, swlevel, &hub_irq_type, hd,
+ handle_level_irq, NULL, NULL);
/* use CPU connected to nearest hub */
- hub = hub_data(NASID_TO_COMPACT_NODEID(bc->nasid));
+ hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid));
setup_hub_mask(hd, &hub->h_cpus);
- desc = irq_to_desc(irq);
- desc->irq_common_data.node = bc->nasid;
+ /* Make sure it's not already pending when we connect it. */
+ REMOTE_HUB_CLR_INTR(info->nasid, swlevel);
+
+ desc = irq_to_desc(virq);
+ desc->irq_common_data.node = info->nasid;
cpumask_copy(desc->irq_common_data.affinity, &hub->h_cpus);
- return irq;
+ return 0;
}
-void ip27_hub_irq_init(void)
+static void hub_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
{
- int i;
+ struct irq_data *irqd;
- for (i = IP27_HUB_IRQ_BASE;
- i < (IP27_HUB_IRQ_BASE + IP27_HUB_IRQ_COUNT); i++)
- irq_set_chip_and_handler(i, &hub_irq_type, handle_level_irq);
-
- /*
- * Some interrupts are reserved by hardware or by software convention.
- * Mark these as reserved right away so they won't be used accidentally
- * later.
- */
- for (i = 0; i <= BASE_PCI_IRQ; i++)
- set_bit(i, hub_irq_map);
-
- set_bit(IP_PEND0_6_63, hub_irq_map);
+ if (nr_irqs > 1)
+ return;
- for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++)
- set_bit(i, hub_irq_map);
+ irqd = irq_domain_get_irq_data(domain, virq);
+ if (irqd && irqd->chip_data)
+ kfree(irqd->chip_data);
}
+static const struct irq_domain_ops hub_domain_ops = {
+ .alloc = hub_domain_alloc,
+ .free = hub_domain_free,
+};
+
/*
* This code is unnecessarily complex, because we do
* intr enabling. Basically, once we grab the set of intrs we need
@@ -252,7 +184,9 @@ static void ip27_do_irq_mask0(struct irq_desc *desc)
{
cpuid_t cpu = smp_processor_id();
unsigned long *mask = per_cpu(irq_enable_mask, cpu);
+ struct irq_domain *domain;
u64 pend0;
+ int irq;
/* copied from Irix intpend0() */
pend0 = LOCAL_HUB_L(PI_INT_PEND0);
@@ -276,7 +210,14 @@ static void ip27_do_irq_mask0(struct irq_desc *desc)
generic_smp_call_function_interrupt();
} else
#endif
- generic_handle_irq(__ffs(pend0) + IP27_HUB_IRQ_BASE);
+ {
+ domain = irq_desc_get_handler_data(desc);
+ irq = irq_linear_revmap(domain, __ffs(pend0));
+ if (irq)
+ generic_handle_irq(irq);
+ else
+ spurious_interrupt();
+ }
LOCAL_HUB_L(PI_INT_PEND0);
}
@@ -285,7 +226,9 @@ static void ip27_do_irq_mask1(struct irq_desc *desc)
{
cpuid_t cpu = smp_processor_id();
unsigned long *mask = per_cpu(irq_enable_mask, cpu);
+ struct irq_domain *domain;
u64 pend1;
+ int irq;
/* copied from Irix intpend0() */
pend1 = LOCAL_HUB_L(PI_INT_PEND1);
@@ -294,7 +237,12 @@ static void ip27_do_irq_mask1(struct irq_desc *desc)
if (!pend1)
return;
- generic_handle_irq(__ffs(pend1) + IP27_HUB_IRQ_BASE + 64);
+ domain = irq_desc_get_handler_data(desc);
+ irq = irq_linear_revmap(domain, __ffs(pend1) + 64);
+ if (irq)
+ generic_handle_irq(irq);
+ else
+ spurious_interrupt();
LOCAL_HUB_L(PI_INT_PEND1);
}
@@ -325,11 +273,41 @@ void install_ipi(void)
void __init arch_init_irq(void)
{
+ struct irq_domain *domain;
+ struct fwnode_handle *fn;
+ int i;
+
mips_cpu_irq_init();
- ip27_hub_irq_init();
+
+ /*
+ * Some interrupts are reserved by hardware or by software convention.
+ * Mark these as reserved right away so they won't be used accidentally
+ * later.
+ */
+ for (i = 0; i <= BASE_PCI_IRQ; i++)
+ set_bit(i, hub_irq_map);
+
+ set_bit(IP_PEND0_6_63, hub_irq_map);
+
+ for (i = NI_BRDCAST_ERR_A; i <= MSC_PANIC_INTR; i++)
+ set_bit(i, hub_irq_map);
+
+ fn = irq_domain_alloc_named_fwnode("HUB");
+ WARN_ON(fn == NULL);
+ if (!fn)
+ return;
+ domain = irq_domain_create_linear(fn, IP27_HUB_IRQ_COUNT,
+ &hub_domain_ops, NULL);
+ WARN_ON(domain == NULL);
+ if (!domain)
+ return;
+
+ irq_set_default_host(domain);
irq_set_percpu_devid(IP27_HUB_PEND0_IRQ);
- irq_set_chained_handler(IP27_HUB_PEND0_IRQ, ip27_do_irq_mask0);
+ irq_set_chained_handler_and_data(IP27_HUB_PEND0_IRQ, ip27_do_irq_mask0,
+ domain);
irq_set_percpu_devid(IP27_HUB_PEND1_IRQ);
- irq_set_chained_handler(IP27_HUB_PEND1_IRQ, ip27_do_irq_mask1);
+ irq_set_chained_handler_and_data(IP27_HUB_PEND1_IRQ, ip27_do_irq_mask1,
+ domain);
}
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index ce06aaa115ae..bd5cb855c6e5 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -9,6 +9,9 @@
#include <linux/kernel.h>
#include <linux/smp.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/xtalk-bridge.h>
+#include <asm/sn/addrs.h>
#include <asm/sn/types.h>
#include <asm/sn/klconfig.h>
#include <asm/sn/hub.h>
@@ -20,7 +23,48 @@
#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbow in Xbridge */
#define BASE_XBOW_PORT 8 /* Lowest external port */
-extern int bridge_probe(nasid_t nasid, int widget, int masterwid);
+static void bridge_platform_create(nasid_t nasid, int widget, int masterwid)
+{
+ struct xtalk_bridge_platform_data *bd;
+ struct platform_device *pdev;
+ unsigned long offset;
+
+ bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+ if (!bd)
+ goto no_mem;
+ pdev = platform_device_alloc("xtalk-bridge", PLATFORM_DEVID_AUTO);
+ if (!pdev) {
+ kfree(bd);
+ goto no_mem;
+ }
+
+ offset = NODE_OFFSET(nasid);
+
+ bd->bridge_addr = RAW_NODE_SWIN_BASE(nasid, widget);
+ bd->intr_addr = BIT_ULL(47) + 0x01800000 + PI_INT_PEND_MOD;
+ bd->nasid = nasid;
+ bd->masterwid = masterwid;
+
+ bd->mem.name = "Bridge PCI MEM";
+ bd->mem.start = offset + (widget << SWIN_SIZE_BITS);
+ bd->mem.end = bd->mem.start + SWIN_SIZE - 1;
+ bd->mem.flags = IORESOURCE_MEM;
+ bd->mem_offset = offset;
+
+ bd->io.name = "Bridge PCI IO";
+ bd->io.start = offset + (widget << SWIN_SIZE_BITS);
+ bd->io.end = bd->io.start + SWIN_SIZE - 1;
+ bd->io.flags = IORESOURCE_IO;
+ bd->io_offset = offset;
+
+ platform_device_add_data(pdev, bd, sizeof(*bd));
+ platform_device_add(pdev);
+ pr_info("xtalk:n%d/%x bridge widget\n", nasid, widget);
+ return;
+
+no_mem:
+ pr_warn("xtalk:n%d/%x bridge create out of memory\n", nasid, widget);
+}
static int probe_one_port(nasid_t nasid, int widget, int masterwid)
{
@@ -31,13 +75,10 @@ static int probe_one_port(nasid_t nasid, int widget, int masterwid)
(RAW_NODE_SWIN_BASE(nasid, widget) + WIDGET_ID);
partnum = XWIDGET_PART_NUM(widget_id);
- printk(KERN_INFO "Cpu %d, Nasid 0x%x, widget 0x%x (partnum 0x%x) is ",
- smp_processor_id(), nasid, widget, partnum);
-
switch (partnum) {
case BRIDGE_WIDGET_PART_NUM:
case XBRIDGE_WIDGET_PART_NUM:
- bridge_probe(nasid, widget, masterwid);
+ bridge_platform_create(nasid, widget, masterwid);
break;
default:
break;
@@ -52,8 +93,6 @@ static int xbow_probe(nasid_t nasid)
klxbow_t *xbow_p;
unsigned masterwid, i;
- printk("is xbow\n");
-
/*
* found xbow, so may have multiple bridges
* need to probe xbow
@@ -117,19 +156,17 @@ static void xtalk_probe_node(cnodeid_t nid)
(RAW_NODE_SWIN_BASE(nasid, 0x0) + WIDGET_ID);
partnum = XWIDGET_PART_NUM(widget_id);
- printk(KERN_INFO "Cpu %d, Nasid 0x%x: partnum 0x%x is ",
- smp_processor_id(), nasid, partnum);
-
switch (partnum) {
case BRIDGE_WIDGET_PART_NUM:
- bridge_probe(nasid, 0x8, 0xa);
+ bridge_platform_create(nasid, 0x8, 0xa);
break;
case XBOW_WIDGET_PART_NUM:
case XXBOW_WIDGET_PART_NUM:
+ pr_info("xtalk:n%d/0 xbow widget\n", nasid);
xbow_probe(nasid);
break;
default:
- printk(" unknown widget??\n");
+ pr_info("xtalk:n%d/0 unknown widget (0x%x)\n", nasid, partnum);
break;
}
}
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 70a1ab66d252..46537c2ca86a 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -26,6 +26,7 @@
#include <linux/leds.h>
#include <linux/device.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include <linux/irq.h>
#include <asm/bootinfo.h>
#include <asm/idle.h>
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 55559ca0efe4..2245169c72af 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -4,7 +4,7 @@
#
config NDS32
- def_bool y
+ def_bool y
select ARCH_32BIT_OFF_T
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -51,20 +51,20 @@ config GENERIC_CALIBRATE_DELAY
def_bool y
config GENERIC_CSUM
- def_bool y
+ def_bool y
config GENERIC_HWEIGHT
- def_bool y
+ def_bool y
config GENERIC_LOCKBREAK
- def_bool y
+ def_bool y
depends on PREEMPT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config STACKTRACE_SUPPORT
- def_bool y
+ def_bool y
config FIX_EARLYCON_MEM
def_bool y
@@ -79,11 +79,11 @@ config NR_CPUS
default 1
config MMU
- def_bool y
+ def_bool y
config NDS32_BUILTIN_DTB
- string "Builtin DTB"
- default ""
+ string "Builtin DTB"
+ default ""
help
User can use it to specify the dts of the SoC
endmenu
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index d8ce778d0640..f43b44d692ca 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -6,7 +6,6 @@ generic-y += bugs.h
generic-y += checksum.h
generic-y += clkdev.h
generic-y += cmpxchg.h
-generic-y += cmpxchg-local.h
generic-y += compat.h
generic-y += cputime.h
generic-y += device.h
diff --git a/arch/nds32/include/asm/assembler.h b/arch/nds32/include/asm/assembler.h
index c3855782a541..5e7c56926049 100644
--- a/arch/nds32/include/asm/assembler.h
+++ b/arch/nds32/include/asm/assembler.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_ASSEMBLER_H__
diff --git a/arch/nds32/include/asm/barrier.h b/arch/nds32/include/asm/barrier.h
index faafc373ea6c..16413172fd50 100644
--- a/arch/nds32/include/asm/barrier.h
+++ b/arch/nds32/include/asm/barrier.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_ASM_BARRIER_H
diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
index 7414fcbbab4e..e75212c76b20 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_BITFIELD_H__
diff --git a/arch/nds32/include/asm/cache.h b/arch/nds32/include/asm/cache.h
index 347db4881c5f..fc3c41b59169 100644
--- a/arch/nds32/include/asm/cache.h
+++ b/arch/nds32/include/asm/cache.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_CACHE_H__
diff --git a/arch/nds32/include/asm/cache_info.h b/arch/nds32/include/asm/cache_info.h
index 38ec458ba543..e89d8078f3a6 100644
--- a/arch/nds32/include/asm/cache_info.h
+++ b/arch/nds32/include/asm/cache_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
struct cache_info {
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 8b26198d51bb..d9ac7e6408ef 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_CACHEFLUSH_H__
diff --git a/arch/nds32/include/asm/current.h b/arch/nds32/include/asm/current.h
index b4dcd22b7bcb..65d30096142b 100644
--- a/arch/nds32/include/asm/current.h
+++ b/arch/nds32/include/asm/current.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_NDS32_CURRENT_H
diff --git a/arch/nds32/include/asm/delay.h b/arch/nds32/include/asm/delay.h
index 519ba97acb6e..56ea3894f8f8 100644
--- a/arch/nds32/include/asm/delay.h
+++ b/arch/nds32/include/asm/delay.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_DELAY_H__
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index 02250626b9f0..1c8e56d7013d 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASMNDS32_ELF_H
diff --git a/arch/nds32/include/asm/fixmap.h b/arch/nds32/include/asm/fixmap.h
index 0e60e153a71a..5a4bf11e5800 100644
--- a/arch/nds32/include/asm/fixmap.h
+++ b/arch/nds32/include/asm/fixmap.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_FIXMAP_H
diff --git a/arch/nds32/include/asm/futex.h b/arch/nds32/include/asm/futex.h
index baf178bf1d0b..5213c65c2e0b 100644
--- a/arch/nds32/include/asm/futex.h
+++ b/arch/nds32/include/asm/futex.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_FUTEX_H__
diff --git a/arch/nds32/include/asm/highmem.h b/arch/nds32/include/asm/highmem.h
index 425d546cb059..b3a82c97ded3 100644
--- a/arch/nds32/include/asm/highmem.h
+++ b/arch/nds32/include/asm/highmem.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_HIGHMEM_H
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
index 5ef8ae5ba833..16f262322b8f 100644
--- a/arch/nds32/include/asm/io.h
+++ b/arch/nds32/include/asm/io.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_IO_H
diff --git a/arch/nds32/include/asm/irqflags.h b/arch/nds32/include/asm/irqflags.h
index 2bfd00f8bc48..fb45ec46bb1b 100644
--- a/arch/nds32/include/asm/irqflags.h
+++ b/arch/nds32/include/asm/irqflags.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#include <asm/nds32.h>
diff --git a/arch/nds32/include/asm/l2_cache.h b/arch/nds32/include/asm/l2_cache.h
index 37dd5ef61de8..3ea48e19e6de 100644
--- a/arch/nds32/include/asm/l2_cache.h
+++ b/arch/nds32/include/asm/l2_cache.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef L2_CACHE_H
diff --git a/arch/nds32/include/asm/linkage.h b/arch/nds32/include/asm/linkage.h
index e708c8bdb926..a696469abb70 100644
--- a/arch/nds32/include/asm/linkage.h
+++ b/arch/nds32/include/asm/linkage.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_LINKAGE_H
diff --git a/arch/nds32/include/asm/memory.h b/arch/nds32/include/asm/memory.h
index 60efc726b56e..940d32842793 100644
--- a/arch/nds32/include/asm/memory.h
+++ b/arch/nds32/include/asm/memory.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_MEMORY_H
@@ -15,14 +15,6 @@
#define PHYS_OFFSET (0x0)
#endif
-#ifndef __virt_to_bus
-#define __virt_to_bus __virt_to_phys
-#endif
-
-#ifndef __bus_to_virt
-#define __bus_to_virt __phys_to_virt
-#endif
-
/*
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
diff --git a/arch/nds32/include/asm/mmu.h b/arch/nds32/include/asm/mmu.h
index 88b9ee8c1064..89d63afee455 100644
--- a/arch/nds32/include/asm/mmu.h
+++ b/arch/nds32/include/asm/mmu.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_MMU_H
diff --git a/arch/nds32/include/asm/mmu_context.h b/arch/nds32/include/asm/mmu_context.h
index fd7d13cefccc..b8fd3d189fdc 100644
--- a/arch/nds32/include/asm/mmu_context.h
+++ b/arch/nds32/include/asm/mmu_context.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_MMU_CONTEXT_H
diff --git a/arch/nds32/include/asm/module.h b/arch/nds32/include/asm/module.h
index 16cf9c7237ad..a3a08e993c65 100644
--- a/arch/nds32/include/asm/module.h
+++ b/arch/nds32/include/asm/module.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_NDS32_MODULE_H
diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h
index 68c38151c3e4..4994f6a9e0a0 100644
--- a/arch/nds32/include/asm/nds32.h
+++ b/arch/nds32/include/asm/nds32.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_NDS32_NDS32_H_
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 947f0491c9a7..8feb1fa12f01 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -1,5 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * SPDX-License-Identifier: GPL-2.0
* Copyright (C) 2005-2017 Andes Technology Corporation
*/
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 3c5fee5b5759..3cbc749c79aa 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_PGALLOC_H
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index ee59c1f9e4fc..c70cc56bec09 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_PGTABLE_H
diff --git a/arch/nds32/include/asm/proc-fns.h b/arch/nds32/include/asm/proc-fns.h
index bedc4f59e064..27c617fa77af 100644
--- a/arch/nds32/include/asm/proc-fns.h
+++ b/arch/nds32/include/asm/proc-fns.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_PROCFNS_H__
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
index 72024f8bc129..b82369c7659d 100644
--- a/arch/nds32/include/asm/processor.h
+++ b/arch/nds32/include/asm/processor.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_PROCESSOR_H
diff --git a/arch/nds32/include/asm/ptrace.h b/arch/nds32/include/asm/ptrace.h
index c4538839055c..919ee223620c 100644
--- a/arch/nds32/include/asm/ptrace.h
+++ b/arch/nds32/include/asm/ptrace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_PTRACE_H
diff --git a/arch/nds32/include/asm/shmparam.h b/arch/nds32/include/asm/shmparam.h
index fd1cff64b68e..3aeee946973d 100644
--- a/arch/nds32/include/asm/shmparam.h
+++ b/arch/nds32/include/asm/shmparam.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_SHMPARAM_H
diff --git a/arch/nds32/include/asm/string.h b/arch/nds32/include/asm/string.h
index 179272caa540..cae8fe16de98 100644
--- a/arch/nds32/include/asm/string.h
+++ b/arch/nds32/include/asm/string.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_STRING_H
diff --git a/arch/nds32/include/asm/swab.h b/arch/nds32/include/asm/swab.h
index e01a755a37d2..362a466f2976 100644
--- a/arch/nds32/include/asm/swab.h
+++ b/arch/nds32/include/asm/swab.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_SWAB_H__
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index 174b8571d362..899b2fb4b52f 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
// Copyright (C) 2005-2017 Andes Technology Corporation
diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h
index da32101b455d..f3b16f602cb5 100644
--- a/arch/nds32/include/asm/syscalls.h
+++ b/arch/nds32/include/asm/syscalls.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_SYSCALLS_H
diff --git a/arch/nds32/include/asm/thread_info.h b/arch/nds32/include/asm/thread_info.h
index bff741ff337b..c135111ec44e 100644
--- a/arch/nds32/include/asm/thread_info.h
+++ b/arch/nds32/include/asm/thread_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_THREAD_INFO_H
@@ -42,7 +42,6 @@ struct thread_info {
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
- * TIF_USEDFPU - FPU was used by this task this quantum (SMP)
* TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
*/
#define TIF_SIGPENDING 1
@@ -50,7 +49,6 @@ struct thread_info {
#define TIF_SINGLESTEP 3
#define TIF_NOTIFY_RESUME 4 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
-#define TIF_USEDFPU 16
#define TIF_POLLING_NRFLAG 17
#define TIF_MEMDIE 18
#define TIF_FREEZE 19
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index d5ae571c8d30..a8aff1c8b4f4 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASMNDS32_TLB_H
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
index 38ee769b18d8..97155366ea01 100644
--- a/arch/nds32/include/asm/tlbflush.h
+++ b/arch/nds32/include/asm/tlbflush.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_TLBFLUSH_H
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 116598b47c4d..8916ad9f9f13 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMANDES_UACCESS_H
diff --git a/arch/nds32/include/asm/unistd.h b/arch/nds32/include/asm/unistd.h
index b586a2862beb..bf5e2d440913 100644
--- a/arch/nds32/include/asm/unistd.h
+++ b/arch/nds32/include/asm/unistd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/nds32/include/asm/vdso.h b/arch/nds32/include/asm/vdso.h
index af2c6afc2469..89b113ffc3dc 100644
--- a/arch/nds32/include/asm/vdso.h
+++ b/arch/nds32/include/asm/vdso.h
@@ -1,5 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * SPDX-License-Identifier: GPL-2.0
* Copyright (C) 2005-2017 Andes Technology Corporation
*/
diff --git a/arch/nds32/include/asm/vdso_datapage.h b/arch/nds32/include/asm/vdso_datapage.h
index 79db5a12ca5e..74c68802021e 100644
--- a/arch/nds32/include/asm/vdso_datapage.h
+++ b/arch/nds32/include/asm/vdso_datapage.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2012 ARM Limited
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_VDSO_DATAPAGE_H
@@ -20,6 +20,7 @@ struct vdso_data {
u32 xtime_clock_sec; /* CLOCK_REALTIME - seconds */
u32 cs_mult; /* clocksource multiplier */
u32 cs_shift; /* Cycle to nanosecond divisor (power of two) */
+ u32 hrtimer_res; /* hrtimer resolution */
u64 cs_cycle_last; /* last cycle value */
u64 cs_mask; /* clocksource mask */
diff --git a/arch/nds32/include/asm/vdso_timer_info.h b/arch/nds32/include/asm/vdso_timer_info.h
index 50ba117cff12..328439ce37db 100644
--- a/arch/nds32/include/asm/vdso_timer_info.h
+++ b/arch/nds32/include/asm/vdso_timer_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
extern struct timer_info_t timer_info;
diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h
index 2d3213f5e595..b5d58ea8decb 100644
--- a/arch/nds32/include/uapi/asm/auxvec.h
+++ b/arch/nds32/include/uapi/asm/auxvec.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_AUXVEC_H
diff --git a/arch/nds32/include/uapi/asm/byteorder.h b/arch/nds32/include/uapi/asm/byteorder.h
index a23f6f3a2468..511e653c709d 100644
--- a/arch/nds32/include/uapi/asm/byteorder.h
+++ b/arch/nds32/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_BYTEORDER_H__
diff --git a/arch/nds32/include/uapi/asm/cachectl.h b/arch/nds32/include/uapi/asm/cachectl.h
index 4cdca9b23974..73793662815c 100644
--- a/arch/nds32/include/uapi/asm/cachectl.h
+++ b/arch/nds32/include/uapi/asm/cachectl.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 1994, 1995, 1996 by Ralf Baechle
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_CACHECTL
diff --git a/arch/nds32/include/uapi/asm/param.h b/arch/nds32/include/uapi/asm/param.h
index e3fb723ee362..2977534a6bd3 100644
--- a/arch/nds32/include/uapi/asm/param.h
+++ b/arch/nds32/include/uapi/asm/param.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_PARAM_H
diff --git a/arch/nds32/include/uapi/asm/ptrace.h b/arch/nds32/include/uapi/asm/ptrace.h
index 358c99e399d0..1a6e01c00e6f 100644
--- a/arch/nds32/include/uapi/asm/ptrace.h
+++ b/arch/nds32/include/uapi/asm/ptrace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __UAPI_ASM_NDS32_PTRACE_H
diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h
index 58afc416473e..628ff6b75825 100644
--- a/arch/nds32/include/uapi/asm/sigcontext.h
+++ b/arch/nds32/include/uapi/asm/sigcontext.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_SIGCONTEXT_H
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index 4ec8f543103f..c691735017ed 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2005-2017 Andes Technology Corporation
#define __ARCH_WANT_STAT64
diff --git a/arch/nds32/kernel/.gitignore b/arch/nds32/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/nds32/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/nds32/kernel/cacheinfo.c b/arch/nds32/kernel/cacheinfo.c
index 0a7bc696dd55..aab98e447feb 100644
--- a/arch/nds32/kernel/cacheinfo.c
+++ b/arch/nds32/kernel/cacheinfo.c
@@ -13,7 +13,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
this_leaf->level = level;
this_leaf->type = type;
this_leaf->coherency_line_size = CACHE_LINE_SIZE(cache_type);
- this_leaf->number_of_sets = CACHE_SET(cache_type);;
+ this_leaf->number_of_sets = CACHE_SET(cache_type);
this_leaf->ways_of_associativity = CACHE_WAY(cache_type);
this_leaf->size = this_leaf->number_of_sets *
this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index 97ba15cd4180..1df02a793364 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -163,7 +163,7 @@ resume_kernel:
gie_disable
lwi $t0, [tsk+#TSK_TI_PREEMPT]
bnez $t0, no_work_pending
-need_resched:
+
lwi $t0, [tsk+#TSK_TI_FLAGS]
andi $p1, $t0, #_TIF_NEED_RESCHED
beqz $p1, no_work_pending
@@ -173,7 +173,7 @@ need_resched:
beqz $t0, no_work_pending
jal preempt_schedule_irq
- b need_resched
+ b no_work_pending
#endif
/*
diff --git a/arch/nds32/kernel/nds32_ksyms.c b/arch/nds32/kernel/nds32_ksyms.c
index 5ecebd0e60cb..20719e42ae36 100644
--- a/arch/nds32/kernel/nds32_ksyms.c
+++ b/arch/nds32/kernel/nds32_ksyms.c
@@ -23,9 +23,3 @@ EXPORT_SYMBOL(memzero);
EXPORT_SYMBOL(__arch_copy_from_user);
EXPORT_SYMBOL(__arch_copy_to_user);
EXPORT_SYMBOL(__arch_clear_user);
-
-/* cache handling */
-EXPORT_SYMBOL(cpu_icache_inval_all);
-EXPORT_SYMBOL(cpu_dcache_wbinval_all);
-EXPORT_SYMBOL(cpu_dma_inval_range);
-EXPORT_SYMBOL(cpu_dma_wb_range);
diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c
index 016f15891f6d..90bcae6f8554 100644
--- a/arch/nds32/kernel/vdso.c
+++ b/arch/nds32/kernel/vdso.c
@@ -220,6 +220,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift;
+ vdso_data->hrtimer_res = hrtimer_resolution;
vdso_write_end(vdso_data);
}
diff --git a/arch/nds32/kernel/vdso/.gitignore b/arch/nds32/kernel/vdso/.gitignore
new file mode 100644
index 000000000000..f8b69d84238e
--- /dev/null
+++ b/arch/nds32/kernel/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/arch/nds32/kernel/vdso/Makefile b/arch/nds32/kernel/vdso/Makefile
index e6c50a701313..8792fda19a64 100644
--- a/arch/nds32/kernel/vdso/Makefile
+++ b/arch/nds32/kernel/vdso/Makefile
@@ -11,10 +11,8 @@ obj-vdso := note.o datapage.o sigreturn.o gettimeofday.o
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-ccflags-y += -fPIC -Wl,-shared -g
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib -fPIC -Wl,-shared -g \
+ -Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
@@ -28,7 +26,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
$(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold)
@@ -40,9 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
# Generate VDSO offsets using helper script
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
- $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-endef
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
@@ -65,7 +61,7 @@ gettimeofday.o : gettimeofday.c FORCE
# Actual build commands
quiet_cmd_vdsold = VDSOL $@
- cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
+ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@
quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
quiet_cmd_vdsocc = VDSOA $@
diff --git a/arch/nds32/kernel/vdso/gettimeofday.c b/arch/nds32/kernel/vdso/gettimeofday.c
index 038721af40e3..b02581891c33 100644
--- a/arch/nds32/kernel/vdso/gettimeofday.c
+++ b/arch/nds32/kernel/vdso/gettimeofday.c
@@ -208,6 +208,8 @@ static notrace int clock_getres_fallback(clockid_t _clk_id,
notrace int __vdso_clock_getres(clockid_t clk_id, struct timespec *res)
{
+ struct vdso_data *vdata = __get_datapage();
+
if (res == NULL)
return 0;
switch (clk_id) {
@@ -215,7 +217,7 @@ notrace int __vdso_clock_getres(clockid_t clk_id, struct timespec *res)
case CLOCK_MONOTONIC:
case CLOCK_MONOTONIC_RAW:
res->tv_sec = 0;
- res->tv_nsec = CLOCK_REALTIME_RES;
+ res->tv_nsec = vdata->hrtimer_res;
break;
case CLOCK_REALTIME_COARSE:
case CLOCK_MONOTONIC_COARSE:
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 1a4ab1b7525f..55703b03d172 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -260,7 +260,7 @@ void __set_fixmap(enum fixed_addresses idx,
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- pte = (pte_t *)&fixmap_pmd_p[pte_index(addr)];;
+ pte = (pte_t *)&fixmap_pmd_p[pte_index(addr)];
if (pgprot_val(flags)) {
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index fe8ca623add8..c9e377d59232 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -424,3 +424,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 1d1183048cfd..2781ebf6add4 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -93,6 +93,7 @@
#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
+#define INVALID_REGION_ID (VMEMMAP_REGION_ID + 1)
/*
* Defines the address of the vmemap area, in its own region on
@@ -119,14 +120,15 @@ static inline int get_region_id(unsigned long ea)
if (id == 0)
return USER_REGION_ID;
+ if (id != (PAGE_OFFSET >> 60))
+ return INVALID_REGION_ID;
+
if (ea < H_KERN_VIRT_START)
return LINEAR_MAP_REGION_ID;
- VM_BUG_ON(id != 0xc);
BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
region_id = NON_LINEAR_REGION_ID(ea);
- VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
return region_id;
}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6b5bb012ccb..013c76a0a03e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
struct kref kref;
};
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct list_head iommu_tables;
+ struct mutex alloc_lock;
struct page *pages[0];
};
@@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops;
struct kvmppc_xive;
struct kvmppc_xive_vcpu;
extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
struct kvmppc_passthru_irqmap;
@@ -312,7 +316,11 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
- struct kvmppc_xive *xive;
+ struct kvmppc_xive *xive; /* Current XIVE device in use */
+ struct {
+ struct kvmppc_xive *native;
+ struct kvmppc_xive *xics_on_xive;
+ } xive_devices;
struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
@@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */
#define MMIO_HPTE_CACHE_SIZE 4
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ac22b28ae78d..bc892380e6cd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
-extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap);
-extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
- unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
@@ -273,6 +269,7 @@ union kvmppc_one_reg {
u64 addr;
u64 length;
} vpaval;
+ u64 xive_timaval[2];
};
struct kvmppc_ops {
@@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested);
+
#else
static inline void __init kvm_cma_reserve(void)
{}
@@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_native_init_module(void);
+extern void kvmppc_xive_native_exit_module(void);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
@@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_native_init_module(void) { }
+static inline void kvmppc_xive_native_exit_module(void) { }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return -ENOENT; }
+
#endif /* CONFIG_KVM_XIVE */
#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
@@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src);
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 611204e588b9..58efca934311 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -232,7 +232,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
if (start <= mm->context.vdso_base && mm->context.vdso_base < end)
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index b579a943407b..eaf76f57023a 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -23,6 +23,7 @@
* same offset regardless of where the code is executing
*/
extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
/*
* Offset in the TM area of our current execution level (provided by
@@ -73,6 +74,8 @@ struct xive_q {
u32 esc_irq;
atomic_t count;
atomic_t pending_count;
+ u64 guest_qaddr;
+ u32 guest_qshift;
};
/* Global enable flags for the XIVE support */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 26ca425f4c2c..b0f72dea8b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
@@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char {
#define KVM_XICS_PRESENTED (1ULL << 43)
#define KVM_XICS_QUEUED (1ULL << 44)
+/* POWER9 XIVE Native Interrupt Controller */
+#define KVM_DEV_XIVE_GRP_CTRL 1
+#define KVM_DEV_XIVE_RESET 1
+#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
+
+/* Layout of 64-bit XIVE source configuration attribute values */
+#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0
+#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7
+#define KVM_XIVE_SOURCE_SERVER_SHIFT 3
+#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL
+#define KVM_XIVE_SOURCE_MASKED_SHIFT 32
+#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL
+#define KVM_XIVE_SOURCE_EISN_SHIFT 33
+#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL
+
+/* Layout of 64-bit EQ identifier */
+#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
+#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
+#define KVM_XIVE_EQ_SERVER_SHIFT 3
+#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
+
+/* Layout of EQ configuration values (64 bytes) */
+struct kvm_ppc_xive_eq {
+ __u32 flags;
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40];
+};
+
+#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001
+
+#define KVM_XIVE_TIMA_PAGE_OFFSET 0
+#define KVM_XIVE_ESB_PAGE_OFFSET 4
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index f2ed3ef4b129..862e2890bd3d 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -767,7 +767,6 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index,
cache_dir->kobj, "index%d", index);
if (rc) {
kobject_put(&index_dir->kobj);
- kfree(index_dir);
return;
}
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 00f5a63c8d9a..103655d84b4b 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -509,3 +509,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3223aec88b2c..4c67cc79de7c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -94,7 +94,7 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 10c5579d20ce..61a212d0daf0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_get_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
*val = get_reg_val(id, vcpu->arch.fscr);
break;
@@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_set_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
vcpu->arch.fscr = set_reg_val(id, *val);
break;
@@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
+
+#ifdef CONFIG_KVM_XICS
+ /*
+ * Free the XIVE devices which are not directly freed by the
+ * device 'release' method
+ */
+ kfree(kvm->arch.xive_devices.native);
+ kvm->arch.xive_devices.native = NULL;
+ kfree(kvm->arch.xive_devices.xics_on_xive);
+ kvm->arch.xive_devices.xics_on_xive = NULL;
+#endif /* CONFIG_KVM_XICS */
}
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
@@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
- if (xics_on_xive())
+ if (xics_on_xive()) {
kvmppc_xive_exit_module();
+ kvmppc_xive_native_exit_module();
+ }
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f100e331e69b..66270e07449a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
- __free_page(stt->pages[i]);
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
kfree(stt);
}
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+ unsigned long sttpage)
+{
+ struct page *page = stt->pages[sttpage];
+
+ if (page)
+ return page;
+
+ mutex_lock(&stt->alloc_lock);
+ page = stt->pages[sttpage];
+ if (!page) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ WARN_ON_ONCE(!page);
+ if (page)
+ stt->pages[sttpage] = page;
+ }
+ mutex_unlock(&stt->alloc_lock);
+
+ return page;
+}
+
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
- page = stt->pages[vmf->pgoff];
+ page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+ if (!page)
+ return VM_FAULT_OOM;
+
get_page(page);
vmf->page = page;
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
- int i;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
mutex_lock(&kvm->lock);
/* Check this LIOBN hasn't been previously allocated */
@@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
return ret;
- fail:
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
kfree(stt);
fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
return ret;
}
+static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ return 0;
+}
+
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
@@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_TOO_HARD;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
return H_TOO_HARD;
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+ unsigned long sttpage;
+
+ idx -= stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ page = stt->pages[sttpage];
+
+ if (!page) {
+ /* We allow any TCE, not just with read|write permissions */
+ if (!tce)
+ return;
+
+ page = kvm_spapr_get_tce_page(stt, sttpage);
+ if (!page)
+ return;
+ }
+ tbl = page_to_virt(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{
@@ -551,7 +611,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
tce = be64_to_cpu(tce);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 2206bc729b9a..484b47fa3960 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -66,8 +66,6 @@
#endif
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-
/*
* Finds a TCE table descriptor by LIOBN.
*
@@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+ return 0;
+}
+
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode,
*
@@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
- * Called in both real and virtual modes.
- * Cannot fail so kvmppc_tce_validate must be called before it.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
+ * Cannot fail so kvmppc_rm_tce_validate must be called before it.
*/
-void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
@@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ /*
+ * page must not be NULL in real mode,
+ * kvmppc_rm_ioba_validate() must have taken care of this.
+ */
+ WARN_ON_ONCE_RM(!page);
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_put);
-long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap)
+/*
+ * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
+ * in real mode.
+ * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
+ * allocated or not required (when clearing a tce entry).
+ */
+static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages, bool clearing)
{
- unsigned long gfn = tce >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ unsigned long i, idx, sttpage, sttpages;
+ unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
- memslot = search_memslots(kvm_memslots(kvm), gfn);
- if (!memslot)
- return -EINVAL;
-
- *ua = __gfn_to_hva_memslot(memslot, gfn) |
- (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+ if (ret)
+ return ret;
+ /*
+ * clearing==true says kvmppc_rm_tce_put won't be allocating pages
+ * for empty tces.
+ */
+ if (clearing)
+ return H_SUCCESS;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (prmap)
- *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-#endif
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+ TCES_PER_PAGE;
+ for (i = sttpage; i < sttpage + sttpages; ++i)
+ if (!stt->pages[i])
+ return H_TOO_HARD;
- return 0;
+ return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
@@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, 1);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS)
return ret;
@@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
}
}
- kvmppc_tce_put(stt, entry, tce);
+ kvmppc_rm_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS)
return ret;
@@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
@@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
}
- kvmppc_tce_put(stt, entry + i, tce);
+ kvmppc_rm_tce_put(stt, entry + i, tce);
}
unlock_exit:
@@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS)
return ret;
@@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+ kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
}
@@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ if (!page) {
+ vcpu->arch.regs.gpr[4] = 0;
+ return H_SUCCESS;
+ }
tbl = (u64 *)page_address(page);
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7bdcd4d7a9f0..d5fc624e0655 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -750,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
- * smb_wmb() in kvmppc_guest_entry_inject().
+ * smp_wmb() in kvmppc_guest_entry_inject().
*/
smp_rmb();
vc = vcpu->arch.vcore;
@@ -802,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
}
}
+/* Copy guest memory in place - must reside within a single memslot */
+static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
+ unsigned long len)
+{
+ struct kvm_memory_slot *to_memslot = NULL;
+ struct kvm_memory_slot *from_memslot = NULL;
+ unsigned long to_addr, from_addr;
+ int r;
+
+ /* Get HPA for from address */
+ from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
+ if (!from_memslot)
+ return -EFAULT;
+ if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
+ if (kvm_is_error_hva(from_addr))
+ return -EFAULT;
+ from_addr |= (from & (PAGE_SIZE - 1));
+
+ /* Get HPA for to address */
+ to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
+ if (!to_memslot)
+ return -EFAULT;
+ if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
+ if (kvm_is_error_hva(to_addr))
+ return -EFAULT;
+ to_addr |= (to & (PAGE_SIZE - 1));
+
+ /* Perform copy */
+ r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
+ len);
+ if (r)
+ return -EFAULT;
+ mark_page_dirty(kvm, to >> PAGE_SHIFT);
+ return 0;
+}
+
+static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ u64 pg_sz = SZ_4K; /* 4K page size */
+ u64 pg_mask = SZ_4K - 1;
+ int ret;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE) {
+ ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ } else if (flags & H_ZERO_PAGE) {
+ ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ }
+
+ /* We can ignore the remaining flags */
+
+ return H_SUCCESS;
+}
+
static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
struct kvmppc_vcore *vcore = target->arch.vcore;
@@ -1004,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
+ case H_PAGE_INIT:
+ ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
default:
return RESUME_HOST;
}
@@ -1048,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_IPOLL:
case H_XIRR_X:
#endif
+ case H_PAGE_INIT:
return 1;
}
@@ -2505,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
}
}
-static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
- struct kvm_nested_guest *nested)
-{
- cpumask_t *need_tlb_flush;
- int lpid;
-
- if (!cpu_has_feature(CPU_FTR_HVMODE))
- return;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu &= ~0x3UL;
-
- if (nested) {
- lpid = nested->shadow_lpid;
- need_tlb_flush = &nested->need_tlb_flush;
- } else {
- lpid = kvm->arch.lpid;
- need_tlb_flush = &kvm->arch.need_tlb_flush;
- }
-
- mtspr(SPRN_LPID, lpid);
- isync();
- smp_mb();
-
- if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
- radix__local_flush_tlb_lpid_guest(lpid);
- /* Clear the bit after the TLB flush */
- cpumask_clear_cpu(pcpu, need_tlb_flush);
- }
-}
-
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
@@ -3229,19 +3278,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
for (sub = 0; sub < core_info.n_subcores; ++sub)
spin_unlock(&core_info.vc[sub]->lock);
- if (kvm_is_radix(vc->kvm)) {
- /*
- * Do we need to flush the process scoped TLB for the LPAR?
- *
- * On POWER9, individual threads can come in here, but the
- * TLB is shared between the 4 threads in a core, hence
- * invalidating on one thread invalidates for all.
- * Thus we make all 4 threads use the same bit here.
- *
- * Hash must be flushed in realmode in order to use tlbiel.
- */
- kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL);
- }
+ guest_enter_irqoff();
+
+ srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+
+ this_cpu_disable_ftrace();
/*
* Interrupts will be enabled once we get into the guest,
@@ -3249,19 +3290,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
trace_hardirqs_on();
- guest_enter_irqoff();
-
- srcu_idx = srcu_read_lock(&vc->kvm->srcu);
-
- this_cpu_disable_ftrace();
-
trap = __kvmppc_vcore_entry();
+ trace_hardirqs_off();
+
this_cpu_enable_ftrace();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
- trace_hardirqs_off();
set_irq_happened(trap);
spin_lock(&vc->lock);
@@ -3514,6 +3550,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
#endif
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
@@ -3605,6 +3642,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
#endif
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -3970,7 +4008,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
unsigned long lpcr)
{
int trap, r, pcpu;
- int srcu_idx;
+ int srcu_idx, lpid;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4046,8 +4084,12 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE))
- kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
+ }
trace_hardirqs_on();
guest_enter_irqoff();
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index b0cf22477e87..6035d24f1d1d 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -805,3 +805,60 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
vcpu->arch.doorbell_request = 0;
}
}
+
+static void flush_guest_tlb(struct kvm *kvm)
+{
+ unsigned long rb, set;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ if (kvm_is_radix(kvm)) {
+ /* R=1 PRS=1 RIC=2 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (2),
+ "r" (0) : "memory");
+ for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+ rb += PPC_BIT(51); /* increment set number */
+ /* R=1 PRS=1 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (0),
+ "r" (0) : "memory");
+ }
+ } else {
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
+ "r" (0) : "memory");
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ }
+ asm volatile("ptesync": : :"memory");
+}
+
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested)
+{
+ cpumask_t *need_tlb_flush;
+
+ /*
+ * On POWER9, individual threads can come in here, but the
+ * TLB is shared between the 4 threads in a core, hence
+ * invalidating on one thread invalidates for all.
+ * Thus we make all 4 threads use the same bit.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pcpu = cpu_first_thread_sibling(pcpu);
+
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+ if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+ flush_guest_tlb(kvm);
+
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, need_tlb_flush);
+ }
+}
+EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 3b3791ed74a6..8431ad1e8391 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
+#include <linux/sizes.h>
#include <asm/trace.h>
#include <asm/kvm_ppc.h>
@@ -867,6 +868,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
return ret;
}
+static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa,
+ int writing, unsigned long *hpa,
+ struct kvm_memory_slot **memslot_p)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
+ unsigned int shift;
+ pte_t *ptep, pte;
+
+ /* Find the memslot for this address */
+ gfn = gpa >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ return H_PARAMETER;
+
+ /* Translate to host virtual address */
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+
+ /* Try to find the host pte for that virtual address */
+ ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+ if (!ptep)
+ return H_TOO_HARD;
+ pte = kvmppc_read_update_linux_pte(ptep, writing);
+ if (!pte_present(pte))
+ return H_TOO_HARD;
+
+ /* Convert to a physical address */
+ if (shift)
+ psize = 1UL << shift;
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ pa |= hva & (psize - 1);
+ pa |= gpa & ~PAGE_MASK;
+
+ if (hpa)
+ *hpa = pa;
+ if (memslot_p)
+ *memslot_p = memslot;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
+ unsigned long dest)
+{
+ struct kvm_memory_slot *memslot;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long pa, mmu_seq;
+ long ret = H_SUCCESS;
+ int i;
+
+ /* Used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check if we've been invalidated */
+ raw_spin_lock(&kvm->mmu_lock.rlock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = H_TOO_HARD;
+ goto out_unlock;
+ }
+
+ /* Zero the page */
+ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
+ dcbz((void *)pa);
+ kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+ raw_spin_unlock(&kvm->mmu_lock.rlock);
+ return ret;
+}
+
+static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
+ unsigned long dest, unsigned long src)
+{
+ unsigned long dest_pa, src_pa, mmu_seq;
+ struct kvm_memory_slot *dest_memslot;
+ struct kvm *kvm = vcpu->kvm;
+ long ret = H_SUCCESS;
+
+ /* Used later to detect if we might have been invalidated */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
+
+ ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot);
+ if (ret != H_SUCCESS)
+ return ret;
+ ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check if we've been invalidated */
+ raw_spin_lock(&kvm->mmu_lock.rlock);
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
+ ret = H_TOO_HARD;
+ goto out_unlock;
+ }
+
+ /* Copy the page */
+ memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);
+
+ kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);
+
+out_unlock:
+ raw_spin_unlock(&kvm->mmu_lock.rlock);
+ return ret;
+}
+
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 pg_mask = SZ_4K - 1; /* 4K page size */
+ long ret = H_SUCCESS;
+
+ /* Don't handle radix mode here, go up to the virtual mode handler */
+ if (kvm_is_radix(kvm))
+ return H_TOO_HARD;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE)
+ ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
+ else if (flags & H_ZERO_PAGE)
+ ret = kvmppc_do_h_page_init_zero(vcpu, dest);
+
+ /* We can ignore the other flags */
+
+ return ret;
+}
+
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index dd014308f065..f9b2620fbecd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -589,11 +589,8 @@ kvmppc_hv_entry:
1:
#endif
- /* Use cr7 as an indication of radix mode */
ld r5, HSTATE_KVM_VCORE(r13)
ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
- lbz r0, KVM_RADIX(r9)
- cmpwi cr7, r0, 0
/*
* POWER7/POWER8 host -> guest partition switch code.
@@ -616,9 +613,6 @@ kvmppc_hv_entry:
cmpwi r6,0
bne 10f
- /* Radix has already switched LPID and flushed core TLB */
- bne cr7, 22f
-
lwz r7,KVM_LPID(r9)
BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
@@ -630,41 +624,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
- /* See if we need to flush the TLB. Hash has to be done in RM */
- lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
-BEGIN_FTR_SECTION
- /*
- * On POWER9, individual threads can come in here, but the
- * TLB is shared between the 4 threads in a core, hence
- * invalidating on one thread invalidates for all.
- * Thus we make all 4 threads use the same bit here.
- */
- clrrdi r6,r6,2
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- clrldi r7,r6,64-6 /* extract bit number (6 bits) */
- srdi r6,r6,6 /* doubleword number */
- sldi r6,r6,3 /* address offset */
- add r6,r6,r9
- addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
- li r8,1
- sld r8,r8,r7
- ld r7,0(r6)
- and. r7,r7,r8
- beq 22f
- /* Flush the TLB of any entries for this LPID */
- lwz r0,KVM_TLB_SETS(r9)
- mtctr r0
- li r7,0x800 /* IS field = 0b10 */
- ptesync
- li r0,0 /* RS for P9 version of tlbiel */
-28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */
- addi r7,r7,0x1000
- bdnz 28b
- ptesync
-23: ldarx r7,0,r6 /* clear the bit after TLB flushed */
- andc r7,r7,r8
- stdcx. r7,0,r6
- bne 23b
+ /* See if we need to flush the TLB. */
+ mr r3, r9 /* kvm pointer */
+ lhz r4, PACAPACAINDEX(r13) /* physical cpu number */
+ li r5, 0 /* nested vcpu pointer */
+ bl kvmppc_check_need_tlb_flush
+ nop
+ ld r5, HSTATE_KVM_VCORE(r13)
/* Add timebase offset onto timebase */
22: ld r8,VCORE_TB_OFFSET(r5)
@@ -980,17 +946,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
- ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq no_xive
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
+ lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ li r7, TM_QW1_OS + TM_WORD2
+ mfmsr r0
+ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 2f
+ ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
+ cmpldi cr1, r10, 0
+ beq cr1, no_xive
+ eieio
+ stdx r11,r9,r10
+ stwx r8,r7,r10
+ b 3f
+2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
+ cmpldi cr1, r10, 0
+ beq cr1, no_xive
eieio
stdcix r11,r9,r10
- lwz r11, VCPU_XIVE_CAM_WORD(r4)
- li r9, TM_QW1_OS + TM_WORD2
- stwcix r11,r9,r10
- li r9, 1
+ stwcix r8,r7,r10
+3: li r9, 1
stb r9, VCPU_XIVE_PUSHED(r4)
eieio
@@ -1009,12 +985,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* on, we mask it.
*/
lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi r0,0
- beq 1f
- ld r10, VCPU_XIVE_ESC_RADDR(r4)
+ cmpwi cr1, r0,0
+ beq cr1, 1f
li r9, XIVE_ESB_SET_PQ_01
+ beq 4f /* in real mode? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r4)
+ ldx r0, r10, r9
+ b 5f
+4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
ldcix r0, r10, r9
- sync
+5: sync
/* We have a possible subtle race here: The escalation interrupt might
* have fired and be on its way to the host queue while we mask it,
@@ -2292,7 +2272,7 @@ hcall_real_table:
#endif
.long 0 /* 0x24 - H_SET_SPRG0 */
.long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
- .long 0 /* 0x2c */
+ .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
.long 0 /* 0x30 */
.long 0 /* 0x34 */
.long 0 /* 0x38 */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f78d002f0fe0..4953957333b7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
return IRQ_HANDLED;
}
-static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+ bool single_escalation)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct xive_q *q = &xc->queues[prio];
@@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
- if (xc->xive->single_escalation)
+ if (single_escalation)
name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
vcpu->kvm->arch.lpid, xc->server_num);
else
@@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
* interrupt, thus leaving it effectively masked after
* it fires once.
*/
- if (xc->xive->single_escalation) {
+ if (single_escalation) {
struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
continue;
rc = xive_provision_queue(vcpu, prio);
if (rc == 0 && !xive->single_escalation)
- xive_attach_escalation(vcpu, prio);
+ kvmppc_xive_attach_escalation(vcpu, prio,
+ xive->single_escalation);
if (rc)
return rc;
}
@@ -342,7 +344,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
}
-static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
{
struct kvm_vcpu *vcpu;
int i, rc;
@@ -380,11 +382,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY;
}
-static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
-{
- return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
-}
-
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
@@ -430,8 +427,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive_vp(xive, state->act_server),
- MASKED, state->number);
+ kvmppc_xive_vp(xive, state->act_server),
+ MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */
state->old_p = true;
state->old_q = false;
@@ -486,8 +483,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive_vp(xive, state->act_server),
- state->act_priority, state->number);
+ kvmppc_xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
/* If an EOI is needed, do it here */
if (!state->old_p)
xive_vm_source_eoi(hw_num, xd);
@@ -535,7 +532,7 @@ static int xive_target_interrupt(struct kvm *kvm,
* priority. The count for that new target will have
* already been incremented.
*/
- rc = xive_select_target(kvm, &server, prio);
+ rc = kvmppc_xive_select_target(kvm, &server, prio);
/*
* We failed to find a target ? Not much we can do
@@ -563,7 +560,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
- xive_vp(xive, server),
+ kvmppc_xive_vp(xive, server),
prio, state->number);
}
@@ -849,7 +846,8 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
/*
* We can't update the state of a "pushed" VCPU, but that
- * shouldn't happen.
+ * shouldn't happen because the vcpu->mutex makes running a
+ * vcpu mutually exclusive with doing one_reg get/set on it.
*/
if (WARN_ON(vcpu->arch.xive_pushed))
return -EIO;
@@ -940,6 +938,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Turn the IPI hard off */
xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ /*
+ * Reset ESB guest mapping. Needed when ESB pages are exposed
+ * to the guest in XIVE native mode
+ */
+ if (xive->ops && xive->ops->reset_mapped)
+ xive->ops->reset_mapped(kvm, guest_irq);
+
/* Grab info about irq */
state->pt_number = hw_irq;
state->pt_data = irq_data_get_irq_handler_data(host_data);
@@ -951,7 +956,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt.
*/
xive_native_configure_irq(hw_irq,
- xive_vp(xive, state->act_server),
+ kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1025,9 +1030,17 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
state->pt_number = 0;
state->pt_data = NULL;
+ /*
+ * Reset ESB guest mapping. Needed when ESB pages are exposed
+ * to the guest in XIVE native mode
+ */
+ if (xive->ops && xive->ops->reset_mapped) {
+ xive->ops->reset_mapped(kvm, guest_irq);
+ }
+
/* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number,
- xive_vp(xive, state->act_server),
+ kvmppc_xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1049,7 +1062,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
}
EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
-static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
struct kvm *kvm = vcpu->kvm;
@@ -1083,14 +1096,35 @@ static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
arch_spin_unlock(&sb->lock);
}
}
+
+ /* Disable vcpu's escalation interrupt */
+ if (vcpu->arch.xive_esc_on) {
+ __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+ XIVE_ESB_SET_PQ_01));
+ vcpu->arch.xive_esc_on = false;
+ }
+
+ /*
+ * Clear pointers to escalation interrupt ESB.
+ * This is safe because the vcpu->mutex is held, preventing
+ * any other CPU from concurrently executing a KVM_RUN ioctl.
+ */
+ vcpu->arch.xive_esc_vaddr = 0;
+ vcpu->arch.xive_esc_raddr = 0;
}
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
- struct kvmppc_xive *xive = xc->xive;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
int i;
+ if (!kvmppc_xics_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
/* Ensure no interrupt is still routed to that VP */
@@ -1129,6 +1163,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
}
/* Free the VP */
kfree(xc);
+
+ /* Cleanup the vcpu */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
}
int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
@@ -1146,7 +1184,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
if (xive->kvm != vcpu->kvm)
return -EPERM;
- if (vcpu->arch.irq_type)
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
pr_devel("Duplicate !\n");
@@ -1166,7 +1204,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = xive_vp(xive, cpu);
+ xc->vp_id = kvmppc_xive_vp(xive, cpu);
xc->mfrr = 0xff;
xc->valid = true;
@@ -1219,7 +1257,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
if (r == 0 && !xive->single_escalation)
- xive_attach_escalation(vcpu, i);
+ kvmppc_xive_attach_escalation(
+ vcpu, i, xive->single_escalation);
if (r)
goto bail;
} else {
@@ -1234,7 +1273,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
}
/* If not done above, attach priority 0 escalation */
- r = xive_attach_escalation(vcpu, 0);
+ r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
if (r)
goto bail;
@@ -1485,8 +1524,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
return 0;
}
-static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
- int irq)
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ struct kvmppc_xive *xive, int irq)
{
struct kvm *kvm = xive->kvm;
struct kvmppc_xive_src_block *sb;
@@ -1509,6 +1548,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
+ sb->irq_state[i].eisn = 0;
sb->irq_state[i].guest_priority = MASKED;
sb->irq_state[i].saved_priority = MASKED;
sb->irq_state[i].act_priority = MASKED;
@@ -1565,7 +1605,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
sb = kvmppc_xive_find_source(xive, irq, &idx);
if (!sb) {
pr_devel("No source, creating source block...\n");
- sb = xive_create_src_block(xive, irq);
+ sb = kvmppc_xive_create_src_block(xive, irq);
if (!sb) {
pr_devel("Failed to create block...\n");
return -ENOMEM;
@@ -1789,7 +1829,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
xive_cleanup_irq_data(xd);
}
-static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
{
int i;
@@ -1810,16 +1850,55 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
}
}
-static void kvmppc_xive_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
{
struct kvmppc_xive *xive = dev->private;
struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
int i;
+ int was_ready;
+
+ pr_devel("Releasing xive device\n");
debugfs_remove(xive->dentry);
- if (kvm)
- kvm->arch.xive = NULL;
+ /*
+ * Clearing mmu_ready temporarily while holding kvm->lock
+ * is a way of ensuring that no vcpus can enter the guest
+ * until we drop kvm->lock. Doing kick_all_cpus_sync()
+ * ensures that any vcpu executing inside the guest has
+ * exited the guest. Once kick_all_cpus_sync() has finished,
+ * we know that no vcpu can be executing the XIVE push or
+ * pull code, or executing a XICS hcall.
+ *
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd referring to the
+ * device. Therefore there can not be any of the device
+ * attribute set/get functions being executed concurrently,
+ * and similarly, the connect_vcpu and set/clr_mapped
+ * functions also cannot be being executed.
+ */
+ was_ready = kvm->arch.mmu_ready;
+ kvm->arch.mmu_ready = 0;
+ kick_all_cpus_sync();
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ kvm->arch.xive = NULL;
/* Mask and free interrupts */
for (i = 0; i <= xive->max_sbid; i++) {
@@ -1832,11 +1911,47 @@ static void kvmppc_xive_free(struct kvm_device *dev)
if (xive->vp_base != XIVE_INVALID_VP)
xive_native_free_vp_block(xive->vp_base);
+ kvm->arch.mmu_ready = was_ready;
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
- kfree(xive);
kfree(dev);
}
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM will switch of KVM device. The previous device will
+ * be "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+{
+ struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
+ &kvm->arch.xive_devices.native :
+ &kvm->arch.xive_devices.xics_on_xive;
+ struct kvmppc_xive *xive = *kvm_xive_device;
+
+ if (!xive) {
+ xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+ *kvm_xive_device = xive;
+ } else {
+ memset(xive, 0, sizeof(*xive));
+ }
+
+ return xive;
+}
+
+/*
+ * Create a XICS device with XIVE backend. kvm->lock is held.
+ */
static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xive *xive;
@@ -1845,7 +1960,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
pr_devel("Creating xive for partition\n");
- xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+ xive = kvmppc_xive_get_device(kvm, type);
if (!xive)
return -ENOMEM;
@@ -1883,6 +1998,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
+
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+ i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd =
+ irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_puts(m, "\n");
+ }
+ }
+ return 0;
+}
static int xive_debug_show(struct seq_file *m, void *private)
{
@@ -1908,7 +2060,6 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
- unsigned int i;
if (!xc)
continue;
@@ -1918,33 +2069,8 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
- for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
- u32 i0, i1, idx;
-
- if (!q->qpage && !xc->esc_virq[i])
- continue;
- seq_printf(m, " [q%d]: ", i);
-
- if (q->qpage) {
- idx = q->idx;
- i0 = be32_to_cpup(q->qpage + idx);
- idx = (idx + 1) & q->msk;
- i1 = be32_to_cpup(q->qpage + idx);
- seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
- }
- if (xc->esc_virq[i]) {
- struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
- struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
- u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
- seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
- (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
- (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
- xc->esc_virq[i], pq, xd->eoi_page);
- seq_printf(m, "\n");
- }
- }
+ kvmppc_xive_debug_show_queues(m, vcpu);
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
@@ -1999,7 +2125,7 @@ struct kvm_device_ops kvm_xive_ops = {
.name = "kvm-xive",
.create = kvmppc_xive_create,
.init = kvmppc_xive_init,
- .destroy = kvmppc_xive_free,
+ .release = kvmppc_xive_release,
.set_attr = xive_set_attr,
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index a08ae6fd4c51..426146332984 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -13,6 +13,13 @@
#include "book3s_xics.h"
/*
+ * The XIVE Interrupt source numbers are within the range 0 to
+ * KVMPPC_XICS_NR_IRQS.
+ */
+#define KVMPPC_XIVE_FIRST_IRQ 0
+#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS
+
+/*
* State for one guest irq source.
*
* For each guest source we allocate a HW interrupt in the XIVE
@@ -54,6 +61,9 @@ struct kvmppc_xive_irq_state {
bool saved_p;
bool saved_q;
u8 saved_scan_prio;
+
+ /* Xive native */
+ u32 eisn; /* Guest Effective IRQ number */
};
/* Select the "right" interrupt (IPI vs. passthrough) */
@@ -84,6 +94,11 @@ struct kvmppc_xive_src_block {
struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
+struct kvmppc_xive;
+
+struct kvmppc_xive_ops {
+ int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq);
+};
struct kvmppc_xive {
struct kvm *kvm;
@@ -122,6 +137,10 @@ struct kvmppc_xive {
/* Flags */
u8 single_escalation;
+
+ struct kvmppc_xive_ops *ops;
+ struct address_space *mapping;
+ struct mutex mapping_lock;
};
#define KVMPPC_XIVE_Q_COUNT 8
@@ -198,6 +217,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
return xive->src_blocks[bid];
}
+static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
/*
* Mapping between guest priorities and host priorities
* is as follow.
@@ -248,5 +272,18 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ struct kvmppc_xive *xive, int irq);
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+ bool single_escalation);
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
new file mode 100644
index 000000000000..6a8e698c4b6e
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -0,0 +1,1249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+ u64 val;
+
+ if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
+ offset |= offset << 4;
+
+ val = in_be64(xd->eoi_mmio + offset);
+ return (u8)val;
+}
+
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct xive_q *q = &xc->queues[prio];
+
+ xive_native_disable_queue(xc->vp_id, q, prio);
+ if (q->qpage) {
+ put_page(virt_to_page(q->qpage));
+ q->qpage = NULL;
+ }
+}
+
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ int i;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
+ pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+ /* Ensure no interrupt is still routed to that VP */
+ xc->valid = false;
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Free the queues & associated interrupts */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ /* Free the escalation irq */
+ if (xc->esc_virq[i]) {
+ free_irq(xc->esc_virq[i], vcpu);
+ irq_dispose_mapping(xc->esc_virq[i]);
+ kfree(xc->esc_virq_names[i]);
+ xc->esc_virq[i] = 0;
+ }
+
+ /* Free the queue */
+ kvmppc_xive_native_cleanup_queue(vcpu, i);
+ }
+
+ /* Free the VP */
+ kfree(xc);
+
+ /* Cleanup the vcpu */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 server_num)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_vcpu *xc = NULL;
+ int rc;
+
+ pr_devel("native_connect_vcpu(server=%d)\n", server_num);
+
+ if (dev->ops != &kvm_xive_native_ops) {
+ pr_devel("Wrong ops !\n");
+ return -EPERM;
+ }
+ if (xive->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+ return -EBUSY;
+ if (server_num >= KVM_MAX_VCPUS) {
+ pr_devel("Out of bounds !\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->kvm->lock);
+
+ if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
+ pr_devel("Duplicate !\n");
+ rc = -EEXIST;
+ goto bail;
+ }
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ rc = -ENOMEM;
+ goto bail;
+ }
+
+ vcpu->arch.xive_vcpu = xc;
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = server_num;
+
+ xc->vp_id = kvmppc_xive_vp(xive, server_num);
+ xc->valid = true;
+ vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+
+ rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+ if (rc) {
+ pr_err("Failed to get VP info from OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (rc) {
+ pr_err("Failed to enable VP in OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /* Configure VCPU fields for use by assembly push/pull */
+ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+ vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+ /* TODO: reset all queues to a clean state ? */
+bail:
+ mutex_unlock(&vcpu->kvm->lock);
+ if (rc)
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+ return rc;
+}
+
+/*
+ * Device passthrough support
+ */
+static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
+{
+ struct kvmppc_xive *xive = kvm->arch.xive;
+
+ if (irq >= KVMPPC_XIVE_NR_IRQS)
+ return -EINVAL;
+
+ /*
+ * Clear the ESB pages of the IRQ number being mapped (or
+ * unmapped) into the guest and let the the VM fault handler
+ * repopulate with the appropriate ESB pages (device or IC)
+ */
+ pr_debug("clearing esb pages for girq 0x%lx\n", irq);
+ mutex_lock(&xive->mapping_lock);
+ if (xive->mapping)
+ unmap_mapping_range(xive->mapping,
+ irq * (2ull << PAGE_SHIFT),
+ 2ull << PAGE_SHIFT, 1);
+ mutex_unlock(&xive->mapping_lock);
+ return 0;
+}
+
+static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
+ .reset_mapped = kvmppc_xive_native_reset_mapped,
+};
+
+static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct kvm_device *dev = vma->vm_file->private_data;
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u16 src;
+ u64 page;
+ unsigned long irq;
+ u64 page_offset;
+
+ /*
+ * Linux/KVM uses a two pages ESB setting, one for trigger and
+ * one for EOI
+ */
+ page_offset = vmf->pgoff - vma->vm_pgoff;
+ irq = page_offset / 2;
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb) {
+ pr_devel("%s: source %lx not found !\n", __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ state = &sb->irq_state[src];
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * first/even page is for trigger
+ * second/odd page is for EOI and management.
+ */
+ page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
+ arch_spin_unlock(&sb->lock);
+
+ if (WARN_ON(!page)) {
+ pr_err("%s: accessing invalid ESB page for source %lx !\n",
+ __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct xive_native_esb_vmops = {
+ .fault = xive_native_esb_fault,
+};
+
+static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+
+ switch (vmf->pgoff - vma->vm_pgoff) {
+ case 0: /* HW - forbid access */
+ case 1: /* HV - forbid access */
+ return VM_FAULT_SIGBUS;
+ case 2: /* OS */
+ vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
+ return VM_FAULT_NOPAGE;
+ case 3: /* USER - TODO */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+static const struct vm_operations_struct xive_native_tima_vmops = {
+ .fault = xive_native_tima_fault,
+};
+
+static int kvmppc_xive_native_mmap(struct kvm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ /* We only allow mappings at fixed offset for now */
+ if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
+ if (vma_pages(vma) > 4)
+ return -EINVAL;
+ vma->vm_ops = &xive_native_tima_vmops;
+ } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
+ if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
+ return -EINVAL;
+ vma->vm_ops = &xive_native_esb_vmops;
+ } else {
+ return -EINVAL;
+ }
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+ /*
+ * Grab the KVM device file address_space to be able to clear
+ * the ESB pages mapping when a device is passed-through into
+ * the guest.
+ */
+ xive->mapping = vma->vm_file->f_mapping;
+ return 0;
+}
+
+static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
+ u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u64 val;
+ u16 idx;
+ int rc;
+
+ pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+ if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
+ return -E2BIG;
+
+ sb = kvmppc_xive_find_source(xive, irq, &idx);
+ if (!sb) {
+ pr_debug("No source, creating source block...\n");
+ sb = kvmppc_xive_create_src_block(xive, irq);
+ if (!sb) {
+ pr_err("Failed to create block...\n");
+ return -ENOMEM;
+ }
+ }
+ state = &sb->irq_state[idx];
+
+ if (get_user(val, ubufp)) {
+ pr_err("fault getting user info !\n");
+ return -EFAULT;
+ }
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * If the source doesn't already have an IPI, allocate
+ * one and get the corresponding data
+ */
+ if (!state->ipi_number) {
+ state->ipi_number = xive_native_alloc_irq();
+ if (state->ipi_number == 0) {
+ pr_err("Failed to allocate IRQ !\n");
+ rc = -ENXIO;
+ goto unlock;
+ }
+ xive_native_populate_irq_data(state->ipi_number,
+ &state->ipi_data);
+ pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
+ state->ipi_number, irq);
+ }
+
+ /* Restore LSI state */
+ if (val & KVM_XIVE_LEVEL_SENSITIVE) {
+ state->lsi = true;
+ if (val & KVM_XIVE_LEVEL_ASSERTED)
+ state->asserted = true;
+ pr_devel(" LSI ! Asserted=%d\n", state->asserted);
+ }
+
+ /* Mask IRQ to start with */
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+ /* Increment the number of valid sources and mark this one valid */
+ if (!state->valid)
+ xive->src_count++;
+ state->valid = true;
+
+ rc = 0;
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+
+ return rc;
+}
+
+static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state,
+ u32 server, u8 priority, bool masked,
+ u32 eisn)
+{
+ struct kvm *kvm = xive->kvm;
+ u32 hw_num;
+ int rc = 0;
+
+ arch_spin_lock(&sb->lock);
+
+ if (state->act_server == server && state->act_priority == priority &&
+ state->eisn == eisn)
+ goto unlock;
+
+ pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
+ priority, server, masked, state->act_server,
+ state->act_priority);
+
+ kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+ if (priority != MASKED && !masked) {
+ rc = kvmppc_xive_select_target(kvm, &server, priority);
+ if (rc)
+ goto unlock;
+
+ state->act_priority = priority;
+ state->act_server = server;
+ state->eisn = eisn;
+
+ rc = xive_native_configure_irq(hw_num,
+ kvmppc_xive_vp(xive, server),
+ priority, eisn);
+ } else {
+ state->act_priority = MASKED;
+ state->act_server = 0;
+ state->eisn = 0;
+
+ rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
+ }
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+ return rc;
+}
+
+static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
+ long irq, u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 src;
+ u64 kvm_cfg;
+ u32 server;
+ u8 priority;
+ bool masked;
+ u32 eisn;
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[src];
+
+ if (!state->valid)
+ return -EINVAL;
+
+ if (get_user(kvm_cfg, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
+
+ priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
+ KVM_XIVE_SOURCE_PRIORITY_SHIFT;
+ server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
+ KVM_XIVE_SOURCE_SERVER_SHIFT;
+ masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
+ KVM_XIVE_SOURCE_MASKED_SHIFT;
+ eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
+ KVM_XIVE_SOURCE_EISN_SHIFT;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+
+ return kvmppc_xive_native_update_source_config(xive, sb, state, server,
+ priority, masked, eisn);
+}
+
+static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
+ long irq, u64 addr)
+{
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u16 src;
+ int rc = 0;
+
+ pr_devel("%s irq=0x%lx", __func__, irq);
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[src];
+
+ rc = -EINVAL;
+
+ arch_spin_lock(&sb->lock);
+
+ if (state->valid) {
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+ xive_native_sync_source(hw_num);
+ rc = 0;
+ }
+
+ arch_spin_unlock(&sb->lock);
+ return rc;
+}
+
+static int xive_native_validate_queue_size(u32 qshift)
+{
+ /*
+ * We only support 64K pages for the moment. This is also
+ * advertised in the DT property "ibm,xive-eq-sizes"
+ */
+ switch (qshift) {
+ case 0: /* EQ reset */
+ case 16:
+ return 0;
+ case 12:
+ case 21:
+ case 24:
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ void __user *ubufp = (void __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ int rc;
+ __be32 *qaddr = 0;
+ struct page *page;
+ struct xive_q *q;
+ gfn_t gfn;
+ unsigned long page_size;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ /*
+ * sPAPR specifies a "Unconditional Notify (n) flag" for the
+ * H_INT_SET_QUEUE_CONFIG hcall which forces notification
+ * without using the coalescing mechanisms provided by the
+ * XIVE END ESBs. This is required on KVM as notification
+ * using the END ESBs is not supported.
+ */
+ if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
+ pr_err("invalid flags %d\n", kvm_eq.flags);
+ return -EINVAL;
+ }
+
+ rc = xive_native_validate_queue_size(kvm_eq.qshift);
+ if (rc) {
+ pr_err("invalid queue size %d\n", kvm_eq.qshift);
+ return rc;
+ }
+
+ /* reset queue and disable queueing */
+ if (!kvm_eq.qshift) {
+ q->guest_qaddr = 0;
+ q->guest_qshift = 0;
+
+ rc = xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
+ if (rc) {
+ pr_err("Failed to reset queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ return rc;
+ }
+
+ if (q->qpage) {
+ put_page(virt_to_page(q->qpage));
+ q->qpage = NULL;
+ }
+
+ return 0;
+ }
+
+ if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
+ pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
+ 1ull << kvm_eq.qshift);
+ return -EINVAL;
+ }
+
+ gfn = gpa_to_gfn(kvm_eq.qaddr);
+ page = gfn_to_page(kvm, gfn);
+ if (is_error_page(page)) {
+ pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+ return -EINVAL;
+ }
+
+ page_size = kvm_host_page_size(kvm, gfn);
+ if (1ull << kvm_eq.qshift > page_size) {
+ pr_warn("Incompatible host page size %lx!\n", page_size);
+ return -EINVAL;
+ }
+
+ qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
+
+ /*
+ * Backup the queue page guest address to the mark EQ page
+ * dirty for migration.
+ */
+ q->guest_qaddr = kvm_eq.qaddr;
+ q->guest_qshift = kvm_eq.qshift;
+
+ /*
+ * Unconditional Notification is forced by default at the
+ * OPAL level because the use of END ESBs is not supported by
+ * Linux.
+ */
+ rc = xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
+ if (rc) {
+ pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ put_page(page);
+ return rc;
+ }
+
+ /*
+ * Only restore the queue state when needed. When doing the
+ * H_INT_SET_SOURCE_CONFIG hcall, it should not.
+ */
+ if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
+ rc = xive_native_set_queue_state(xc->vp_id, priority,
+ kvm_eq.qtoggle,
+ kvm_eq.qindex);
+ if (rc)
+ goto error;
+ }
+
+ rc = kvmppc_xive_attach_escalation(vcpu, priority,
+ xive->single_escalation);
+error:
+ if (rc)
+ kvmppc_xive_native_cleanup_queue(vcpu, priority);
+ return rc;
+}
+
+static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ struct xive_q *q;
+ void __user *ubufp = (u64 __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ u64 qaddr;
+ u64 qshift;
+ u64 qeoi_page;
+ u32 escalate_irq;
+ u64 qflags;
+ int rc;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ memset(&kvm_eq, 0, sizeof(kvm_eq));
+
+ if (!q->qpage)
+ return 0;
+
+ rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
+ &qeoi_page, &escalate_irq, &qflags);
+ if (rc)
+ return rc;
+
+ kvm_eq.flags = 0;
+ if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+ kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+
+ kvm_eq.qshift = q->guest_qshift;
+ kvm_eq.qaddr = q->guest_qaddr;
+
+ rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
+ &kvm_eq.qindex);
+ if (rc)
+ return rc;
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
+{
+ int i;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+ if (!state->valid)
+ continue;
+
+ if (state->act_priority == MASKED)
+ continue;
+
+ state->eisn = 0;
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+ if (state->pt_number) {
+ xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->pt_number,
+ 0, MASKED, 0);
+ }
+ }
+}
+
+static int kvmppc_xive_reset(struct kvmppc_xive *xive)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&kvm->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int prio;
+
+ if (!xc)
+ continue;
+
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+
+ /* Single escalation, no queue 7 */
+ if (prio == 7 && xive->single_escalation)
+ break;
+
+ if (xc->esc_virq[prio]) {
+ free_irq(xc->esc_virq[prio], vcpu);
+ irq_dispose_mapping(xc->esc_virq[prio]);
+ kfree(xc->esc_virq_names[prio]);
+ xc->esc_virq[prio] = 0;
+ }
+
+ kvmppc_xive_native_cleanup_queue(vcpu, prio);
+ }
+ }
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_reset_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
+{
+ int j;
+
+ for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+ struct xive_irq_data *xd;
+ u32 hw_num;
+
+ if (!state->valid)
+ continue;
+
+ /*
+ * The struct kvmppc_xive_irq_state reflects the state
+ * of the EAS configuration and not the state of the
+ * source. The source is masked setting the PQ bits to
+ * '-Q', which is what is being done before calling
+ * the KVM_DEV_XIVE_EQ_SYNC control.
+ *
+ * If a source EAS is configured, OPAL syncs the XIVE
+ * IC of the source and the XIVE IC of the previous
+ * target if any.
+ *
+ * So it should be fine ignoring MASKED sources as
+ * they have been synced already.
+ */
+ if (state->act_priority == MASKED)
+ continue;
+
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+ xive_native_sync_source(hw_num);
+ xive_native_sync_queue(hw_num);
+ }
+}
+
+static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int prio;
+
+ if (!xc)
+ return -ENOENT;
+
+ for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+
+ if (!q->qpage)
+ continue;
+
+ /* Mark EQ page dirty for migration */
+ mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
+ }
+ return 0;
+}
+
+static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
+{
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&kvm->lock);
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_native_sync_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmppc_xive_native_vcpu_eq_sync(vcpu);
+ }
+ mutex_unlock(&kvm->lock);
+
+ return 0;
+}
+
+static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XIVE_RESET:
+ return kvmppc_xive_reset(xive);
+ case KVM_DEV_XIVE_EQ_SYNC:
+ return kvmppc_xive_native_eq_sync(xive);
+ }
+ break;
+ case KVM_DEV_XIVE_GRP_SOURCE:
+ return kvmppc_xive_native_set_source(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+ return kvmppc_xive_native_set_source_config(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return kvmppc_xive_native_set_queue_config(xive, attr->attr,
+ attr->addr);
+ case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+ return kvmppc_xive_native_sync_source(xive, attr->attr,
+ attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ struct kvmppc_xive *xive = dev->private;
+
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return kvmppc_xive_native_get_queue_config(xive, attr->attr,
+ attr->addr);
+ }
+ return -ENXIO;
+}
+
+static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XIVE_RESET:
+ case KVM_DEV_XIVE_EQ_SYNC:
+ return 0;
+ }
+ break;
+ case KVM_DEV_XIVE_GRP_SOURCE:
+ case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+ case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+ if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
+ attr->attr < KVMPPC_XIVE_NR_IRQS)
+ return 0;
+ break;
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return 0;
+ }
+ return -ENXIO;
+}
+
+/*
+ * Called when device fd is closed
+ */
+static void kvmppc_xive_native_release(struct kvm_device *dev)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ int i;
+ int was_ready;
+
+ debugfs_remove(xive->dentry);
+
+ pr_devel("Releasing xive native device\n");
+
+ /*
+ * Clearing mmu_ready temporarily while holding kvm->lock
+ * is a way of ensuring that no vcpus can enter the guest
+ * until we drop kvm->lock. Doing kick_all_cpus_sync()
+ * ensures that any vcpu executing inside the guest has
+ * exited the guest. Once kick_all_cpus_sync() has finished,
+ * we know that no vcpu can be executing the XIVE push or
+ * pull code or accessing the XIVE MMIO regions.
+ *
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd or mmap referring to
+ * the device. Therefore there can not be any of the
+ * device attribute set/get, mmap, or page fault functions
+ * being executed concurrently, and similarly, the
+ * connect_vcpu and set/clr_mapped functions also cannot
+ * be being executed.
+ */
+ was_ready = kvm->arch.mmu_ready;
+ kvm->arch.mmu_ready = 0;
+ kick_all_cpus_sync();
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ kvm->arch.xive = NULL;
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ if (xive->src_blocks[i])
+ kvmppc_xive_free_sources(xive->src_blocks[i]);
+ kfree(xive->src_blocks[i]);
+ xive->src_blocks[i] = NULL;
+ }
+
+ if (xive->vp_base != XIVE_INVALID_VP)
+ xive_native_free_vp_block(xive->vp_base);
+
+ kvm->arch.mmu_ready = was_ready;
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
+
+ kfree(dev);
+}
+
+/*
+ * Create a XIVE device. kvm->lock is held.
+ */
+static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
+{
+ struct kvmppc_xive *xive;
+ struct kvm *kvm = dev->kvm;
+ int ret = 0;
+
+ pr_devel("Creating xive native device\n");
+
+ if (kvm->arch.xive)
+ return -EEXIST;
+
+ xive = kvmppc_xive_get_device(kvm, type);
+ if (!xive)
+ return -ENOMEM;
+
+ dev->private = xive;
+ xive->dev = dev;
+ xive->kvm = kvm;
+ kvm->arch.xive = xive;
+ mutex_init(&xive->mapping_lock);
+
+ /*
+ * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
+ * a default. Getting the max number of CPUs the VM was
+ * configured with would improve our usage of the XIVE VP space.
+ */
+ xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
+ pr_devel("VP_Base=%x\n", xive->vp_base);
+
+ if (xive->vp_base == XIVE_INVALID_VP)
+ ret = -ENXIO;
+
+ xive->single_escalation = xive_native_has_single_escalation();
+ xive->ops = &kvmppc_xive_native_ops;
+
+ if (ret)
+ kfree(xive);
+
+ return ret;
+}
+
+/*
+ * Interrupt Pending Buffer (IPB) offset
+ */
+#define TM_IPB_SHIFT 40
+#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)
+
+int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u64 opal_state;
+ int rc;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc)
+ return -ENOENT;
+
+ /* Thread context registers. We only care about IPB and CPPR */
+ val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
+
+ /* Get the VP state from OPAL */
+ rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
+ if (rc)
+ return rc;
+
+ /*
+ * Capture the backup of IPB register in the NVT structure and
+ * merge it in our KVM VP state.
+ */
+ val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
+
+ pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+ __func__,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word, opal_state);
+
+ return 0;
+}
+
+int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+
+ pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
+ val->xive_timaval[0], val->xive_timaval[1]);
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc || !xive)
+ return -ENOENT;
+
+ /* We can't update the state of a "pushed" VCPU */
+ if (WARN_ON(vcpu->arch.xive_pushed))
+ return -EBUSY;
+
+ /*
+ * Restore the thread context registers. IPB and CPPR should
+ * be the only ones that matter.
+ */
+ vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
+
+ /*
+ * There is no need to restore the XIVE internal state (IPB
+ * stored in the NVT) as the IPB register was merged in KVM VP
+ * state when captured.
+ */
+ return 0;
+}
+
+static int xive_native_debug_show(struct seq_file *m, void *private)
+{
+ struct kvmppc_xive *xive = m->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned int i;
+
+ if (!kvm)
+ return 0;
+
+ seq_puts(m, "=========\nVCPU state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ if (!xc)
+ continue;
+
+ seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word);
+
+ kvmppc_xive_debug_show_queues(m, vcpu);
+ }
+
+ return 0;
+}
+
+static int xive_native_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, xive_native_debug_show, inode->i_private);
+}
+
+static const struct file_operations xive_native_debug_fops = {
+ .open = xive_native_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void xive_native_debugfs_init(struct kvmppc_xive *xive)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
+ if (!name) {
+ pr_err("%s: no memory for name\n", __func__);
+ return;
+ }
+
+ xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
+ xive, &xive_native_debug_fops);
+
+ pr_debug("%s: created %s\n", __func__, name);
+ kfree(name);
+}
+
+static void kvmppc_xive_native_init(struct kvm_device *dev)
+{
+ struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
+
+ /* Register some debug interfaces */
+ xive_native_debugfs_init(xive);
+}
+
+struct kvm_device_ops kvm_xive_native_ops = {
+ .name = "kvm-xive-native",
+ .create = kvmppc_xive_native_create,
+ .init = kvmppc_xive_native_init,
+ .release = kvmppc_xive_native_release,
+ .set_attr = kvmppc_xive_native_set_attr,
+ .get_attr = kvmppc_xive_native_get_attr,
+ .has_attr = kvmppc_xive_native_has_attr,
+ .mmap = kvmppc_xive_native_mmap,
+};
+
+void kvmppc_xive_native_init_module(void)
+{
+ ;
+}
+
+void kvmppc_xive_native_exit_module(void)
+{
+ ;
+}
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 033363d6e764..0737acfd17f1 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -130,24 +130,14 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
*/
prio = ffs(pending) - 1;
- /*
- * If the most favoured prio we found pending is less
- * favored (or equal) than a pending IPI, we return
- * the IPI instead.
- *
- * Note: If pending was 0 and mfrr is 0xff, we will
- * not spurriously take an IPI because mfrr cannot
- * then be smaller than cppr.
- */
- if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
- prio = xc->mfrr;
- hirq = XICS_IPI;
- break;
- }
-
/* Don't scan past the guest cppr */
- if (prio >= xc->cppr || prio > 7)
+ if (prio >= xc->cppr || prio > 7) {
+ if (xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ }
break;
+ }
/* Grab queue and pointers */
q = &xc->queues[prio];
@@ -184,9 +174,12 @@ skip_ipi:
* been set and another occurrence of the IPI will trigger.
*/
if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
- if (scan_type == scan_fetch)
+ if (scan_type == scan_fetch) {
GLUE(X_PFX,source_eoi)(xc->vp_ipi,
&xc->vp_ipi_data);
+ q->idx = idx;
+ q->toggle = toggle;
+ }
/* Loop back on same queue with updated idx/toggle */
#ifdef XIVE_RUNTIME_CHECKS
WARN_ON(hirq && hirq != XICS_IPI);
@@ -199,32 +192,41 @@ skip_ipi:
if (hirq == XICS_DUMMY)
goto skip_ipi;
- /* If fetching, update queue pointers */
- if (scan_type == scan_fetch) {
- q->idx = idx;
- q->toggle = toggle;
- }
-
- /* Something found, stop searching */
- if (hirq)
- break;
-
- /* Clear the pending bit on the now empty queue */
- pending &= ~(1 << prio);
+ /* Clear the pending bit if the queue is now empty */
+ if (!hirq) {
+ pending &= ~(1 << prio);
- /*
- * Check if the queue count needs adjusting due to
- * interrupts being moved away.
- */
- if (atomic_read(&q->pending_count)) {
- int p = atomic_xchg(&q->pending_count, 0);
- if (p) {
+ /*
+ * Check if the queue count needs adjusting due to
+ * interrupts being moved away.
+ */
+ if (atomic_read(&q->pending_count)) {
+ int p = atomic_xchg(&q->pending_count, 0);
+ if (p) {
#ifdef XIVE_RUNTIME_CHECKS
- WARN_ON(p > atomic_read(&q->count));
+ WARN_ON(p > atomic_read(&q->count));
#endif
- atomic_sub(p, &q->count);
+ atomic_sub(p, &q->count);
+ }
}
}
+
+ /*
+ * If the most favoured prio we found pending is less
+ * favored (or equal) than a pending IPI, we return
+ * the IPI instead.
+ */
+ if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ break;
+ }
+
+ /* If fetching, update queue pointers */
+ if (scan_type == scan_fetch) {
+ q->idx = idx;
+ q->toggle = toggle;
+ }
}
/* If we are just taking a "peek", do nothing else */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8885377ec3e0..3393b166817a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -570,6 +570,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE:
+ /*
+ * We need XIVE to be enabled on the platform (implies
+ * a POWER9 processor) and the PowerNV platform, as
+ * nested is not yet supported.
+ */
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+ break;
+#endif
case KVM_CAP_PPC_ALLOC_HTAB:
r = hv_enabled;
@@ -644,9 +654,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else
r = num_online_cpus();
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
@@ -753,6 +760,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
else
kvmppc_xics_free_icp(vcpu);
break;
+ case KVMPPC_IRQ_XIVE:
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ break;
}
kvmppc_core_vcpu_free(vcpu);
@@ -1941,6 +1951,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE: {
+ struct fd f;
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ f = fdget(cap->args[0]);
+ if (!f.file)
+ break;
+
+ r = -ENXIO;
+ if (!xive_enabled())
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(f.file);
+ if (dev)
+ r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+ cap->args[1]);
+
+ fdput(f);
+ break;
+ }
+#endif /* CONFIG_KVM_XIVE */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_FWNMI:
r = -EINVAL;
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index e27792d0b744..8366c2abeafc 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -539,7 +539,8 @@ _GLOBAL(flush_hash_pages)
#ifdef CONFIG_SMP
lis r9, (mmu_hash_lock - PAGE_OFFSET)@ha
addi r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l
- lwz r8,TASK_CPU(r2)
+ tophys (r8, r2)
+ lwz r8, TASK_CPU(r8)
oris r8,r8,9
10: lwarx r0,0,r9
cmpi 0,r0,0
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index c5c9ff2d7afc..b5d92dc32844 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -556,7 +556,7 @@ static int __init add_huge_page_size(unsigned long long size)
if (size <= PAGE_SIZE || !is_power_of_2(size))
return -EINVAL;
- mmu_psize = check_and_get_huge_psize(size);
+ mmu_psize = check_and_get_huge_psize(shift);
if (mmu_psize < 0)
return -EINVAL;
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 0c037e933e55..7782201e5fe8 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -521,6 +521,9 @@ u32 xive_native_default_eq_shift(void)
}
EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
+unsigned long xive_tima_os;
+EXPORT_SYMBOL_GPL(xive_tima_os);
+
bool __init xive_native_init(void)
{
struct device_node *np;
@@ -573,6 +576,14 @@ bool __init xive_native_init(void)
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
+ /* Resource 2 is OS window */
+ if (of_address_to_resource(np, 2, &r)) {
+ pr_err("Failed to get thread mgmnt area resource\n");
+ return false;
+ }
+
+ xive_tima_os = r.start;
+
/* Grab size of provisionning pages */
xive_parse_provisioning(np);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e66745decea1..ee32c66e1af3 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,7 +27,7 @@ config RISCV
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_SMP_IDLE_THREAD
- select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
+ select GENERIC_ATOMIC64 if !64BIT
select HAVE_ARCH_AUDITSYSCALL
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_CONTIGUOUS
@@ -35,7 +35,6 @@ config RISCV
select HAVE_PERF_EVENTS
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
- select RISCV_ISA_A if SMP
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select HAVE_ARCH_TRACEHOOK
@@ -195,9 +194,6 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
-config RISCV_ISA_A
- def_bool y
-
menu "supported PMU type"
depends on PERF_EVENTS
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index c6342e638ef7..6b0741c9f348 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -39,9 +39,8 @@ endif
KBUILD_CFLAGS += -Wall
# ISA string setting
-riscv-march-$(CONFIG_ARCH_RV32I) := rv32im
-riscv-march-$(CONFIG_ARCH_RV64I) := rv64im
-riscv-march-$(CONFIG_RISCV_ISA_A) := $(riscv-march-y)a
+riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
+riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd
riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c
KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y))
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index cccd12cf27d4..5a7a19d9aa7f 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += compat.h
generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
+generic-y += extable.h
generic-y += dma.h
generic-y += dma-contiguous.h
generic-y += dma-mapping.h
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index bfc7f099ab1f..52a1fbdeab3b 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -21,7 +21,12 @@
#include <asm/asm.h>
#ifdef CONFIG_GENERIC_BUG
-#define __BUG_INSN _AC(0x00100073, UL) /* ebreak */
+#define __INSN_LENGTH_MASK _UL(0x3)
+#define __INSN_LENGTH_32 _UL(0x3)
+#define __COMPRESSED_INSN_MASK _UL(0xffff)
+
+#define __BUG_INSN_32 _UL(0x00100073) /* ebreak */
+#define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */
#ifndef __ASSEMBLY__
typedef u32 bug_insn_t;
@@ -38,38 +43,46 @@ typedef u32 bug_insn_t;
#define __BUG_ENTRY \
__BUG_ENTRY_ADDR "\n\t" \
__BUG_ENTRY_FILE "\n\t" \
- RISCV_SHORT " %1"
+ RISCV_SHORT " %1\n\t" \
+ RISCV_SHORT " %2"
#else
#define __BUG_ENTRY \
- __BUG_ENTRY_ADDR
+ __BUG_ENTRY_ADDR "\n\t" \
+ RISCV_SHORT " %2"
#endif
-#define BUG() \
+#define __BUG_FLAGS(flags) \
do { \
__asm__ __volatile__ ( \
"1:\n\t" \
"ebreak\n" \
- ".pushsection __bug_table,\"a\"\n\t" \
+ ".pushsection __bug_table,\"aw\"\n\t" \
"2:\n\t" \
__BUG_ENTRY "\n\t" \
- ".org 2b + %2\n\t" \
+ ".org 2b + %3\n\t" \
".popsection" \
: \
: "i" (__FILE__), "i" (__LINE__), \
- "i" (sizeof(struct bug_entry))); \
- unreachable(); \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry))); \
} while (0)
+
#endif /* !__ASSEMBLY__ */
#else /* CONFIG_GENERIC_BUG */
#ifndef __ASSEMBLY__
-#define BUG() \
-do { \
+#define __BUG_FLAGS(flags) do { \
__asm__ __volatile__ ("ebreak\n"); \
- unreachable(); \
} while (0)
#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_GENERIC_BUG */
+#define BUG() do { \
+ __BUG_FLAGS(0); \
+ unreachable(); \
+} while (0)
+
+#define __WARN_FLAGS(flags) __BUG_FLAGS(BUGFLAG_WARNING|(flags))
+
#define HAVE_ARCH_BUG
#include <asm-generic/bug.h>
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 8f13074413a7..1f4ba68ab9aa 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -47,7 +47,7 @@ static inline void flush_dcache_page(struct page *page)
#else /* CONFIG_SMP */
-#define flush_icache_all() sbi_remote_fence_i(NULL)
+void flush_icache_all(void);
void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 28a0d1cb374c..3c3c26c3a1f1 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -14,64 +14,95 @@
#ifndef _ASM_RISCV_CSR_H
#define _ASM_RISCV_CSR_H
+#include <asm/asm.h>
#include <linux/const.h>
/* Status register flags */
-#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
-#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
-#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
-#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
-
-#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
-#define SR_FS_OFF _AC(0x00000000, UL)
-#define SR_FS_INITIAL _AC(0x00002000, UL)
-#define SR_FS_CLEAN _AC(0x00004000, UL)
-#define SR_FS_DIRTY _AC(0x00006000, UL)
-
-#define SR_XS _AC(0x00018000, UL) /* Extension Status */
-#define SR_XS_OFF _AC(0x00000000, UL)
-#define SR_XS_INITIAL _AC(0x00008000, UL)
-#define SR_XS_CLEAN _AC(0x00010000, UL)
-#define SR_XS_DIRTY _AC(0x00018000, UL)
+#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_SUM _AC(0x00040000, UL) /* Supervisor User Memory Access */
+
+#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
+#define SR_FS_OFF _AC(0x00000000, UL)
+#define SR_FS_INITIAL _AC(0x00002000, UL)
+#define SR_FS_CLEAN _AC(0x00004000, UL)
+#define SR_FS_DIRTY _AC(0x00006000, UL)
+
+#define SR_XS _AC(0x00018000, UL) /* Extension Status */
+#define SR_XS_OFF _AC(0x00000000, UL)
+#define SR_XS_INITIAL _AC(0x00008000, UL)
+#define SR_XS_CLEAN _AC(0x00010000, UL)
+#define SR_XS_DIRTY _AC(0x00018000, UL)
#ifndef CONFIG_64BIT
-#define SR_SD _AC(0x80000000, UL) /* FS/XS dirty */
+#define SR_SD _AC(0x80000000, UL) /* FS/XS dirty */
#else
-#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */
+#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */
#endif
/* SATP flags */
-#if __riscv_xlen == 32
-#define SATP_PPN _AC(0x003FFFFF, UL)
-#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
+#ifndef CONFIG_64BIT
+#define SATP_PPN _AC(0x003FFFFF, UL)
+#define SATP_MODE_32 _AC(0x80000000, UL)
+#define SATP_MODE SATP_MODE_32
#else
-#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
-#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39 _AC(0x8000000000000000, UL)
+#define SATP_MODE SATP_MODE_39
#endif
-/* Interrupt Enable and Interrupt Pending flags */
-#define SIE_SSIE _AC(0x00000002, UL) /* Software Interrupt Enable */
-#define SIE_STIE _AC(0x00000020, UL) /* Timer Interrupt Enable */
-#define SIE_SEIE _AC(0x00000200, UL) /* External Interrupt Enable */
-
-#define EXC_INST_MISALIGNED 0
-#define EXC_INST_ACCESS 1
-#define EXC_BREAKPOINT 3
-#define EXC_LOAD_ACCESS 5
-#define EXC_STORE_ACCESS 7
-#define EXC_SYSCALL 8
-#define EXC_INST_PAGE_FAULT 12
-#define EXC_LOAD_PAGE_FAULT 13
-#define EXC_STORE_PAGE_FAULT 15
+/* SCAUSE */
+#define SCAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
+
+#define IRQ_U_SOFT 0
+#define IRQ_S_SOFT 1
+#define IRQ_M_SOFT 3
+#define IRQ_U_TIMER 4
+#define IRQ_S_TIMER 5
+#define IRQ_M_TIMER 7
+#define IRQ_U_EXT 8
+#define IRQ_S_EXT 9
+#define IRQ_M_EXT 11
+
+#define EXC_INST_MISALIGNED 0
+#define EXC_INST_ACCESS 1
+#define EXC_BREAKPOINT 3
+#define EXC_LOAD_ACCESS 5
+#define EXC_STORE_ACCESS 7
+#define EXC_SYSCALL 8
+#define EXC_INST_PAGE_FAULT 12
+#define EXC_LOAD_PAGE_FAULT 13
+#define EXC_STORE_PAGE_FAULT 15
+
+/* SIE (Interrupt Enable) and SIP (Interrupt Pending) flags */
+#define SIE_SSIE (_AC(0x1, UL) << IRQ_S_SOFT)
+#define SIE_STIE (_AC(0x1, UL) << IRQ_S_TIMER)
+#define SIE_SEIE (_AC(0x1, UL) << IRQ_S_EXT)
+
+#define CSR_CYCLE 0xc00
+#define CSR_TIME 0xc01
+#define CSR_INSTRET 0xc02
+#define CSR_SSTATUS 0x100
+#define CSR_SIE 0x104
+#define CSR_STVEC 0x105
+#define CSR_SCOUNTEREN 0x106
+#define CSR_SSCRATCH 0x140
+#define CSR_SEPC 0x141
+#define CSR_SCAUSE 0x142
+#define CSR_STVAL 0x143
+#define CSR_SIP 0x144
+#define CSR_SATP 0x180
+#define CSR_CYCLEH 0xc80
+#define CSR_TIMEH 0xc81
+#define CSR_INSTRETH 0xc82
#ifndef __ASSEMBLY__
#define csr_swap(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrrw %0, " #csr ", %1" \
+ __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1"\
: "=r" (__v) : "rK" (__v) \
: "memory"); \
__v; \
@@ -80,7 +111,7 @@
#define csr_read(csr) \
({ \
register unsigned long __v; \
- __asm__ __volatile__ ("csrr %0, " #csr \
+ __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr) \
: "=r" (__v) : \
: "memory"); \
__v; \
@@ -89,7 +120,7 @@
#define csr_write(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrw " #csr ", %0" \
+ __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0" \
: : "rK" (__v) \
: "memory"); \
})
@@ -97,7 +128,7 @@
#define csr_read_set(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrrs %0, " #csr ", %1" \
+ __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1"\
: "=r" (__v) : "rK" (__v) \
: "memory"); \
__v; \
@@ -106,7 +137,7 @@
#define csr_set(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrs " #csr ", %0" \
+ __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0" \
: : "rK" (__v) \
: "memory"); \
})
@@ -114,7 +145,7 @@
#define csr_read_clear(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrrc %0, " #csr ", %1" \
+ __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1"\
: "=r" (__v) : "rK" (__v) \
: "memory"); \
__v; \
@@ -123,7 +154,7 @@
#define csr_clear(csr, val) \
({ \
unsigned long __v = (unsigned long)(val); \
- __asm__ __volatile__ ("csrc " #csr ", %0" \
+ __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0" \
: : "rK" (__v) \
: "memory"); \
})
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index 697fc23b0d5a..ce0cd7d77eb0 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -27,13 +27,7 @@
#define ELF_CLASS ELFCLASS32
#endif
-#if defined(__LITTLE_ENDIAN)
#define ELF_DATA ELFDATA2LSB
-#elif defined(__BIG_ENDIAN)
-#define ELF_DATA ELFDATA2MSB
-#else
-#error "Unknown endianness"
-#endif
/*
* This is used to ensure we don't load something for the wrong architecture.
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 66641624d8a5..4ad6409c4647 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -7,18 +7,6 @@
#ifndef _ASM_FUTEX_H
#define _ASM_FUTEX_H
-#ifndef CONFIG_RISCV_ISA_A
-/*
- * Use the generic interrupt disabling versions if the A extension
- * is not supported.
- */
-#ifdef CONFIG_SMP
-#error "Can't support generic futex calls without A extension on SMP"
-#endif
-#include <asm-generic/futex.h>
-
-#else /* CONFIG_RISCV_ISA_A */
-
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
@@ -124,5 +112,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return ret;
}
-#endif /* CONFIG_RISCV_ISA_A */
#endif /* _ASM_FUTEX_H */
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index 07a3c6d5706f..1a69b3bcd371 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -21,25 +21,25 @@
/* read interrupt enabled status */
static inline unsigned long arch_local_save_flags(void)
{
- return csr_read(sstatus);
+ return csr_read(CSR_SSTATUS);
}
/* unconditionally enable interrupts */
static inline void arch_local_irq_enable(void)
{
- csr_set(sstatus, SR_SIE);
+ csr_set(CSR_SSTATUS, SR_SIE);
}
/* unconditionally disable interrupts */
static inline void arch_local_irq_disable(void)
{
- csr_clear(sstatus, SR_SIE);
+ csr_clear(CSR_SSTATUS, SR_SIE);
}
/* get status and disable interrupts */
static inline unsigned long arch_local_irq_save(void)
{
- return csr_read_clear(sstatus, SR_SIE);
+ return csr_read_clear(CSR_SSTATUS, SR_SIE);
}
/* test flags */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
/* set interrupt enabled status */
static inline void arch_local_irq_restore(unsigned long flags)
{
- csr_set(sstatus, flags & SR_SIE);
+ csr_set(CSR_SSTATUS, flags & SR_SIE);
}
#endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index 336d60ec5698..bf4f097a9051 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -20,8 +20,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *task)
@@ -39,61 +37,8 @@ static inline void destroy_context(struct mm_struct *mm)
{
}
-/*
- * When necessary, performs a deferred icache flush for the given MM context,
- * on the local CPU. RISC-V has no direct mechanism for instruction cache
- * shoot downs, so instead we send an IPI that informs the remote harts they
- * need to flush their local instruction caches. To avoid pathologically slow
- * behavior in a common case (a bunch of single-hart processes on a many-hart
- * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
- * executing a MM context and instead schedule a deferred local instruction
- * cache flush to be performed before execution resumes on each hart. This
- * actually performs that local instruction cache flush, which implicitly only
- * refers to the current hart.
- */
-static inline void flush_icache_deferred(struct mm_struct *mm)
-{
-#ifdef CONFIG_SMP
- unsigned int cpu = smp_processor_id();
- cpumask_t *mask = &mm->context.icache_stale_mask;
-
- if (cpumask_test_cpu(cpu, mask)) {
- cpumask_clear_cpu(cpu, mask);
- /*
- * Ensure the remote hart's writes are visible to this hart.
- * This pairs with a barrier in flush_icache_mm.
- */
- smp_mb();
- local_flush_icache_all();
- }
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev,
- struct mm_struct *next, struct task_struct *task)
-{
- if (likely(prev != next)) {
- /*
- * Mark the current MM context as inactive, and the next as
- * active. This is at least used by the icache flushing
- * routines in order to determine who should
- */
- unsigned int cpu = smp_processor_id();
-
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- /*
- * Use the old spbtr name instead of using the current satp
- * name to support binutils 2.29 which doesn't know about the
- * privileged ISA 1.10 yet.
- */
- csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
- local_flush_tlb_all();
-
- flush_icache_deferred(next);
- }
-}
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *task);
static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index d35ec2f41381..9c867a4bac83 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -70,47 +70,38 @@ struct pt_regs {
/* Helpers for working with the instruction pointer */
-#define GET_IP(regs) ((regs)->sepc)
-#define SET_IP(regs, val) (GET_IP(regs) = (val))
-
static inline unsigned long instruction_pointer(struct pt_regs *regs)
{
- return GET_IP(regs);
+ return regs->sepc;
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- SET_IP(regs, val);
+ regs->sepc = val;
}
#define profile_pc(regs) instruction_pointer(regs)
/* Helpers for working with the user stack pointer */
-#define GET_USP(regs) ((regs)->sp)
-#define SET_USP(regs, val) (GET_USP(regs) = (val))
-
static inline unsigned long user_stack_pointer(struct pt_regs *regs)
{
- return GET_USP(regs);
+ return regs->sp;
}
static inline void user_stack_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- SET_USP(regs, val);
+ regs->sp = val;
}
/* Helpers for working with the frame pointer */
-#define GET_FP(regs) ((regs)->s0)
-#define SET_FP(regs, val) (GET_FP(regs) = (val))
-
static inline unsigned long frame_pointer(struct pt_regs *regs)
{
- return GET_FP(regs);
+ return regs->s0;
}
static inline void frame_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- SET_FP(regs, val);
+ regs->s0 = val;
}
static inline unsigned long regs_return_value(struct pt_regs *regs)
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index b6bb10b92fe2..19f231615510 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -26,22 +26,27 @@
#define SBI_REMOTE_SFENCE_VMA_ASID 7
#define SBI_SHUTDOWN 8
-#define SBI_CALL(which, arg0, arg1, arg2) ({ \
+#define SBI_CALL(which, arg0, arg1, arg2, arg3) ({ \
register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \
register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \
register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); \
register uintptr_t a7 asm ("a7") = (uintptr_t)(which); \
asm volatile ("ecall" \
: "+r" (a0) \
- : "r" (a1), "r" (a2), "r" (a7) \
+ : "r" (a1), "r" (a2), "r" (a3), "r" (a7) \
: "memory"); \
a0; \
})
/* Lazy implementations until SBI is finalized */
-#define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0)
-#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0)
-#define SBI_CALL_2(which, arg0, arg1) SBI_CALL(which, arg0, arg1, 0)
+#define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0, 0)
+#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0, 0)
+#define SBI_CALL_2(which, arg0, arg1) SBI_CALL(which, arg0, arg1, 0, 0)
+#define SBI_CALL_3(which, arg0, arg1, arg2) \
+ SBI_CALL(which, arg0, arg1, arg2, 0)
+#define SBI_CALL_4(which, arg0, arg1, arg2, arg3) \
+ SBI_CALL(which, arg0, arg1, arg2, arg3)
static inline void sbi_console_putchar(int ch)
{
@@ -86,7 +91,7 @@ static inline void sbi_remote_sfence_vma(const unsigned long *hart_mask,
unsigned long start,
unsigned long size)
{
- SBI_CALL_1(SBI_REMOTE_SFENCE_VMA, hart_mask);
+ SBI_CALL_3(SBI_REMOTE_SFENCE_VMA, hart_mask, start, size);
}
static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
@@ -94,7 +99,7 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
unsigned long size,
unsigned long asid)
{
- SBI_CALL_1(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask);
+ SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
}
#endif
diff --git a/arch/riscv/include/asm/sifive_l2_cache.h b/arch/riscv/include/asm/sifive_l2_cache.h
new file mode 100644
index 000000000000..04f6748fc50b
--- /dev/null
+++ b/arch/riscv/include/asm/sifive_l2_cache.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SiFive L2 Cache Controller header file
+ *
+ */
+
+#ifndef _ASM_RISCV_SIFIVE_L2_CACHE_H
+#define _ASM_RISCV_SIFIVE_L2_CACHE_H
+
+extern int register_sifive_l2_error_notifier(struct notifier_block *nb);
+extern int unregister_sifive_l2_error_notifier(struct notifier_block *nb);
+
+#define SIFIVE_L2_ERR_TYPE_CE 0
+#define SIFIVE_L2_ERR_TYPE_UE 1
+
+#endif /* _ASM_RISCV_SIFIVE_L2_CACHE_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 1c9cc8389928..9c039870019b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -28,7 +28,9 @@
#include <asm/processor.h>
#include <asm/csr.h>
-typedef unsigned long mm_segment_t;
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
/*
* low level task data that entry.S needs immediate access to
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index fb53a8089e76..b26f407be5c8 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -23,6 +23,7 @@
#include <linux/compiler.h>
#include <linux/thread_info.h>
#include <asm/byteorder.h>
+#include <asm/extable.h>
#include <asm/asm.h>
#define __enable_user_access() \
@@ -38,8 +39,10 @@
* For historical reasons, these macros are grossly misnamed.
*/
-#define KERNEL_DS (~0UL)
-#define USER_DS (TASK_SIZE)
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+#define KERNEL_DS MAKE_MM_SEG(~0UL)
+#define USER_DS MAKE_MM_SEG(TASK_SIZE)
#define get_fs() (current_thread_info()->addr_limit)
@@ -48,9 +51,9 @@ static inline void set_fs(mm_segment_t fs)
current_thread_info()->addr_limit = fs;
}
-#define segment_eq(a, b) ((a) == (b))
+#define segment_eq(a, b) ((a).seg == (b).seg)
-#define user_addr_max() (get_fs())
+#define user_addr_max() (get_fs().seg)
/**
@@ -82,7 +85,7 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
{
const mm_segment_t fs = get_fs();
- return (size <= fs) && (addr <= (fs - size));
+ return size <= fs.seg && addr <= fs.seg - size;
}
/*
@@ -98,21 +101,8 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
* on our cache or tlb entries.
*/
-struct exception_table_entry {
- unsigned long insn, fixup;
-};
-
-extern int fixup_exception(struct pt_regs *state);
-
-#if defined(__LITTLE_ENDIAN)
-#define __MSW 1
#define __LSW 0
-#elif defined(__BIG_ENDIAN)
-#define __MSW 0
-#define __LSW 1
-#else
-#error "Unknown endianness"
-#endif
+#define __MSW 1
/*
* The "__xxx" versions of the user access functions do not verify the address
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index dac98348c6a3..578bb5efc085 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -312,9 +312,6 @@ void asm_offsets(void)
- offsetof(struct task_struct, thread.fstate.f[0])
);
- /* The assembler needs access to THREAD_SIZE as well. */
- DEFINE(ASM_THREAD_SIZE, THREAD_SIZE);
-
/*
* We allocate a pt_regs on the stack when entering the kernel. This
* ensures the alignment is sane.
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index cf2fca12414a..c8d2a3223099 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -136,8 +136,7 @@ static void c_stop(struct seq_file *m, void *v)
static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
- struct device_node *node = of_get_cpu_node(cpuid_to_hartid_map(cpu_id),
- NULL);
+ struct device_node *node = of_get_cpu_node(cpu_id, NULL);
const char *compat, *isa, *mmu;
seq_printf(m, "processor\t: %lu\n", cpu_id);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index fd9b57c8b4ce..1c1ecc238cfa 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -37,11 +37,11 @@
* the kernel thread pointer. If we came from the kernel, sscratch
* will contain 0, and we should continue on the current TP.
*/
- csrrw tp, sscratch, tp
+ csrrw tp, CSR_SSCRATCH, tp
bnez tp, _save_context
_restore_kernel_tpsp:
- csrr tp, sscratch
+ csrr tp, CSR_SSCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)
_save_context:
REG_S sp, TASK_TI_USER_SP(tp)
@@ -87,11 +87,11 @@ _save_context:
li t0, SR_SUM | SR_FS
REG_L s0, TASK_TI_USER_SP(tp)
- csrrc s1, sstatus, t0
- csrr s2, sepc
- csrr s3, sbadaddr
- csrr s4, scause
- csrr s5, sscratch
+ csrrc s1, CSR_SSTATUS, t0
+ csrr s2, CSR_SEPC
+ csrr s3, CSR_STVAL
+ csrr s4, CSR_SCAUSE
+ csrr s5, CSR_SSCRATCH
REG_S s0, PT_SP(sp)
REG_S s1, PT_SSTATUS(sp)
REG_S s2, PT_SEPC(sp)
@@ -107,8 +107,8 @@ _save_context:
.macro RESTORE_ALL
REG_L a0, PT_SSTATUS(sp)
REG_L a2, PT_SEPC(sp)
- csrw sstatus, a0
- csrw sepc, a2
+ csrw CSR_SSTATUS, a0
+ csrw CSR_SEPC, a2
REG_L x1, PT_RA(sp)
REG_L x3, PT_GP(sp)
@@ -155,7 +155,7 @@ ENTRY(handle_exception)
* Set sscratch register to 0, so that if a recursive exception
* occurs, the exception vector knows it came from the kernel
*/
- csrw sscratch, x0
+ csrw CSR_SSCRATCH, x0
/* Load the global pointer */
.option push
@@ -248,7 +248,7 @@ resume_userspace:
* Save TP into sscratch, so we can find the kernel data structures
* again.
*/
- csrw sscratch, tp
+ csrw CSR_SSCRATCH, tp
restore_all:
RESTORE_ALL
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index fe884cd69abd..370c66ce187a 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -23,7 +23,8 @@
__INIT
ENTRY(_start)
/* Mask all interrupts */
- csrw sie, zero
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
/* Load the global pointer */
.option push
@@ -68,14 +69,10 @@ clear_bss_done:
/* Restore C environment */
la tp, init_task
sw zero, TASK_TI_CPU(tp)
-
- la sp, init_thread_union
- li a0, ASM_THREAD_SIZE
- add sp, sp, a0
+ la sp, init_thread_union + THREAD_SIZE
/* Start the kernel */
- mv a0, s0
- mv a1, s1
+ mv a0, s1
call parse_dtb
tail start_kernel
@@ -89,7 +86,7 @@ relocate:
/* Point stvec to virtual address of intruction after satp write */
la a0, 1f
add a0, a0, a1
- csrw stvec, a0
+ csrw CSR_STVEC, a0
/* Compute satp for kernel page tables, but don't load it yet */
la a2, swapper_pg_dir
@@ -99,18 +96,20 @@ relocate:
/*
* Load trampoline page directory, which will cause us to trap to
- * stvec if VA != PA, or simply fall through if VA == PA
+ * stvec if VA != PA, or simply fall through if VA == PA. We need a
+ * full fence here because setup_vm() just wrote these PTEs and we need
+ * to ensure the new translations are in use.
*/
la a0, trampoline_pg_dir
srl a0, a0, PAGE_SHIFT
or a0, a0, a1
sfence.vma
- csrw sptbr, a0
+ csrw CSR_SATP, a0
.align 2
1:
/* Set trap vector to spin forever to help debug */
la a0, .Lsecondary_park
- csrw stvec, a0
+ csrw CSR_STVEC, a0
/* Reload the global pointer */
.option push
@@ -118,8 +117,14 @@ relocate:
la gp, __global_pointer$
.option pop
- /* Switch to kernel page tables */
- csrw sptbr, a2
+ /*
+ * Switch to kernel page tables. A full fence is necessary in order to
+ * avoid using the trampoline translations, which are only correct for
+ * the first superpage. Fetching the fence is guarnteed to work
+ * because that first superpage is translated the same way.
+ */
+ csrw CSR_SATP, a2
+ sfence.vma
ret
@@ -130,7 +135,7 @@ relocate:
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
- csrw stvec, a3
+ csrw CSR_STVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 48e6b7db83a1..6d8659388c49 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -14,17 +14,9 @@
/*
* Possible interrupt causes:
*/
-#define INTERRUPT_CAUSE_SOFTWARE 1
-#define INTERRUPT_CAUSE_TIMER 5
-#define INTERRUPT_CAUSE_EXTERNAL 9
-
-/*
- * The high order bit of the trap cause register is always set for
- * interrupts, which allows us to differentiate them from exceptions
- * quickly. The INTERRUPT_CAUSE_* macros don't contain that bit, so we
- * need to mask it off.
- */
-#define INTERRUPT_CAUSE_FLAG (1UL << (__riscv_xlen - 1))
+#define INTERRUPT_CAUSE_SOFTWARE IRQ_S_SOFT
+#define INTERRUPT_CAUSE_TIMER IRQ_S_TIMER
+#define INTERRUPT_CAUSE_EXTERNAL IRQ_S_EXT
int arch_show_interrupts(struct seq_file *p, int prec)
{
@@ -37,7 +29,7 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
- switch (regs->scause & ~INTERRUPT_CAUSE_FLAG) {
+ switch (regs->scause & ~SCAUSE_IRQ_FLAG) {
case INTERRUPT_CAUSE_TIMER:
riscv_timer_interrupt();
break;
@@ -54,7 +46,8 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
handle_arch_irq(regs);
break;
default:
- panic("unexpected interrupt cause");
+ pr_alert("unexpected interrupt cause 0x%lx", regs->scause);
+ BUG();
}
irq_exit();
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
index 667ee70defea..91626d9ae5f2 100644
--- a/arch/riscv/kernel/perf_event.c
+++ b/arch/riscv/kernel/perf_event.c
@@ -185,10 +185,10 @@ static inline u64 read_counter(int idx)
switch (idx) {
case RISCV_PMU_CYCLE:
- val = csr_read(cycle);
+ val = csr_read(CSR_CYCLE);
break;
case RISCV_PMU_INSTRET:
- val = csr_read(instret);
+ val = csr_read(CSR_INSTRET);
break;
default:
WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index 2a53d26ffdd6..ed637aee514b 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -12,11 +12,15 @@
*/
#include <linux/reboot.h>
-#include <linux/export.h>
#include <asm/sbi.h>
-void (*pm_power_off)(void) = machine_power_off;
-EXPORT_SYMBOL(pm_power_off);
+static void default_power_off(void)
+{
+ sbi_shutdown();
+ while (1);
+}
+
+void (*pm_power_off)(void) = default_power_off;
void machine_restart(char *cmd)
{
@@ -26,11 +30,10 @@ void machine_restart(char *cmd)
void machine_halt(void)
{
- machine_power_off();
+ pm_power_off();
}
void machine_power_off(void)
{
- sbi_shutdown();
- while (1);
+ pm_power_off();
}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 540a331d1376..d93bcce004e3 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -52,9 +52,11 @@ struct screen_info screen_info = {
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;
-void __init parse_dtb(unsigned int hartid, void *dtb)
+void __init parse_dtb(phys_addr_t dtb_phys)
{
- if (early_init_dt_scan(__va(dtb)))
+ void *dtb = __va(dtb_phys);
+
+ if (early_init_dt_scan(dtb))
return;
pr_err("No DTB passed to the kernel\n");
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 837e1646091a..804d6ee4f3c5 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -234,6 +234,9 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/* Are we from a system call? */
if (regs->scause == EXC_SYSCALL) {
+ /* Avoid additional syscall restarting via ret_from_exception */
+ regs->scause = -1UL;
+
/* If so, check system call restarting.. */
switch (regs->a0) {
case -ERESTART_RESTARTBLOCK:
@@ -272,6 +275,9 @@ static void do_signal(struct pt_regs *regs)
/* Did we come from a system call? */
if (regs->scause == EXC_SYSCALL) {
+ /* Avoid additional syscall restarting via ret_from_exception */
+ regs->scause = -1UL;
+
/* Restart the system call - no handlers present */
switch (regs->a0) {
case -ERESTARTNOHAND:
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 0c41d07ec281..b2537ffa855c 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -42,7 +42,7 @@ unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
void __init smp_setup_processor_id(void)
{
- cpuid_to_hartid_map(0) = boot_cpu_hartid;
+ cpuid_to_hartid_map(0) = boot_cpu_hartid;
}
/* A collection of single bit ipi messages. */
@@ -53,7 +53,7 @@ static struct {
int riscv_hartid_to_cpuid(int hartid)
{
- int i = -1;
+ int i;
for (i = 0; i < NR_CPUS; i++)
if (cpuid_to_hartid_map(i) == hartid)
@@ -70,6 +70,12 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
for_each_cpu(cpu, in)
cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}
+
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == cpuid_to_hartid_map(cpu);
+}
+
/* Unsupported */
int setup_profiling_timer(unsigned int multiplier)
{
@@ -89,7 +95,7 @@ void riscv_software_interrupt(void)
unsigned long *stats = ipi_data[smp_processor_id()].stats;
/* Clear pending IPI */
- csr_clear(sip, SIE_SSIE);
+ csr_clear(CSR_SIP, SIE_SSIE);
while (true) {
unsigned long ops;
@@ -199,52 +205,3 @@ void smp_send_reschedule(int cpu)
send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
-/*
- * Performs an icache flush for the given MM context. RISC-V has no direct
- * mechanism for instruction cache shoot downs, so instead we send an IPI that
- * informs the remote harts they need to flush their local instruction caches.
- * To avoid pathologically slow behavior in a common case (a bunch of
- * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
- * IPIs for harts that are not currently executing a MM context and instead
- * schedule a deferred local instruction cache flush to be performed before
- * execution resumes on each hart.
- */
-void flush_icache_mm(struct mm_struct *mm, bool local)
-{
- unsigned int cpu;
- cpumask_t others, hmask, *mask;
-
- preempt_disable();
-
- /* Mark every hart's icache as needing a flush for this MM. */
- mask = &mm->context.icache_stale_mask;
- cpumask_setall(mask);
- /* Flush this hart's I$ now, and mark it as flushed. */
- cpu = smp_processor_id();
- cpumask_clear_cpu(cpu, mask);
- local_flush_icache_all();
-
- /*
- * Flush the I$ of other harts concurrently executing, and mark them as
- * flushed.
- */
- cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
- local |= cpumask_empty(&others);
- if (mm != current->active_mm || !local) {
- cpumask_clear(&hmask);
- riscv_cpuid_to_hartid_mask(&others, &hmask);
- sbi_remote_fence_i(hmask.bits);
- } else {
- /*
- * It's assumed that at least one strongly ordered operation is
- * performed on this hart between setting a hart's cpumask bit
- * and scheduling this MM context on that hart. Sending an SBI
- * remote message will do this, but in the case where no
- * messages are sent we still need to order this hart's writes
- * with flush_icache_deferred().
- */
- smp_mb();
- }
-
- preempt_enable();
-}
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index eb533b5c2c8c..7a0b62252524 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -47,6 +47,17 @@ void __init smp_prepare_boot_cpu(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ int cpuid;
+
+ /* This covers non-smp usecase mandated by "nosmp" option */
+ if (max_cpus == 0)
+ return;
+
+ for_each_possible_cpu(cpuid) {
+ if (cpuid == smp_processor_id())
+ continue;
+ set_cpu_present(cpuid, true);
+ }
}
void __init setup_smp(void)
@@ -73,12 +84,19 @@ void __init setup_smp(void)
}
cpuid_to_hartid_map(cpuid) = hart;
- set_cpu_possible(cpuid, true);
- set_cpu_present(cpuid, true);
cpuid++;
}
BUG_ON(!found_boot_cpu);
+
+ if (cpuid > nr_cpu_ids)
+ pr_warn("Total number of cpus [%d] is greater than nr_cpus option value [%d]\n",
+ cpuid, nr_cpu_ids);
+
+ for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) {
+ if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID)
+ set_cpu_possible(cpuid, true);
+ }
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 4d403274c2e8..e80a5e8da119 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -33,9 +33,9 @@ static void notrace walk_stackframe(struct task_struct *task,
unsigned long fp, sp, pc;
if (regs) {
- fp = GET_FP(regs);
- sp = GET_USP(regs);
- pc = GET_IP(regs);
+ fp = frame_pointer(regs);
+ sp = user_stack_pointer(regs);
+ pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
const register unsigned long current_sp __asm__ ("sp");
fp = (unsigned long)__builtin_frame_address(0);
@@ -64,12 +64,8 @@ static void notrace walk_stackframe(struct task_struct *task,
frame = (struct stackframe *)fp - 1;
sp = fp;
fp = frame->fp;
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
(unsigned long *)(fp - 8));
-#else
- pc = frame->ra - 0x4;
-#endif
}
}
@@ -82,8 +78,8 @@ static void notrace walk_stackframe(struct task_struct *task,
unsigned long *ksp;
if (regs) {
- sp = GET_USP(regs);
- pc = GET_IP(regs);
+ sp = user_stack_pointer(regs);
+ pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
const register unsigned long current_sp __asm__ ("sp");
sp = current_sp;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 24a9333dda2c..3d1a651dc54c 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -70,7 +70,7 @@ void do_trap(struct pt_regs *regs, int signo, int code,
&& printk_ratelimit()) {
pr_info("%s[%d]: unhandled signal %d code 0x%x at 0x" REG_FMT,
tsk->comm, task_pid_nr(tsk), signo, code, addr);
- print_vma_addr(KERN_CONT " in ", GET_IP(regs));
+ print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
show_regs(regs);
}
@@ -118,6 +118,17 @@ DO_ERROR_INFO(do_trap_ecall_s,
DO_ERROR_INFO(do_trap_ecall_m,
SIGILL, ILL_ILLTRP, "environment call from M-mode");
+#ifdef CONFIG_GENERIC_BUG
+static inline unsigned long get_break_insn_length(unsigned long pc)
+{
+ bug_insn_t insn;
+
+ if (probe_kernel_address((bug_insn_t *)pc, insn))
+ return 0;
+ return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL);
+}
+#endif /* CONFIG_GENERIC_BUG */
+
asmlinkage void do_trap_break(struct pt_regs *regs)
{
#ifdef CONFIG_GENERIC_BUG
@@ -129,8 +140,8 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
case BUG_TRAP_TYPE_NONE:
break;
case BUG_TRAP_TYPE_WARN:
- regs->sepc += sizeof(bug_insn_t);
- return;
+ regs->sepc += get_break_insn_length(regs->sepc);
+ break;
case BUG_TRAP_TYPE_BUG:
die(regs, "Kernel BUG");
}
@@ -145,11 +156,14 @@ int is_valid_bugaddr(unsigned long pc)
{
bug_insn_t insn;
- if (pc < PAGE_OFFSET)
+ if (pc < VMALLOC_START)
return 0;
if (probe_kernel_address((bug_insn_t *)pc, insn))
return 0;
- return (insn == __BUG_INSN);
+ if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
+ return (insn == __BUG_INSN_32);
+ else
+ return ((insn & __COMPRESSED_INSN_MASK) == __BUG_INSN_16);
}
#endif /* CONFIG_GENERIC_BUG */
@@ -159,9 +173,9 @@ void __init trap_init(void)
* Set sup0 scratch register to 0, indicating to exception vector
* that we are presently executing in the kernel
*/
- csr_write(sscratch, 0);
+ csr_write(CSR_SSCRATCH, 0);
/* Set the exception vector address */
- csr_write(stvec, &handle_exception);
+ csr_write(CSR_STVEC, &handle_exception);
/* Enable all interrupts */
- csr_write(sie, -1);
+ csr_write(CSR_SIE, -1);
}
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index fec62b24df89..b07b765f312a 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -36,7 +36,7 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
# these symbols in the kernel code rather than hand-coded addresses.
SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ -Wl,--hash-style=both
$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE
$(call if_changed,vdsold)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index b68aac701803..8db569141485 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -9,3 +9,5 @@ obj-y += fault.o
obj-y += extable.o
obj-y += ioremap.o
obj-y += cacheflush.o
+obj-y += context.o
+obj-y += sifive_l2_cache.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 498c0a0814fe..497b7d07af0c 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -14,6 +14,67 @@
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
+#ifdef CONFIG_SMP
+
+#include <asm/sbi.h>
+
+void flush_icache_all(void)
+{
+ sbi_remote_fence_i(NULL);
+}
+
+/*
+ * Performs an icache flush for the given MM context. RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+ unsigned int cpu;
+ cpumask_t others, hmask, *mask;
+
+ preempt_disable();
+
+ /* Mark every hart's icache as needing a flush for this MM. */
+ mask = &mm->context.icache_stale_mask;
+ cpumask_setall(mask);
+ /* Flush this hart's I$ now, and mark it as flushed. */
+ cpu = smp_processor_id();
+ cpumask_clear_cpu(cpu, mask);
+ local_flush_icache_all();
+
+ /*
+ * Flush the I$ of other harts concurrently executing, and mark them as
+ * flushed.
+ */
+ cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+ local |= cpumask_empty(&others);
+ if (mm != current->active_mm || !local) {
+ cpumask_clear(&hmask);
+ riscv_cpuid_to_hartid_mask(&others, &hmask);
+ sbi_remote_fence_i(hmask.bits);
+ } else {
+ /*
+ * It's assumed that at least one strongly ordered operation is
+ * performed on this hart between setting a hart's cpumask bit
+ * and scheduling this MM context on that hart. Sending an SBI
+ * remote message will do this, but in the case where no
+ * messages are sent we still need to order this hart's writes
+ * with flush_icache_deferred().
+ */
+ smp_mb();
+ }
+
+ preempt_enable();
+}
+
+#endif /* CONFIG_SMP */
+
void flush_icache_pte(pte_t pte)
{
struct page *page = pte_page(pte);
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
new file mode 100644
index 000000000000..89ceb3cbe218
--- /dev/null
+++ b/arch/riscv/mm/context.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
+
+#include <linux/mm.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU. RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches. To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart. This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+ unsigned int cpu = smp_processor_id();
+ cpumask_t *mask = &mm->context.icache_stale_mask;
+
+ if (cpumask_test_cpu(cpu, mask)) {
+ cpumask_clear_cpu(cpu, mask);
+ /*
+ * Ensure the remote hart's writes are visible to this hart.
+ * This pairs with a barrier in flush_icache_mm.
+ */
+ smp_mb();
+ local_flush_icache_all();
+ }
+
+#endif
+}
+
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *task)
+{
+ unsigned int cpu;
+
+ if (unlikely(prev == next))
+ return;
+
+ /*
+ * Mark the current MM context as inactive, and the next as
+ * active. This is at least used by the icache flushing
+ * routines in order to determine who should be flushed.
+ */
+ cpu = smp_processor_id();
+
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * Use the old spbtr name instead of using the current satp
+ * name to support binutils 2.29 which doesn't know about the
+ * privileged ISA 1.10 yet.
+ */
+ csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
+ local_flush_tlb_all();
+
+ flush_icache_deferred(next);
+}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 88401d5125bc..cec8be9e2d6a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -229,8 +229,9 @@ vmalloc_fault:
pte_t *pte_k;
int index;
+ /* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
- goto bad_area;
+ return do_trap(regs, SIGSEGV, code, addr, tsk);
/*
* Synchronize this task's top level page-table
@@ -239,13 +240,9 @@ vmalloc_fault:
* Do _not_ use "tsk->active_mm->pgd" here.
* We might be inside an interrupt in the middle
* of a task switch.
- *
- * Note: Use the old spbtr name instead of using the current
- * satp name to support binutils 2.29 which doesn't know about
- * the privileged ISA 1.10 yet.
*/
index = pgd_index(addr);
- pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
+ pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c
new file mode 100644
index 000000000000..4eb64619b3f4
--- /dev/null
+++ b/arch/riscv/mm/sifive_l2_cache.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SiFive L2 cache controller Driver
+ *
+ * Copyright (C) 2018-2019 SiFive, Inc.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <asm/sifive_l2_cache.h>
+
+#define SIFIVE_L2_DIRECCFIX_LOW 0x100
+#define SIFIVE_L2_DIRECCFIX_HIGH 0x104
+#define SIFIVE_L2_DIRECCFIX_COUNT 0x108
+
+#define SIFIVE_L2_DATECCFIX_LOW 0x140
+#define SIFIVE_L2_DATECCFIX_HIGH 0x144
+#define SIFIVE_L2_DATECCFIX_COUNT 0x148
+
+#define SIFIVE_L2_DATECCFAIL_LOW 0x160
+#define SIFIVE_L2_DATECCFAIL_HIGH 0x164
+#define SIFIVE_L2_DATECCFAIL_COUNT 0x168
+
+#define SIFIVE_L2_CONFIG 0x00
+#define SIFIVE_L2_WAYENABLE 0x08
+#define SIFIVE_L2_ECCINJECTERR 0x40
+
+#define SIFIVE_L2_MAX_ECCINTR 3
+
+static void __iomem *l2_base;
+static int g_irq[SIFIVE_L2_MAX_ECCINTR];
+
+enum {
+ DIR_CORR = 0,
+ DATA_CORR,
+ DATA_UNCORR,
+};
+
+#ifdef CONFIG_DEBUG_FS
+static struct dentry *sifive_test;
+
+static ssize_t l2_write(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ unsigned int val;
+
+ if (kstrtouint_from_user(data, count, 0, &val))
+ return -EINVAL;
+ if ((val >= 0 && val < 0xFF) || (val >= 0x10000 && val < 0x100FF))
+ writel(val, l2_base + SIFIVE_L2_ECCINJECTERR);
+ else
+ return -EINVAL;
+ return count;
+}
+
+static const struct file_operations l2_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = l2_write
+};
+
+static void setup_sifive_debug(void)
+{
+ sifive_test = debugfs_create_dir("sifive_l2_cache", NULL);
+
+ debugfs_create_file("sifive_debug_inject_error", 0200,
+ sifive_test, NULL, &l2_fops);
+}
+#endif
+
+static void l2_config_read(void)
+{
+ u32 regval, val;
+
+ regval = readl(l2_base + SIFIVE_L2_CONFIG);
+ val = regval & 0xFF;
+ pr_info("L2CACHE: No. of Banks in the cache: %d\n", val);
+ val = (regval & 0xFF00) >> 8;
+ pr_info("L2CACHE: No. of ways per bank: %d\n", val);
+ val = (regval & 0xFF0000) >> 16;
+ pr_info("L2CACHE: Sets per bank: %llu\n", (uint64_t)1 << val);
+ val = (regval & 0xFF000000) >> 24;
+ pr_info("L2CACHE: Bytes per cache block: %llu\n", (uint64_t)1 << val);
+
+ regval = readl(l2_base + SIFIVE_L2_WAYENABLE);
+ pr_info("L2CACHE: Index of the largest way enabled: %d\n", regval);
+}
+
+static const struct of_device_id sifive_l2_ids[] = {
+ { .compatible = "sifive,fu540-c000-ccache" },
+ { /* end of table */ },
+};
+
+static ATOMIC_NOTIFIER_HEAD(l2_err_chain);
+
+int register_sifive_l2_error_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&l2_err_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_sifive_l2_error_notifier);
+
+int unregister_sifive_l2_error_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&l2_err_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_sifive_l2_error_notifier);
+
+static irqreturn_t l2_int_handler(int irq, void *device)
+{
+ unsigned int regval, add_h, add_l;
+
+ if (irq == g_irq[DIR_CORR]) {
+ add_h = readl(l2_base + SIFIVE_L2_DIRECCFIX_HIGH);
+ add_l = readl(l2_base + SIFIVE_L2_DIRECCFIX_LOW);
+ pr_err("L2CACHE: DirError @ 0x%08X.%08X\n", add_h, add_l);
+ regval = readl(l2_base + SIFIVE_L2_DIRECCFIX_COUNT);
+ atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
+ "DirECCFix");
+ }
+ if (irq == g_irq[DATA_CORR]) {
+ add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH);
+ add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW);
+ pr_err("L2CACHE: DataError @ 0x%08X.%08X\n", add_h, add_l);
+ regval = readl(l2_base + SIFIVE_L2_DATECCFIX_COUNT);
+ atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
+ "DatECCFix");
+ }
+ if (irq == g_irq[DATA_UNCORR]) {
+ add_h = readl(l2_base + SIFIVE_L2_DATECCFAIL_HIGH);
+ add_l = readl(l2_base + SIFIVE_L2_DATECCFAIL_LOW);
+ pr_err("L2CACHE: DataFail @ 0x%08X.%08X\n", add_h, add_l);
+ regval = readl(l2_base + SIFIVE_L2_DATECCFAIL_COUNT);
+ atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE,
+ "DatECCFail");
+ }
+
+ return IRQ_HANDLED;
+}
+
+int __init sifive_l2_init(void)
+{
+ struct device_node *np;
+ struct resource res;
+ int i, rc;
+
+ np = of_find_matching_node(NULL, sifive_l2_ids);
+ if (!np)
+ return -ENODEV;
+
+ if (of_address_to_resource(np, 0, &res))
+ return -ENODEV;
+
+ l2_base = ioremap(res.start, resource_size(&res));
+ if (!l2_base)
+ return -ENOMEM;
+
+ for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) {
+ g_irq[i] = irq_of_parse_and_map(np, i);
+ rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL);
+ if (rc) {
+ pr_err("L2CACHE: Could not request IRQ %d\n", g_irq[i]);
+ return rc;
+ }
+ }
+
+ l2_config_read();
+
+#ifdef CONFIG_DEBUG_FS
+ setup_sifive_debug();
+#endif
+ return 0;
+}
+device_initcall(sifive_l2_init);
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index df1d6a150f30..de8521fc9de5 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -10,6 +10,8 @@
# Copyright (C) 1994 by Linus Torvalds
#
+KBUILD_DEFCONFIG := defconfig
+
LD_BFD := elf64-s390
KBUILD_LDFLAGS := -m elf64_s390
KBUILD_AFLAGS_MODULE += -fPIC
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index c51496bbac19..7cba96e7587b 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -58,7 +58,6 @@ define cmd_section_cmp
touch $@
endef
-OBJCOPYFLAGS_bzImage := --pad-to $$(readelf -s $(obj)/compressed/vmlinux | awk '/\<_end\>/ {print or(strtonum("0x"$$2),4095)+1}')
$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 112b8d9f1e4c..635217eb3d91 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -77,6 +77,8 @@ SECTIONS
_compressed_start = .;
*(.vmlinux.bin.compressed)
_compressed_end = .;
+ FILL(0xff);
+ . = ALIGN(4096);
}
. = ALIGN(256);
.bss : {
diff --git a/arch/s390/defconfig b/arch/s390/configs/defconfig
index c59b922cb6c5..c59b922cb6c5 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/configs/defconfig
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index f316de40e51b..27696755daa9 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -28,6 +28,7 @@
#define CPACF_KMCTR 0xb92d /* MSA4 */
#define CPACF_PRNO 0xb93c /* MSA5 */
#define CPACF_KMA 0xb929 /* MSA8 */
+#define CPACF_KDSA 0xb93a /* MSA9 */
/*
* En/decryption modifier bits
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c47e22bba87f..bdbc81b5bc91 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -278,6 +278,7 @@ struct kvm_s390_sie_block {
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
#define ECD_ETOKENF 0x02000000
+#define ECD_ECC 0x00200000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -312,6 +313,7 @@ struct kvm_vcpu_stat {
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
+ u64 halt_no_poll_steal;
u64 halt_wakeup;
u64 instruction_lctl;
u64 instruction_lctlg;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 16511d97e8dc..47104e5b47fd 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -152,7 +152,10 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 pcc[16]; /* with MSA4 */
__u8 ppno[16]; /* with MSA5 */
__u8 kma[16]; /* with MSA8 */
- __u8 reserved[1808];
+ __u8 kdsa[16]; /* with MSA9 */
+ __u8 sortl[32]; /* with STFLE.150 */
+ __u8 dfltcc[32]; /* with STFLE.151 */
+ __u8 reserved[1728];
};
/* kvm attributes for crypto */
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 061418f787c3..e822b2964a83 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -430,3 +430,9 @@
425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree sys_open_tree
+429 common move_mount sys_move_mount sys_move_mount
+430 common fsopen sys_fsopen sys_fsopen
+431 common fsconfig sys_fsconfig sys_fsconfig
+432 common fsmount sys_fsmount sys_fsmount
+433 common fspick sys_fspick sys_fspick
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 1816ee48eadd..d3db3d7ed077 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_INVALID_WAKEUPS
+ select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
---help---
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 1fd706f6206c..9dde4d7d8704 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -14,6 +14,7 @@
#include <linux/kvm_host.h>
#include <linux/hrtimer.h>
#include <linux/mmu_context.h>
+#include <linux/nospec.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
@@ -2307,6 +2308,7 @@ static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
{
if (id >= MAX_S390_IO_ADAPTERS)
return NULL;
+ id = array_index_nospec(id, MAX_S390_IO_ADAPTERS);
return kvm->arch.adapters[id];
}
@@ -2320,8 +2322,13 @@ static int register_io_adapter(struct kvm_device *dev,
(void __user *)attr->addr, sizeof(adapter_info)))
return -EFAULT;
- if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
- (dev->kvm->arch.adapters[adapter_info.id] != NULL))
+ if (adapter_info.id >= MAX_S390_IO_ADAPTERS)
+ return -EINVAL;
+
+ adapter_info.id = array_index_nospec(adapter_info.id,
+ MAX_S390_IO_ADAPTERS);
+
+ if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
return -EINVAL;
adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4638303ba6a8..8d6d75db8de6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -75,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
+ { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -177,6 +178,11 @@ static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
+/* maximum percentage of steal time for polling. >100 is treated like 100 */
+static u8 halt_poll_max_steal = 10;
+module_param(halt_poll_max_steal, byte, 0644);
+MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -321,6 +327,22 @@ static inline int plo_test_bit(unsigned char nr)
return cc == 0;
}
+static inline void __insn32_query(unsigned int opcode, u8 query[32])
+{
+ register unsigned long r0 asm("0") = 0; /* query function */
+ register unsigned long r1 asm("1") = (unsigned long) query;
+
+ asm volatile(
+ /* Parameter regs are ignored */
+ " .insn rrf,%[opc] << 16,2,4,6,0\n"
+ : "=m" (*query)
+ : "d" (r0), "a" (r1), [opc] "i" (opcode)
+ : "cc");
+}
+
+#define INSN_SORTL 0xb938
+#define INSN_DFLTCC 0xb939
+
static void kvm_s390_cpu_feat_init(void)
{
int i;
@@ -368,6 +390,16 @@ static void kvm_s390_cpu_feat_init(void)
__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
kvm_s390_available_subfunc.kma);
+ if (test_facility(155)) /* MSA9 */
+ __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kdsa);
+
+ if (test_facility(150)) /* SORTL */
+ __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+
+ if (test_facility(151)) /* DFLTCC */
+ __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
@@ -513,9 +545,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
else if (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS;
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
break;
@@ -657,6 +686,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
set_kvm_facility(kvm->arch.model.fac_mask, 135);
set_kvm_facility(kvm->arch.model.fac_list, 135);
}
+ if (test_facility(148)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 148);
+ set_kvm_facility(kvm->arch.model.fac_list, 148);
+ }
+ if (test_facility(152)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 152);
+ set_kvm_facility(kvm->arch.model.fac_list, 152);
+ }
r = 0;
} else
r = -EINVAL;
@@ -1323,6 +1360,19 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+ VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+ VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+ VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
return 0;
}
@@ -1491,6 +1541,19 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
+ VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
+ VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
+ VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
+ ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
return 0;
}
@@ -1546,6 +1609,19 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
+ VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
+ VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
+ ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
+ VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
+ ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
return 0;
}
@@ -2817,6 +2893,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
+static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
+{
+ if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
+ test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
+ return true;
+ return false;
+}
+
+static bool kvm_has_pckmo_ecc(struct kvm *kvm)
+{
+ /* At least one ECC subfunction must be present */
+ return kvm_has_pckmo_subfunc(kvm, 32) ||
+ kvm_has_pckmo_subfunc(kvm, 33) ||
+ kvm_has_pckmo_subfunc(kvm, 34) ||
+ kvm_has_pckmo_subfunc(kvm, 40) ||
+ kvm_has_pckmo_subfunc(kvm, 41);
+
+}
+
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
/*
@@ -2829,13 +2924,19 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
vcpu->arch.sie_block->eca &= ~ECA_APIE;
+ vcpu->arch.sie_block->ecd &= ~ECD_ECC;
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
/* Set up protected key support */
- if (vcpu->kvm->arch.crypto.aes_kw)
+ if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+ /* ecc is also wrapped with AES key */
+ if (kvm_has_pckmo_ecc(vcpu->kvm))
+ vcpu->arch.sie_block->ecd |= ECD_ECC;
+ }
+
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
@@ -3068,6 +3169,17 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
}
}
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
+{
+ /* do not poll with more than halt_poll_max_steal percent of steal time */
+ if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+ halt_poll_max_steal) {
+ vcpu->stat.halt_no_poll_steal++;
+ return true;
+ }
+ return false;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index d62fa148558b..076090f9e666 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -288,7 +288,9 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
+ u32 ecd_flags;
int apie_h;
+ int apie_s;
int key_msk = test_kvm_facility(vcpu->kvm, 76);
int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
@@ -297,7 +299,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->crycbd = 0;
apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
- if (!apie_h && (!key_msk || fmt_o == CRYCB_FORMAT0))
+ apie_s = apie_h & scb_o->eca;
+ if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
return 0;
if (!crycb_addr)
@@ -308,7 +311,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
((crycb_addr + 128) & PAGE_MASK))
return set_validity_icpt(scb_s, 0x003CU);
- if (apie_h && (scb_o->eca & ECA_APIE)) {
+ if (apie_s) {
ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
vcpu->kvm->arch.crypto.crycb,
fmt_o, fmt_h);
@@ -320,7 +323,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* we may only allow it if enabled for guest 2 */
ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
(ECB3_AES | ECB3_DEA);
- if (!ecb3_flags)
+ ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
+ if (!ecb3_flags && !ecd_flags)
goto end;
/* copy only the wrapping keys */
@@ -329,6 +333,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;
+ scb_s->ecd |= ecd_flags;
/* xor both blocks in one run */
b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
@@ -339,7 +344,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
end:
switch (ret) {
case -EINVAL:
- return set_validity_icpt(scb_s, 0x0020U);
+ return set_validity_icpt(scb_s, 0x0022U);
case -EFAULT:
return set_validity_icpt(scb_s, 0x0035U);
case -EACCES:
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 01892dcf4029..0c1f257be422 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -28,7 +28,7 @@ static void __init kasan_early_panic(const char *reason)
{
sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
sclp_early_printk(reason);
- disabled_wait(0);
+ disabled_wait();
}
static void * __init kasan_early_alloc_segment(void)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index fd788e0f2e5b..cead9e0dcffb 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -93,6 +93,9 @@ static struct facility_def facility_defs[] = {
131, /* enhanced-SOP 2 and side-effect */
139, /* multiple epoch facility */
146, /* msa extension 8 */
+ 150, /* enhanced sort */
+ 151, /* deflate conversion */
+ 155, /* msa extension 9 */
-1 /* END */
}
},
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 480b057556ee..016a727d4357 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -430,3 +430,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index a1dd24307b00..e047480b1605 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -473,3 +473,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index fca34b2177e2..9f4b4bb78120 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -22,7 +22,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
}
extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
diff --git a/arch/unicore32/configs/unicore32_defconfig b/arch/unicore32/configs/unicore32_defconfig
index aebd01fc28e5..360cc9abcdb0 100644
--- a/arch/unicore32/configs/unicore32_defconfig
+++ b/arch/unicore32/configs/unicore32_defconfig
@@ -119,7 +119,7 @@ CONFIG_I2C_PUV3=y
# Hardware Monitoring support
#CONFIG_SENSORS_LM75=m
# Generic Thermal sysfs driver
-#CONFIG_THERMAL=m
+#CONFIG_THERMAL=y
#CONFIG_THERMAL_HWMON=y
# Multimedia support
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 5c205a9cb5a6..9f06ea5466dd 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -88,7 +88,6 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
}
static inline void arch_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 4cd5f982b1e5..ad968b7bac72 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,12 +398,6 @@
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
-387 i386 open_tree sys_open_tree __ia32_sys_open_tree
-388 i386 move_mount sys_move_mount __ia32_sys_move_mount
-389 i386 fsopen sys_fsopen __ia32_sys_fsopen
-390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
-391 i386 fsmount sys_fsmount __ia32_sys_fsmount
-392 i386 fspick sys_fspick __ia32_sys_fspick
393 i386 semget sys_semget __ia32_sys_semget
394 i386 semctl sys_semctl __ia32_compat_sys_semctl
395 i386 shmget sys_shmget __ia32_sys_shmget
@@ -438,3 +432,9 @@
425 i386 io_uring_setup sys_io_uring_setup __ia32_sys_io_uring_setup
426 i386 io_uring_enter sys_io_uring_enter __ia32_sys_io_uring_enter
427 i386 io_uring_register sys_io_uring_register __ia32_sys_io_uring_register
+428 i386 open_tree sys_open_tree __ia32_sys_open_tree
+429 i386 move_mount sys_move_mount __ia32_sys_move_mount
+430 i386 fsopen sys_fsopen __ia32_sys_fsopen
+431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
+432 i386 fsmount sys_fsmount __ia32_sys_fsmount
+433 i386 fspick sys_fspick __ia32_sys_fspick
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 64ca0d06259a..b4e6f9e6204a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,18 +343,18 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
-335 common open_tree __x64_sys_open_tree
-336 common move_mount __x64_sys_move_mount
-337 common fsopen __x64_sys_fsopen
-338 common fsconfig __x64_sys_fsconfig
-339 common fsmount __x64_sys_fsmount
-340 common fspick __x64_sys_fspick
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal __x64_sys_pidfd_send_signal
425 common io_uring_setup __x64_sys_io_uring_setup
426 common io_uring_enter __x64_sys_io_uring_enter
427 common io_uring_register __x64_sys_io_uring_register
+428 common open_tree __x64_sys_open_tree
+429 common move_mount __x64_sys_move_mount
+430 common fsopen __x64_sys_fsopen
+431 common fsconfig __x64_sys_fsconfig
+432 common fsmount __x64_sys_fsmount
+433 common fspick __x64_sys_fspick
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 12ec402f4114..546d13e436aa 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2384,7 +2384,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(55, (unsigned long *)&status)) {
handled++;
- intel_pt_interrupt();
+ if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
+ perf_guest_cbs->handle_intel_pt_intr))
+ perf_guest_cbs->handle_intel_pt_intr();
+ else
+ intel_pt_interrupt();
}
/*
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 62be73b23d5c..e8f58ddd06d9 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -10,6 +10,7 @@ extern struct e820_table *e820_table_firmware;
extern unsigned long pci_mem_start;
+extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c79abe7ca093..450d69a1e6fa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -470,6 +470,7 @@ struct kvm_pmu {
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
@@ -781,6 +782,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
};
struct kvm_lpage_info {
@@ -1168,7 +1172,8 @@ struct kvm_x86_ops {
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
void (*setup_mce)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 93dff1963337..9024236693d2 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -278,8 +278,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
mpx_mm_init(mm);
}
-static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
{
/*
* mpx_notify_unmap() goes and reads a rarely-hot
@@ -299,7 +299,7 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
* consistently wrong.
*/
if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
- mpx_notify_unmap(mm, vma, start, end);
+ mpx_notify_unmap(mm, start, end);
}
/*
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index d0b1434fb0b6..143a5c193ed3 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -64,12 +64,15 @@ struct mpx_fault_info {
};
#ifdef CONFIG_X86_INTEL_MPX
-int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
-int mpx_handle_bd_fault(void);
+
+extern int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs);
+extern int mpx_handle_bd_fault(void);
+
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
return (mm->context.bd_addr != MPX_INVALID_BOUNDS_DIR);
}
+
static inline void mpx_mm_init(struct mm_struct *mm)
{
/*
@@ -78,11 +81,10 @@ static inline void mpx_mm_init(struct mm_struct *mm)
*/
mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
}
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end);
-unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
- unsigned long flags);
+extern void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, unsigned long end);
+extern unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags);
+
#else
static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
{
@@ -100,7 +102,6 @@ static inline void mpx_mm_init(struct mm_struct *mm)
{
}
static inline void mpx_notify_unmap(struct mm_struct *mm,
- struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 88dd202c8b00..979ef971cc78 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -789,6 +789,14 @@
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+/* PERF_GLOBAL_OVF_CTL bits */
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2879e234e193..76dd605ee2a3 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -73,12 +73,13 @@ EXPORT_SYMBOL(pci_mem_start);
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
-bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+static bool _e820__mapped_any(struct e820_table *table,
+ u64 start, u64 end, enum e820_type type)
{
int i;
- for (i = 0; i < e820_table->nr_entries; i++) {
- struct e820_entry *entry = &e820_table->entries[i];
+ for (i = 0; i < table->nr_entries; i++) {
+ struct e820_entry *entry = &table->entries[i];
if (type && entry->type != type)
continue;
@@ -88,6 +89,17 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
}
return 0;
}
+
+bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table_firmware, start, end, type);
+}
+EXPORT_SYMBOL_GPL(e820__mapped_raw_any);
+
+bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table, start, end, type);
+}
EXPORT_SYMBOL_GPL(e820__mapped_any);
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbbe611f0c49..80a642a0143d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -963,13 +963,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
return 1;
- eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ eax = kvm_rax_read(vcpu);
+ ecx = kvm_rcx_read(vcpu);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
- kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
- kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
- kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
- kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+ kvm_rax_write(vcpu, eax);
+ kvm_rbx_write(vcpu, ebx);
+ kvm_rcx_write(vcpu, ecx);
+ kvm_rdx_write(vcpu, edx);
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cc24b3a32c44..8ca4b39918e0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1535,10 +1535,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
longmode = is_64_bit_mode(vcpu);
if (longmode)
- kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+ kvm_rax_write(vcpu, result);
else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
+ kvm_rdx_write(vcpu, result >> 32);
+ kvm_rax_write(vcpu, result & 0xffffffff);
}
}
@@ -1611,18 +1611,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
longmode = is_64_bit_mode(vcpu);
if (!longmode) {
- param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
- ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
- outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+ param = ((u64)kvm_rdx_read(vcpu) << 32) |
+ (kvm_rax_read(vcpu) & 0xffffffff);
+ ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
+ (kvm_rcx_read(vcpu) & 0xffffffff);
+ outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
+ (kvm_rsi_read(vcpu) & 0xffffffff);
}
#ifdef CONFIG_X86_64
else {
- param = kvm_register_read(vcpu, VCPU_REGS_RCX);
- ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
- outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
}
#endif
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f8f56a93358b..1cc6c47dc77e 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,34 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+{ \
+ return vcpu->arch.regs[VCPU_REGS_##uname]; \
+} \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
+ unsigned long val) \
+{ \
+ vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+}
+BUILD_KVM_GPR_ACCESSORS(rax, RAX)
+BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
+BUILD_KVM_GPR_ACCESSORS(rcx, RCX)
+BUILD_KVM_GPR_ACCESSORS(rdx, RDX)
+BUILD_KVM_GPR_ACCESSORS(rbp, RBP)
+BUILD_KVM_GPR_ACCESSORS(rsi, RSI)
+BUILD_KVM_GPR_ACCESSORS(rdi, RDI)
+#ifdef CONFIG_X86_64
+BUILD_KVM_GPR_ACCESSORS(r8, R8)
+BUILD_KVM_GPR_ACCESSORS(r9, R9)
+BUILD_KVM_GPR_ACCESSORS(r10, R10)
+BUILD_KVM_GPR_ACCESSORS(r11, R11)
+BUILD_KVM_GPR_ACCESSORS(r12, R12)
+BUILD_KVM_GPR_ACCESSORS(r13, R13)
+BUILD_KVM_GPR_ACCESSORS(r14, R14)
+BUILD_KVM_GPR_ACCESSORS(r15, R15)
+#endif
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@@ -37,6 +65,16 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
+static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu)
+{
+ return kvm_register_read(vcpu, VCPU_REGS_RSP);
+}
+
+static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ kvm_register_write(vcpu, VCPU_REGS_RSP, val);
+}
+
static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
might_sleep(); /* on svm */
@@ -83,8 +121,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
- return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
- | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+ return (kvm_rax_read(vcpu) & -1u)
+ | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32);
}
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bd13fdddbdc4..4924f83ed4f3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1454,7 +1454,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (swait_active(q))
swake_up_one(q);
- if (apic_lvtt_tscdeadline(apic))
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
ktimer->expired_tscdeadline = ktimer->tscdeadline;
}
@@ -1696,37 +1696,42 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
static bool start_hv_timer(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
- int r;
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ bool expired;
WARN_ON(preemptible());
if (!kvm_x86_ops->set_hv_timer)
return false;
- if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
- return false;
-
if (!ktimer->tscdeadline)
return false;
- r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
- if (r < 0)
+ if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
return false;
ktimer->hv_timer_in_use = true;
hrtimer_cancel(&ktimer->timer);
/*
- * Also recheck ktimer->pending, in case the sw timer triggered in
- * the window. For periodic timer, leave the hv timer running for
- * simplicity, and the deadline will be recomputed on the next vmexit.
+ * To simplify handling the periodic timer, leave the hv timer running
+ * even if the deadline timer has expired, i.e. rely on the resulting
+ * VM-Exit to recompute the periodic timer's target expiration.
*/
- if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
- if (r)
+ if (!apic_lvtt_period(apic)) {
+ /*
+ * Cancel the hv timer if the sw timer fired while the hv timer
+ * was being programmed, or if the hv timer itself expired.
+ */
+ if (atomic_read(&ktimer->pending)) {
+ cancel_hv_timer(apic);
+ } else if (expired) {
apic_timer_expired(apic);
- return false;
+ cancel_hv_timer(apic);
+ }
}
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+ trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
+
return true;
}
@@ -1750,8 +1755,13 @@ static void start_sw_timer(struct kvm_lapic *apic)
static void restart_apic_timer(struct kvm_lapic *apic)
{
preempt_disable();
+
+ if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
+ goto out;
+
if (!start_hv_timer(apic))
start_sw_timer(apic);
+out:
preempt_enable();
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d9c7b45d231f..1e9ba81accba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pat.h>
#include <asm/cmpxchg.h>
+#include <asm/e820/api.h>
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
@@ -487,16 +488,24 @@ static void kvm_mmu_reset_all_pte_masks(void)
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
+ *
+ * Some Intel CPUs address the L1 cache using more PA bits than are
+ * reported by CPUID. Use the PA width of the L1 cache when possible
+ * to achieve more effective mitigation, e.g. if system RAM overlaps
+ * the most significant bits of legal physical address space.
*/
- low_phys_bits = boot_cpu_data.x86_phys_bits;
- if (boot_cpu_data.x86_phys_bits <
+ shadow_nonpresent_or_rsvd_mask = 0;
+ low_phys_bits = boot_cpu_data.x86_cache_bits;
+ if (boot_cpu_data.x86_cache_bits <
52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(boot_cpu_data.x86_phys_bits -
+ rsvd_bits(boot_cpu_data.x86_cache_bits -
shadow_nonpresent_or_rsvd_mask_len,
- boot_cpu_data.x86_phys_bits - 1);
+ boot_cpu_data.x86_cache_bits - 1);
low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
- }
+ } else
+ WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}
@@ -2892,7 +2901,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
*/
(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
- return true;
+ return !e820__mapped_raw_any(pfn_to_hpa(pfn),
+ pfn_to_hpa(pfn + 1) - 1,
+ E820_TYPE_RAM);
}
/* Bits which may be returned by set_spte() */
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e9ea2d45ae66..9f72cc427158 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
}
-static bool valid_pat_type(unsigned t)
-{
- return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
-}
-
static bool valid_mtrr_type(unsigned t)
{
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
@@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return false;
if (msr == MSR_IA32_CR_PAT) {
- for (i = 0; i < 8; i++)
- if (!valid_pat_type((data >> (i * 8)) & 0xff))
- return false;
- return true;
+ return kvm_pat_valid(data);
} else if (msr == MSR_MTRRdefType) {
if (data & ~0xcff)
return false;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 08715034e315..367a47df4ba0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
struct page *page;
npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
- /* Check if the user is doing something meaningless. */
- if (unlikely(npages != 1))
- return -EFAULT;
-
- table = kmap_atomic(page);
- ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table);
-
- kvm_release_page_dirty(page);
+ if (likely(npages == 1)) {
+ table = kmap_atomic(page);
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ kunmap_atomic(table);
+
+ kvm_release_page_dirty(page);
+ } else {
+ struct vm_area_struct *vma;
+ unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
+ unsigned long pfn;
+ unsigned long paddr;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
+ if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ paddr = pfn << PAGE_SHIFT;
+ table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
+ if (!table) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ memunmap(table);
+ up_read(&current->mm->mmap_sem);
+ }
return (ret != orig_pte);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b92eaf4a3b1..a849dcb7fbc5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2091,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
init_vmcb(svm);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
- kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+ kvm_rdx_write(vcpu, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
@@ -3071,32 +3071,6 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm)
return false;
}
-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
-{
- struct page *page;
-
- might_sleep();
-
- page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
- goto error;
-
- *_page = page;
-
- return kmap(page);
-
-error:
- kvm_inject_gp(&svm->vcpu, 0);
-
- return NULL;
-}
-
-static void nested_svm_unmap(struct page *page)
-{
- kunmap(page);
- kvm_release_page_dirty(page);
-}
-
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
unsigned port, size, iopm_len;
@@ -3299,10 +3273,11 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
static int nested_svm_vmexit(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
vmcb->control.exit_info_1,
@@ -3311,9 +3286,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
@@ -3408,16 +3388,16 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
} else {
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
}
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ kvm_rax_write(&svm->vcpu, hsave->save.rax);
+ kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
+ kvm_rip_write(&svm->vcpu, hsave->save.rip);
svm->vmcb->save.dr7 = 0;
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
mark_all_dirty(svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
@@ -3483,7 +3463,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
}
static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct page *page)
+ struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -3516,9 +3496,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
kvm_mmu_reset_context(&svm->vcpu);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
+ kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
+ kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
svm->vmcb->save.rax = nested_vmcb->save.rax;
@@ -3567,7 +3547,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->control.pause_filter_thresh =
nested_vmcb->control.pause_filter_thresh;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, map, true);
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -3587,17 +3567,23 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
u64 vmcb_gpa;
vmcb_gpa = svm->vmcb->save.rax;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return false;
+ }
+
+ nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -3605,7 +3591,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return false;
}
@@ -3649,7 +3635,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
return true;
}
@@ -3673,21 +3659,26 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3695,21 +3686,26 @@ static int vmload_interception(struct vcpu_svm *svm)
static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3791,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
- kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
+ kvm_rax_read(&svm->vcpu));
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
- kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3803,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
static int skinit_interception(struct vcpu_svm *svm)
{
- trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3817,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm)
static int xsetbv_interception(struct vcpu_svm *svm)
{
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
- u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 index = kvm_rcx_read(&svm->vcpu);
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
@@ -4213,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int rdmsr_interception(struct vcpu_svm *svm)
{
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
@@ -4225,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
} else {
trace_kvm_msr_read(ecx, msr_info.data);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
- msr_info.data & 0xffffffff);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
- msr_info.data >> 32);
+ kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
+ kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -4422,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
struct msr_data msr;
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
u64 data = kvm_read_edx_eax(&svm->vcpu);
msr.data = data;
@@ -6236,7 +6230,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
u64 guest;
u64 vmcb;
@@ -6244,10 +6238,10 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
if (guest) {
- nested_vmcb = nested_svm_map(svm, vmcb, &page);
- if (!nested_vmcb)
+ if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
- enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
+ nested_vmcb = map.hva;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
}
return 0;
}
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 854e144131c6..d6664ee3d127 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_VMX_CAPS_H
#define __KVM_X86_VMX_CAPS_H
+#include <asm/vmx.h>
+
#include "lapic.h"
extern bool __read_mostly enable_vpid;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c601d079cd2..f1a69117ac0f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
if (!vmx->nested.hv_evmcs)
return;
- kunmap(vmx->nested.hv_evmcs_page);
- kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
vmx->nested.hv_evmcs_vmptr = -1ull;
- vmx->nested.hv_evmcs_page = NULL;
vmx->nested.hv_evmcs = NULL;
}
@@ -229,16 +227,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
- if (vmx->nested.virtual_apic_page) {
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- }
+ kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+ vmx->nested.pi_desc = NULL;
kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
@@ -519,39 +510,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
int msr;
- struct page *page;
unsigned long *msr_bitmap_l1;
unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
- /*
- * pred_cmd & spec_ctrl are trying to verify two things:
- *
- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
- * ensures that we do not accidentally generate an L02 MSR bitmap
- * from the L12 MSR bitmap that is too permissive.
- * 2. That L1 or L2s have actually used the MSR. This avoids
- * unnecessarily merging of the bitmap if the MSR is unused. This
- * works properly because we only update the L01 MSR bitmap lazily.
- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
- * updated to reflect this when L1 (or its L2s) actually write to
- * the MSR.
- */
- bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
- bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
return false;
- if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
- !pred_cmd && !spec_ctrl)
- return false;
-
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
- if (is_error_page(page))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
return false;
- msr_bitmap_l1 = (unsigned long *)kmap(page);
+ msr_bitmap_l1 = (unsigned long *)map->hva;
/*
* To keep the control flow simple, pay eight 8-byte writes (sixteen
@@ -592,20 +563,42 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
}
}
- if (spec_ctrl)
+ /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_FS_BASE, MSR_TYPE_RW);
+
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_GS_BASE, MSR_TYPE_RW);
+
+ nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
+ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+
+ /*
+ * Checking the L0->L1 bitmap is trying to verify two things:
+ *
+ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
+ * ensures that we do not accidentally generate an L02 MSR bitmap
+ * from the L12 MSR bitmap that is too permissive.
+ * 2. That L1 or L2s have actually used the MSR. This avoids
+ * unnecessarily merging of the bitmap if the MSR is unused. This
+ * works properly because we only update the L01 MSR bitmap lazily.
+ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
+ * updated to reflect this when L1 (or its L2s) actually write to
+ * the MSR.
+ */
+ if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_SPEC_CTRL,
MSR_TYPE_R | MSR_TYPE_W);
- if (pred_cmd)
+ if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
nested_vmx_disable_intercept_for_msr(
msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_PRED_CMD,
MSR_TYPE_W);
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
return true;
}
@@ -613,20 +606,20 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ struct kvm_host_map map;
struct vmcs12 *shadow;
- struct page *page;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == -1ull)
return;
shadow = get_shadow_vmcs12(vcpu);
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
- memcpy(shadow, kmap(page), VMCS12_SIZE);
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+ return;
- kunmap(page);
- kvm_release_page_clean(page);
+ memcpy(shadow, map.hva, VMCS12_SIZE);
+ kvm_vcpu_unmap(vcpu, &map, false);
}
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
@@ -930,7 +923,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
if (!nested_cr3_valid(vcpu, cr3)) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
- return 1;
+ return -EINVAL;
}
/*
@@ -941,7 +934,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
!nested_ept) {
if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
*entry_failure_code = ENTRY_FAIL_PDPTE;
- return 1;
+ return -EINVAL;
}
}
}
@@ -1794,13 +1787,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
nested_release_evmcs(vcpu);
- vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
- vcpu, assist_page.current_nested_vmcs);
-
- if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs),
+ &vmx->nested.hv_evmcs_map))
return 0;
- vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+ vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
/*
* Currently, KVM only supports eVMCS version 1
@@ -2373,19 +2364,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
*/
if (vmx->emulation_required) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
- return 1;
+ return -EINVAL;
}
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
- return 1;
+ return -EINVAL;
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
- kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
- kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
+ kvm_rsp_write(vcpu, vmcs12->guest_rsp);
+ kvm_rip_write(vcpu, vmcs12->guest_rip);
return 0;
}
@@ -2589,11 +2580,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Checks related to Host Control Registers and MSRs
- */
-static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
+ nested_check_vm_exit_controls(vcpu, vmcs12) ||
+ nested_check_vm_entry_controls(vcpu, vmcs12))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
bool ia32e;
@@ -2606,6 +2605,10 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))
return -EINVAL;
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
+ !kvm_pat_valid(vmcs12->host_ia32_pat))
+ return -EINVAL;
+
/*
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
* IA32_EFER MSR must be 0 in the field for that register. In addition,
@@ -2624,41 +2627,12 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu,
return 0;
}
-/*
- * Checks related to Guest Non-register State
- */
-static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
-{
- if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
- vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
- return -EINVAL;
-
- return 0;
-}
-
-static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
-{
- if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
- nested_check_vm_exit_controls(vcpu, vmcs12) ||
- nested_check_vm_entry_controls(vcpu, vmcs12))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
-
- if (nested_check_host_control_regs(vcpu, vmcs12))
- return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
-
- if (nested_check_guest_non_reg_state(vmcs12))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
-
- return 0;
-}
-
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int r;
- struct page *page;
+ int r = 0;
struct vmcs12 *shadow;
+ struct kvm_host_map map;
if (vmcs12->vmcs_link_pointer == -1ull)
return 0;
@@ -2666,23 +2640,34 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
return -EINVAL;
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
- if (is_error_page(page))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
return -EINVAL;
- r = 0;
- shadow = kmap(page);
+ shadow = map.hva;
+
if (shadow->hdr.revision_id != VMCS12_REVISION ||
shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
r = -EINVAL;
- kunmap(page);
- kvm_release_page_clean(page);
+
+ kvm_vcpu_unmap(vcpu, &map, false);
return r;
}
-static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12,
- u32 *exit_qual)
+/*
+ * Checks related to Guest Non-register State
+ */
+static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
+{
+ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+ vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12,
+ u32 *exit_qual)
{
bool ia32e;
@@ -2690,11 +2675,15 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
- return 1;
+ return -EINVAL;
+
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
+ !kvm_pat_valid(vmcs12->guest_ia32_pat))
+ return -EINVAL;
if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
*exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
- return 1;
+ return -EINVAL;
}
/*
@@ -2713,13 +2702,16 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu,
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
((vmcs12->guest_cr0 & X86_CR0_PG) &&
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
- return 1;
+ return -EINVAL;
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
- (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
- (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
- return 1;
+ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+ return -EINVAL;
+
+ if (nested_check_guest_non_reg_state(vmcs12))
+ return -EINVAL;
return 0;
}
@@ -2832,6 +2824,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_host_map *map;
struct page *page;
u64 hpa;
@@ -2864,20 +2857,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
- if (vmx->nested.virtual_apic_page) { /* shouldn't happen */
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr);
+ map = &vmx->nested.virtual_apic_map;
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
*/
- if (!is_error_page(page)) {
- vmx->nested.virtual_apic_page = page;
- hpa = page_to_phys(vmx->nested.virtual_apic_page);
- vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+ if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -2898,26 +2885,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
if (nested_cpu_has_posted_intr(vmcs12)) {
- if (vmx->nested.pi_desc_page) { /* shouldn't happen */
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull);
+ map = &vmx->nested.pi_desc_map;
+
+ if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
+ vmx->nested.pi_desc =
+ (struct pi_desc *)(((void *)map->hva) +
+ offset_in_page(vmcs12->posted_intr_desc_addr));
+ vmcs_write64(POSTED_INTR_DESC_ADDR,
+ pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
}
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr);
- if (is_error_page(page))
- return;
- vmx->nested.pi_desc_page = page;
- vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc =
- (struct pi_desc *)((void *)vmx->nested.pi_desc +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
- vmcs_write64(POSTED_INTR_DESC_ADDR,
- page_to_phys(vmx->nested.pi_desc_page) +
- (unsigned long)(vmcs12->posted_intr_desc_addr &
- (PAGE_SIZE - 1)));
}
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
@@ -3000,7 +2976,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
return -1;
}
- if (nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
goto vmentry_fail_vmexit;
}
@@ -3145,9 +3121,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
- ret = nested_vmx_check_vmentry_prereqs(vcpu, vmcs12);
- if (ret)
- return nested_vmx_failValid(vcpu, ret);
+ if (nested_vmx_check_controls(vcpu, vmcs12))
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+ if (nested_vmx_check_host_state(vcpu, vmcs12))
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
/*
* We're finally done with prerequisite checking, and can start with
@@ -3310,11 +3288,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
- vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vapic_page = vmx->nested.virtual_apic_map.hva;
+ if (!vapic_page)
+ return;
+
__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
vapic_page, &max_irr);
- kunmap(vmx->nested.virtual_apic_page);
-
status = vmcs_read16(GUEST_INTR_STATUS);
if ((u8)max_irr > ((u8)status & 0xff)) {
status &= ~0xff;
@@ -3425,8 +3404,8 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
- vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
- vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
+ vmcs12->guest_rsp = kvm_rsp_read(vcpu);
+ vmcs12->guest_rip = kvm_rip_read(vcpu);
vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
@@ -3609,8 +3588,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
vmx_set_efer(vcpu, vcpu->arch.efer);
- kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp);
- kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip);
+ kvm_rsp_write(vcpu, vmcs12->host_rsp);
+ kvm_rip_write(vcpu, vmcs12->host_rip);
vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
vmx_set_interrupt_shadow(vcpu, 0);
@@ -3955,16 +3934,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
- if (vmx->nested.virtual_apic_page) {
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- }
+ kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+ vmx->nested.pi_desc = NULL;
/*
* We are now running in L2, mmu_notifier will force to reload the
@@ -4260,7 +4232,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
gpa_t vmptr;
- struct page *page;
+ uint32_t revision;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -4306,20 +4278,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
* Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
* which replaces physical address width with 32
*/
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
- return nested_vmx_failInvalid(vcpu);
-
- page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failInvalid(vcpu);
- if (*(u32 *)kmap(page) != VMCS12_REVISION) {
- kunmap(page);
- kvm_release_page_clean(page);
+ if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
+ revision != VMCS12_REVISION)
return nested_vmx_failInvalid(vcpu);
- }
- kunmap(page);
- kvm_release_page_clean(page);
vmx->nested.vmxon_ptr = vmptr;
ret = enter_vmx_operation(vcpu);
@@ -4377,7 +4341,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_INVALID_ADDRESS);
@@ -4385,7 +4349,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- if (vmx->nested.hv_evmcs_page) {
+ if (vmx->nested.hv_evmcs_map.hva) {
if (vmptr == vmx->nested.hv_evmcs_vmptr)
nested_release_evmcs(vcpu);
} else {
@@ -4584,7 +4548,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
if (nested_vmx_get_vmptr(vcpu, &vmptr))
return 1;
- if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu)))
+ if (!page_address_valid(vcpu, vmptr))
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INVALID_ADDRESS);
@@ -4597,11 +4561,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
if (vmx->nested.current_vmptr != vmptr) {
+ struct kvm_host_map map;
struct vmcs12 *new_vmcs12;
- struct page *page;
- page = kvm_vcpu_gpa_to_page(vcpu, vmptr);
- if (is_error_page(page)) {
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the
@@ -4611,12 +4574,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
- new_vmcs12 = kmap(page);
+
+ new_vmcs12 = map.hva;
+
if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
(new_vmcs12->hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@@ -4628,8 +4592,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* cached.
*/
memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
- kunmap(page);
- kvm_release_page_clean(page);
+ kvm_vcpu_unmap(vcpu, &map, false);
set_current_vmptr(vmx, vmptr);
}
@@ -4804,7 +4767,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 index = kvm_rcx_read(vcpu);
u64 address;
bool accessed_dirty;
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
@@ -4850,7 +4813,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12;
- u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+ u32 function = kvm_rax_read(vcpu);
/*
* VMFUNC is only supported for nested guests, but we always enable the
@@ -4936,7 +4899,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12, u32 exit_reason)
{
- u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 msr_index = kvm_rcx_read(vcpu);
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
@@ -5373,9 +5336,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->format != 0)
return -EINVAL;
- if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
- nested_enable_evmcs(vcpu, NULL);
-
if (!nested_vmx_allowed(vcpu))
return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
@@ -5417,6 +5377,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->vmx.vmxon_pa == -1ull)
return 0;
+ if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
+ nested_enable_evmcs(vcpu, NULL);
+
vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
ret = enter_vmx_operation(vcpu);
if (ret)
@@ -5460,9 +5423,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
return 0;
- vmx->nested.nested_run_pending =
- !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
-
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
@@ -5480,14 +5440,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
}
- if (nested_vmx_check_vmentry_prereqs(vcpu, vmcs12) ||
- nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ if (nested_vmx_check_controls(vcpu, vmcs12) ||
+ nested_vmx_check_host_state(vcpu, vmcs12) ||
+ nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
return -EINVAL;
vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- if (ret)
+ if (ret) {
+ vmx->nested.nested_run_pending = 0;
return -EINVAL;
+ }
return 0;
}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 5ab4a364348e..f8502c376b37 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -227,7 +227,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+ if (!(data & pmu->global_ovf_ctrl_mask)) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
pmu->global_ovf_ctrl = data;
@@ -297,6 +297,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
+ pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
+ & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
+ MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
+ if (kvm_x86_ops->pt_supported())
+ pmu->global_ovf_ctrl_mask &=
+ ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
entry = kvm_find_cpuid_entry(vcpu, 7, 0);
if (entry &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e1fa935a545f..1ac167614032 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1692,6 +1692,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
+ case MSR_IA32_POWER_CTL:
+ msr_info->data = vmx->msr_ia32_power_ctl;
+ break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1822,6 +1825,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
+ case MSR_IA32_POWER_CTL:
+ vmx->msr_ia32_power_ctl = data;
+ break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -1891,7 +1897,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ if (!kvm_pat_valid(data))
return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
@@ -2288,7 +2294,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_EXIT_SAVE_IA32_PAT |
VM_EXIT_LOAD_IA32_PAT |
VM_EXIT_LOAD_IA32_EFER |
VM_EXIT_CLEAR_BNDCFGS |
@@ -3619,14 +3624,13 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
!nested_cpu_has_vid(get_vmcs12(vcpu)) ||
- WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
+ WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
return false;
rvi = vmx_get_rvi();
- vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vapic_page = vmx->nested.virtual_apic_map.hva;
vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
- kunmap(vmx->nested.virtual_apic_page);
return ((rvi & 0xf0) > (vppr & 0xf0));
}
@@ -4827,7 +4831,7 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
static int handle_rdmsr(struct kvm_vcpu *vcpu)
{
- u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
+ u32 ecx = kvm_rcx_read(vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
@@ -4840,18 +4844,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
trace_kvm_msr_read(ecx, msr_info.data);
- /* FIXME: handling of bits 32:63 of rax, rdx */
- vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
- vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
+ kvm_rax_write(vcpu, msr_info.data & -1u);
+ kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
return kvm_skip_emulated_instruction(vcpu);
}
static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
struct msr_data msr;
- u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
- u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
- | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ u32 ecx = kvm_rcx_read(vcpu);
+ u64 data = kvm_read_edx_eax(vcpu);
msr.data = data;
msr.index = ecx;
@@ -4922,7 +4924,7 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
static int handle_xsetbv(struct kvm_vcpu *vcpu)
{
u64 new_bv = kvm_read_edx_eax(vcpu);
- u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ u32 index = kvm_rcx_read(vcpu);
if (kvm_set_xcr(vcpu, index, new_bv) == 0)
return kvm_skip_emulated_instruction(vcpu);
@@ -5723,8 +5725,16 @@ void dump_vmcs(void)
if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
pr_err("TSC Multiplier = 0x%016llx\n",
vmcs_read64(TSC_MULTIPLIER));
- if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW)
- pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+ if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
+ if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
+ u16 status = vmcs_read16(GUEST_INTR_STATUS);
+ pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
+ }
+ pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
+ if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
+ pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
+ pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
+ }
if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
@@ -6856,30 +6866,6 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
}
}
-static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *entry;
- union cpuid10_eax eax;
-
- entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry)
- return false;
-
- eax.full = entry->eax;
- return (eax.split.version_id > 0);
-}
-
-static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
-
- if (pmu_enabled)
- vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
- else
- vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
-}
-
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6968,7 +6954,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
- nested_vmx_procbased_ctls_update(vcpu);
}
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -7028,7 +7013,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
return 0;
}
-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
@@ -7051,10 +7037,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
- u64_shl_div_u64(delta_tsc,
+ delta_tsc && u64_shl_div_u64(delta_tsc,
kvm_tsc_scaling_ratio_frac_bits,
- vcpu->arch.tsc_scaling_ratio,
- &delta_tsc))
+ vcpu->arch.tsc_scaling_ratio, &delta_tsc))
return -ERANGE;
/*
@@ -7067,7 +7052,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
return -ERANGE;
vmx->hv_deadline_tsc = tscl + delta_tsc;
- return delta_tsc == 0;
+ *expired = !delta_tsc;
+ return 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
@@ -7104,9 +7090,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- gpa_t gpa;
- struct page *page = NULL;
- u64 *pml_address;
+ gpa_t gpa, dst;
if (is_guest_mode(vcpu)) {
WARN_ON_ONCE(vmx->nested.pml_full);
@@ -7126,15 +7110,13 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
}
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+ dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
- page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
- if (is_error_page(page))
+ if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
+ offset_in_page(dst), sizeof(gpa)))
return 0;
- pml_address = kmap(page);
- pml_address[vmcs12->guest_pml_index--] = gpa;
- kunmap(page);
- kvm_release_page_clean(page);
+ vmcs12->guest_pml_index--;
}
return 0;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f879529906b4..63d37ccce3dc 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,8 +142,11 @@ struct nested_vmx {
* pointers, so we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
- struct page *virtual_apic_page;
- struct page *pi_desc_page;
+ struct kvm_host_map virtual_apic_map;
+ struct kvm_host_map pi_desc_map;
+
+ struct kvm_host_map msr_bitmap_map;
+
struct pi_desc *pi_desc;
bool pi_pending;
u16 posted_intr_nv;
@@ -169,7 +172,7 @@ struct nested_vmx {
} smm;
gpa_t hv_evmcs_vmptr;
- struct page *hv_evmcs_page;
+ struct kvm_host_map hv_evmcs_map;
struct hv_enlightened_vmcs *hv_evmcs;
};
@@ -257,6 +260,8 @@ struct vcpu_vmx {
unsigned long host_debugctlmsr;
+ u64 msr_ia32_power_ctl;
+
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9591abde62a..536b78c4af6e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1100,15 +1100,15 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(vcpu);
u64 data;
int err;
err = kvm_pmu_rdpmc(vcpu, ecx, &data);
if (err)
return err;
- kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
- kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+ kvm_rax_write(vcpu, (u32)data);
+ kvm_rdx_write(vcpu, data >> 32);
return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);
@@ -1174,6 +1174,9 @@ static u32 emulated_msrs[] = {
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
MSR_AMD64_VIRT_SPEC_CTRL,
+ MSR_IA32_POWER_CTL,
+
+ MSR_K7_HWCR,
};
static unsigned num_emulated_msrs;
@@ -1262,31 +1265,49 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return 0;
}
-bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (efer & efer_reserved_bits)
- return false;
-
if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
- return false;
+ return false;
if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
- return false;
+ return false;
+
+ if (efer & (EFER_LME | EFER_LMA) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ return false;
+
+ if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
+ return false;
return true;
+
+}
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+ if (efer & efer_reserved_bits)
+ return false;
+
+ return __kvm_valid_efer(vcpu, efer);
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);
-static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 old_efer = vcpu->arch.efer;
+ u64 efer = msr_info->data;
- if (!kvm_valid_efer(vcpu, efer))
- return 1;
+ if (efer & efer_reserved_bits)
+ return false;
- if (is_paging(vcpu)
- && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
- return 1;
+ if (!msr_info->host_initiated) {
+ if (!__kvm_valid_efer(vcpu, efer))
+ return 1;
+
+ if (is_paging(vcpu) &&
+ (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
+ return 1;
+ }
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
@@ -2279,6 +2300,18 @@ static void kvmclock_sync_fn(struct work_struct *work)
KVMCLOCK_SYNC_PERIOD);
}
+/*
+ * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
+ */
+static bool can_set_mci_status(struct kvm_vcpu *vcpu)
+{
+ /* McStatusWrEn enabled? */
+ if (guest_cpuid_is_amd(vcpu))
+ return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
+
+ return false;
+}
+
static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2310,9 +2343,14 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10)) != ~(u64)0)
return -1;
+
+ /* MCi_STATUS */
if (!msr_info->host_initiated &&
- (offset & 0x3) == 1 && data != 0)
- return -1;
+ (offset & 0x3) == 1 && data != 0) {
+ if (!can_set_mci_status(vcpu))
+ return -1;
+ }
+
vcpu->arch.mce_banks[offset] = data;
break;
}
@@ -2456,13 +2494,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.arch_capabilities = data;
break;
case MSR_EFER:
- return set_efer(vcpu, data);
+ return set_efer(vcpu, msr_info);
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
data &= ~(u64)0x8; /* ignore TLB cache disable */
- data &= ~(u64)0x40000; /* ignore Mc status write enable */
- if (data != 0) {
+
+ /* Handle McStatusWrEn */
+ if (data == BIT_ULL(18)) {
+ vcpu->arch.msr_hwcr = data;
+ } else if (data != 0) {
vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
return 1;
@@ -2736,7 +2777,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K8_SYSCFG:
case MSR_K8_TSEG_ADDR:
case MSR_K8_TSEG_MASK:
- case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
@@ -2900,6 +2940,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_MISC_FEATURES_ENABLES:
msr_info->data = vcpu->arch.msr_misc_features_enables;
break;
+ case MSR_K7_HWCR:
+ msr_info->data = vcpu->arch.msr_hwcr;
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -3079,9 +3122,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
- case KVM_CAP_NR_MEMSLOTS:
- r = KVM_USER_MEM_SLOTS;
- break;
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
@@ -5521,9 +5561,9 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int bytes,
struct x86_exception *exception)
{
+ struct kvm_host_map map;
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
- struct page *page;
char *kaddr;
bool exchanged;
@@ -5540,12 +5580,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
goto emul_write;
- page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
+ if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
goto emul_write;
- kaddr = kmap_atomic(page);
- kaddr += offset_in_page(gpa);
+ kaddr = map.hva + offset_in_page(gpa);
+
switch (bytes) {
case 1:
exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
@@ -5562,13 +5601,12 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
default:
BUG();
}
- kunmap_atomic(kaddr);
- kvm_release_page_dirty(page);
+
+ kvm_vcpu_unmap(vcpu, &map, true);
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
- kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
kvm_page_track_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -6558,7 +6596,7 @@ static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
{
- unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ unsigned long val = kvm_rax_read(vcpu);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
if (ret)
@@ -6593,8 +6631,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
}
/* For size less than 4 we merge, else we zero extend */
- val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
- : 0;
+ val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
/*
* Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
@@ -6602,7 +6639,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
*/
emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
vcpu->arch.pio.port, &val, 1);
- kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ kvm_rax_write(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -6614,12 +6651,12 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
int ret;
/* For size less than 4 we merge, else we zero extend */
- val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+ val = (size < 4) ? kvm_rax_read(vcpu) : 0;
ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
&val, 1);
if (ret) {
- kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ kvm_rax_write(vcpu, val);
return ret;
}
@@ -6854,10 +6891,20 @@ static unsigned long kvm_get_guest_ip(void)
return ip;
}
+static void kvm_handle_intel_pt_intr(void)
+{
+ struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
+
+ kvm_make_request(KVM_REQ_PMI, vcpu);
+ __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+ (unsigned long *)&vcpu->arch.pmu.global_status);
+}
+
static struct perf_guest_info_callbacks kvm_guest_cbs = {
.is_in_guest = kvm_is_in_guest,
.is_user_mode = kvm_is_user_mode,
.get_guest_ip = kvm_get_guest_ip,
+ .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
static void kvm_set_mmio_spte_mask(void)
@@ -7133,11 +7180,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
- nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
- a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
- a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
- a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
- a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
+ nr = kvm_rax_read(vcpu);
+ a0 = kvm_rbx_read(vcpu);
+ a1 = kvm_rcx_read(vcpu);
+ a2 = kvm_rdx_read(vcpu);
+ a3 = kvm_rsi_read(vcpu);
trace_kvm_hypercall(nr, a0, a1, a2, a3);
@@ -7178,7 +7225,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
out:
if (!op_64_bit)
ret = (u32)ret;
- kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+ kvm_rax_write(vcpu, ret);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
@@ -8280,23 +8327,23 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
}
- regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
- regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
- regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
- regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
- regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
- regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
- regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+ regs->rax = kvm_rax_read(vcpu);
+ regs->rbx = kvm_rbx_read(vcpu);
+ regs->rcx = kvm_rcx_read(vcpu);
+ regs->rdx = kvm_rdx_read(vcpu);
+ regs->rsi = kvm_rsi_read(vcpu);
+ regs->rdi = kvm_rdi_read(vcpu);
+ regs->rsp = kvm_rsp_read(vcpu);
+ regs->rbp = kvm_rbp_read(vcpu);
#ifdef CONFIG_X86_64
- regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
- regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
- regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
- regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
- regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
- regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
- regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
- regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
+ regs->r8 = kvm_r8_read(vcpu);
+ regs->r9 = kvm_r9_read(vcpu);
+ regs->r10 = kvm_r10_read(vcpu);
+ regs->r11 = kvm_r11_read(vcpu);
+ regs->r12 = kvm_r12_read(vcpu);
+ regs->r13 = kvm_r13_read(vcpu);
+ regs->r14 = kvm_r14_read(vcpu);
+ regs->r15 = kvm_r15_read(vcpu);
#endif
regs->rip = kvm_rip_read(vcpu);
@@ -8316,23 +8363,23 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
- kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
- kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
- kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
- kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
- kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
- kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
- kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
+ kvm_rax_write(vcpu, regs->rax);
+ kvm_rbx_write(vcpu, regs->rbx);
+ kvm_rcx_write(vcpu, regs->rcx);
+ kvm_rdx_write(vcpu, regs->rdx);
+ kvm_rsi_write(vcpu, regs->rsi);
+ kvm_rdi_write(vcpu, regs->rdi);
+ kvm_rsp_write(vcpu, regs->rsp);
+ kvm_rbp_write(vcpu, regs->rbp);
#ifdef CONFIG_X86_64
- kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
- kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
- kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
- kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
- kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
- kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
- kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
- kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+ kvm_r8_write(vcpu, regs->r8);
+ kvm_r9_write(vcpu, regs->r9);
+ kvm_r10_write(vcpu, regs->r10);
+ kvm_r11_write(vcpu, regs->r11);
+ kvm_r12_write(vcpu, regs->r12);
+ kvm_r13_write(vcpu, regs->r13);
+ kvm_r14_write(vcpu, regs->r14);
+ kvm_r15_write(vcpu, regs->r15);
#endif
kvm_rip_write(vcpu, regs->rip);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 534d3f28bb01..a470ff0868c5 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -345,6 +345,16 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}
+
+static inline bool kvm_pat_valid(u64 data)
+{
+ if (data & 0xF8F8F8F8F8F8F8F8ull)
+ return false;
+ /* 0, 1, 4, 5, 6, 7 are valid values. */
+ return (data | ((data & 0x0202020202020202ull) << 1)) == data;
+}
+
void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+
#endif
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 59726aaf4671..0d1c47cbbdd6 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -881,9 +881,10 @@ static int mpx_unmap_tables(struct mm_struct *mm,
* the virtual address region start...end have already been split if
* necessary, and the 'vma' is the first vma in this range (start -> end).
*/
-void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
{
+ struct vm_area_struct *vma;
int ret;
/*
@@ -902,11 +903,12 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
* which should not occur normally. Being strict about it here
* helps ensure that we do not have an exploitable stack overflow.
*/
- do {
+ vma = find_vma(mm, start);
+ while (vma && vma->vm_start < end) {
if (vma->vm_flags & VM_MPX)
return;
vma = vma->vm_next;
- } while (vma && vma->vm_start < end);
+ }
ret = mpx_unmap_tables(mm, start, end);
if (ret)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 30084eaf8422..5fa0ee1c8e00 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -398,3 +398,9 @@
425 common io_uring_setup sys_io_uring_setup
426 common io_uring_enter sys_io_uring_enter
427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index 820e8738af11..b1506376d502 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -18,6 +18,7 @@
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/errno.h>
#include <linux/reboot.h>
#include <linux/kdev_t.h>