Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 16
-rw-r--r--  arch/alpha/kernel/console.c | 1
-rw-r--r--  arch/arm/boot/dts/omap3-n900.dts | 2
-rw-r--r--  arch/arm/include/asm/bitops.h | 1
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 9
-rw-r--r--  arch/arm/include/asm/kvm_hyp.h | 3
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 99
-rw-r--r--  arch/arm/include/asm/kvm_psci.h | 27
-rw-r--r--  arch/arm/include/asm/pci.h | 5
-rw-r--r--  arch/arm/include/asm/pgtable.h | 4
-rw-r--r--  arch/arm/kernel/bios32.c | 2
-rw-r--r--  arch/arm/kvm/handle_exit.c | 17
-rw-r--r--  arch/arm/kvm/hyp/switch.c | 1
-rw-r--r--  arch/arm/kvm/hyp/tlb.c | 1
-rw-r--r--  arch/arm/mach-mvebu/Kconfig | 1
-rw-r--r--  arch/arm/mach-vt8500/Kconfig | 1
-rw-r--r--  arch/arm64/Kconfig | 1
-rw-r--r--  arch/arm64/include/asm/assembler.h | 64
-rw-r--r--  arch/arm64/include/asm/barrier.h | 22
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 7
-rw-r--r--  arch/arm64/include/asm/futex.h | 9
-rw-r--r--  arch/arm64/include/asm/kasan.h | 17
-rw-r--r--  arch/arm64/include/asm/kernel-pgtable.h | 12
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 8
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 36
-rw-r--r--  arch/arm64/include/asm/kvm_psci.h | 27
-rw-r--r--  arch/arm64/include/asm/memory.h | 3
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 2
-rw-r--r--  arch/arm64/include/asm/pgtable-prot.h | 30
-rw-r--r--  arch/arm64/include/asm/processor.h | 3
-rw-r--r--  arch/arm64/include/asm/spinlock.h | 4
-rw-r--r--  arch/arm64/include/asm/uaccess.h | 155
-rw-r--r--  arch/arm64/kernel/acpi.c | 4
-rw-r--r--  arch/arm64/kernel/arm64ksyms.c | 4
-rw-r--r--  arch/arm64/kernel/bpi.S | 44
-rw-r--r--  arch/arm64/kernel/cpu-reset.S | 2
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 77
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 42
-rw-r--r--  arch/arm64/kernel/entry.S | 33
-rw-r--r--  arch/arm64/kernel/head.S | 30
-rw-r--r--  arch/arm64/kernel/hibernate-asm.S | 4
-rw-r--r--  arch/arm64/kernel/perf_event.c | 5
-rw-r--r--  arch/arm64/kernel/sleep.S | 2
-rw-r--r--  arch/arm64/kvm/guest.c | 15
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 14
-rw-r--r--  arch/arm64/kvm/hyp-init.S | 2
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c | 1
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S | 20
-rw-r--r--  arch/arm64/kvm/hyp/switch.c | 15
-rw-r--r--  arch/arm64/kvm/hyp/tlb.c | 1
-rw-r--r--  arch/arm64/lib/clear_user.S | 6
-rw-r--r--  arch/arm64/lib/copy_in_user.S | 5
-rw-r--r--  arch/arm64/mm/cache.S | 32
-rw-r--r--  arch/arm64/mm/fault.c | 19
-rw-r--r--  arch/arm64/mm/kasan_init.c | 3
-rw-r--r--  arch/arm64/mm/mmu.c | 13
-rw-r--r--  arch/arm64/mm/proc.S | 212
-rw-r--r--  arch/blackfin/include/uapi/asm/poll.h | 19
-rw-r--r--  arch/cris/arch-v10/drivers/gpio.c | 2
-rw-r--r--  arch/cris/arch-v10/drivers/sync_serial.c | 8
-rw-r--r--  arch/cris/arch-v32/drivers/sync_serial.c | 10
-rw-r--r--  arch/cris/kernel/Makefile | 17
-rw-r--r--  arch/cris/kernel/setup.c | 2
-rw-r--r--  arch/frv/include/uapi/asm/poll.h | 19
-rw-r--r--  arch/ia64/kernel/perfmon.c | 15
-rw-r--r--  arch/ia64/mm/discontig.c | 6
-rw-r--r--  arch/ia64/mm/init.c | 18
-rw-r--r--  arch/m68k/include/asm/bitops.h | 3
-rw-r--r--  arch/m68k/include/uapi/asm/poll.h | 19
-rw-r--r--  arch/mips/Kconfig | 24
-rw-r--r--  arch/mips/Makefile | 8
-rw-r--r--  arch/mips/bcm47xx/Platform | 1
-rw-r--r--  arch/mips/bcm63xx/boards/Kconfig | 1
-rw-r--r--  arch/mips/boot/compressed/Makefile | 6
-rw-r--r--  arch/mips/boot/dts/ingenic/Makefile | 1
-rw-r--r--  arch/mips/boot/dts/ingenic/gcw0.dts | 62
-rw-r--r--  arch/mips/boot/dts/ingenic/jz4770.dtsi | 212
-rw-r--r--  arch/mips/configs/bigsur_defconfig | 1
-rw-r--r--  arch/mips/configs/gcw0_defconfig | 27
-rw-r--r--  arch/mips/configs/generic/board-ranchu.config | 30
-rw-r--r--  arch/mips/configs/ip27_defconfig | 1
-rw-r--r--  arch/mips/configs/ip32_defconfig | 1
-rw-r--r--  arch/mips/configs/malta_defconfig | 5
-rw-r--r--  arch/mips/configs/malta_kvm_defconfig | 4
-rw-r--r--  arch/mips/configs/malta_kvm_guest_defconfig | 4
-rw-r--r--  arch/mips/configs/malta_qemu_32r6_defconfig | 1
-rw-r--r--  arch/mips/configs/maltaaprp_defconfig | 1
-rw-r--r--  arch/mips/configs/maltasmvp_defconfig | 1
-rw-r--r--  arch/mips/configs/maltasmvp_eva_defconfig | 1
-rw-r--r--  arch/mips/configs/maltaup_defconfig | 1
-rw-r--r--  arch/mips/configs/maltaup_xpa_defconfig | 5
-rw-r--r--  arch/mips/configs/nlm_xlp_defconfig | 1
-rw-r--r--  arch/mips/configs/nlm_xlr_defconfig | 1
-rw-r--r--  arch/mips/configs/pnx8335_stb225_defconfig | 1
-rw-r--r--  arch/mips/configs/sb1250_swarm_defconfig | 1
-rw-r--r--  arch/mips/generic/Kconfig | 10
-rw-r--r--  arch/mips/generic/Makefile | 1
-rw-r--r--  arch/mips/generic/board-ranchu.c | 93
-rw-r--r--  arch/mips/generic/irq.c | 18
-rw-r--r--  arch/mips/include/asm/bootinfo.h | 2
-rw-r--r--  arch/mips/include/asm/checksum.h | 2
-rw-r--r--  arch/mips/include/asm/mach-loongson64/boot_param.h | 2
-rw-r--r--  arch/mips/include/asm/machine.h | 2
-rw-r--r--  arch/mips/include/asm/mipsregs.h | 702
-rw-r--r--  arch/mips/include/asm/msa.h | 63
-rw-r--r--  arch/mips/include/uapi/asm/poll.h | 19
-rw-r--r--  arch/mips/jazz/setup.c | 8
-rw-r--r--  arch/mips/jz4740/Kconfig | 10
-rw-r--r--  arch/mips/jz4740/prom.c | 25
-rw-r--r--  arch/mips/jz4740/setup.c | 22
-rw-r--r--  arch/mips/jz4740/time.c | 2
-rw-r--r--  arch/mips/kernel/branch.c | 2
-rw-r--r--  arch/mips/kernel/cps-vec.S | 17
-rw-r--r--  arch/mips/kernel/ftrace.c | 2
-rw-r--r--  arch/mips/kernel/mips-cpc.c | 13
-rw-r--r--  arch/mips/kernel/rtlx.c | 4
-rw-r--r--  arch/mips/kernel/setup.c | 39
-rw-r--r--  arch/mips/kernel/smp-cps.c | 2
-rw-r--r--  arch/mips/kernel/watch.c | 31
-rw-r--r--  arch/mips/kvm/Kconfig | 1
-rw-r--r--  arch/mips/kvm/mips.c | 67
-rw-r--r--  arch/mips/loongson64/Kconfig | 2
-rw-r--r--  arch/mips/loongson64/common/mem.c | 2
-rw-r--r--  arch/mips/loongson64/loongson-3/numa.c | 2
-rw-r--r--  arch/mips/math-emu/cp1emu.c | 28
-rw-r--r--  arch/mips/math-emu/dp_add.c | 3
-rw-r--r--  arch/mips/math-emu/dp_div.c | 1
-rw-r--r--  arch/mips/math-emu/dp_fmax.c | 2
-rw-r--r--  arch/mips/math-emu/dp_fmin.c | 2
-rw-r--r--  arch/mips/math-emu/dp_maddf.c | 8
-rw-r--r--  arch/mips/math-emu/dp_mul.c | 4
-rw-r--r--  arch/mips/math-emu/dp_sqrt.c | 8
-rw-r--r--  arch/mips/math-emu/dp_sub.c | 2
-rw-r--r--  arch/mips/math-emu/ieee754dp.h | 3
-rw-r--r--  arch/mips/math-emu/sp_add.c | 3
-rw-r--r--  arch/mips/math-emu/sp_div.c | 1
-rw-r--r--  arch/mips/math-emu/sp_fdp.c | 3
-rw-r--r--  arch/mips/math-emu/sp_fmax.c | 2
-rw-r--r--  arch/mips/math-emu/sp_fmin.c | 2
-rw-r--r--  arch/mips/math-emu/sp_maddf.c | 3
-rw-r--r--  arch/mips/math-emu/sp_mul.c | 1
-rw-r--r--  arch/mips/math-emu/sp_sqrt.c | 3
-rw-r--r--  arch/mips/math-emu/sp_sub.c | 1
-rw-r--r--  arch/mips/math-emu/sp_tlong.c | 1
-rw-r--r--  arch/mips/mm/dma-default.c | 6
-rw-r--r--  arch/mips/mm/sc-mips.c | 9
-rw-r--r--  arch/mips/mti-malta/malta-setup.c | 10
-rw-r--r--  arch/mips/txx9/rbtx4939/setup.c | 4
-rw-r--r--  arch/nios2/Kconfig | 1
-rw-r--r--  arch/nios2/boot/dts/3c120_devboard.dts | 16
-rw-r--r--  arch/nios2/configs/10m50_defconfig | 1
-rw-r--r--  arch/nios2/configs/3c120_defconfig | 1
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 1
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-4k.h | 3
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash-64k.h | 16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/hash.h | 13
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 16
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 4
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 2
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 12
-rw-r--r--  arch/powerpc/include/asm/kexec.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 14
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 8
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 4
-rw-r--r--  arch/powerpc/include/asm/membarrier.h | 27
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 1
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 1
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 1
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 6
-rw-r--r--  arch/powerpc/include/asm/topology.h | 10
-rw-r--r--  arch/powerpc/include/asm/xive.h | 3
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 4
-rw-r--r--  arch/powerpc/kernel/eeh.c | 6
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 2
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 6
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 11
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 20
-rw-r--r--  arch/powerpc/kernel/pci_32.c | 3
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c | 7
-rw-r--r--  arch/powerpc/kernel/rtasd.c | 2
-rw-r--r--  arch/powerpc/kvm/Kconfig | 3
-rw-r--r--  arch/powerpc/kvm/book3s.c | 24
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 38
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 70
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 231
-rw-r--r--  arch/powerpc/kvm/book3s_interrupts.S | 4
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 20
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 109
-rw-r--r--  arch/powerpc/kvm/book3s_xive.h | 15
-rw-r--r--  arch/powerpc/kvm/booke.c | 51
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 36
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 200
-rw-r--r--  arch/powerpc/kvm/timing.c | 3
-rw-r--r--  arch/powerpc/mm/hash64_4k.c | 4
-rw-r--r--  arch/powerpc/mm/hash64_64k.c | 8
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 1
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c | 10
-rw-r--r--  arch/powerpc/mm/init-common.c | 4
-rw-r--r--  arch/powerpc/mm/init_64.c | 17
-rw-r--r--  arch/powerpc/mm/mem.c | 11
-rw-r--r--  arch/powerpc/mm/mmu_context.c | 7
-rw-r--r--  arch/powerpc/mm/numa.c | 5
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 117
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 4
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c | 9
-rw-r--r--  arch/powerpc/platforms/cell/spufs/backing_ops.c | 8
-rw-r--r--  arch/powerpc/platforms/cell/spufs/file.c | 10
-rw-r--r--  arch/powerpc/platforms/cell/spufs/hw_ops.c | 8
-rw-r--r--  arch/powerpc/platforms/maple/time.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/feature.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-prd.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c | 16
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c | 31
-rw-r--r--  arch/powerpc/sysdev/i8259.c | 6
-rw-r--r--  arch/powerpc/sysdev/mv64x60_pci.c | 4
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 18
-rw-r--r--  arch/riscv/Kconfig | 10
-rw-r--r--  arch/riscv/include/asm/Kbuild | 1
-rw-r--r--  arch/riscv/include/asm/csr.h | 14
-rw-r--r--  arch/riscv/include/asm/ftrace.h | 10
-rw-r--r--  arch/riscv/include/asm/mmu_context.h | 17
-rw-r--r--  arch/riscv/include/asm/tlbflush.h | 20
-rw-r--r--  arch/riscv/include/asm/unistd.h | 1
-rw-r--r--  arch/riscv/kernel/Makefile | 7
-rw-r--r--  arch/riscv/kernel/entry.S | 9
-rw-r--r--  arch/riscv/kernel/ftrace.c | 41
-rw-r--r--  arch/riscv/kernel/head.S | 6
-rw-r--r--  arch/riscv/kernel/mcount.S | 126
-rw-r--r--  arch/riscv/kernel/setup.c | 44
-rw-r--r--  arch/riscv/kernel/vdso.c | 2
-rw-r--r--  arch/riscv/mm/fault.c | 4
-rw-r--r--  arch/riscv/mm/init.c | 12
-rw-r--r--  arch/s390/Kconfig | 46
-rw-r--r--  arch/s390/Makefile | 10
-rw-r--r--  arch/s390/include/asm/barrier.h | 24
-rw-r--r--  arch/s390/include/asm/bitops.h | 5
-rw-r--r--  arch/s390/include/asm/css_chars.h | 4
-rw-r--r--  arch/s390/include/asm/eadm.h | 2
-rw-r--r--  arch/s390/include/asm/facility.h | 18
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 126
-rw-r--r--  arch/s390/include/asm/lowcore.h | 9
-rw-r--r--  arch/s390/include/asm/nospec-branch.h | 18
-rw-r--r--  arch/s390/include/asm/processor.h | 4
-rw-r--r--  arch/s390/include/asm/runtime_instr.h | 67
-rw-r--r--  arch/s390/include/asm/sclp.h | 1
-rw-r--r--  arch/s390/include/asm/sysinfo.h | 3
-rw-r--r--  arch/s390/include/asm/thread_info.h | 4
-rw-r--r--  arch/s390/include/uapi/asm/runtime_instr.h | 74
-rw-r--r--  arch/s390/kernel/Makefile | 4
-rw-r--r--  arch/s390/kernel/alternative.c | 26
-rw-r--r--  arch/s390/kernel/early.c | 5
-rw-r--r--  arch/s390/kernel/entry.S | 249
-rw-r--r--  arch/s390/kernel/ipl.c | 1
-rw-r--r--  arch/s390/kernel/kprobes.c | 2
-rw-r--r--  arch/s390/kernel/module.c | 62
-rw-r--r--  arch/s390/kernel/nospec-branch.c | 100
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_events.c | 2
-rw-r--r--  arch/s390/kernel/processor.c | 18
-rw-r--r--  arch/s390/kernel/runtime_instr.c | 10
-rw-r--r--  arch/s390/kernel/setup.c | 8
-rw-r--r--  arch/s390/kernel/smp.c | 7
-rw-r--r--  arch/s390/kernel/sysinfo.c | 2
-rw-r--r--  arch/s390/kernel/vmlinux.lds.S | 14
-rw-r--r--  arch/s390/kvm/Kconfig | 1
-rw-r--r--  arch/s390/kvm/diag.c | 1
-rw-r--r--  arch/s390/kvm/interrupt.c | 288
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 209
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 22
-rw-r--r--  arch/s390/kvm/priv.c | 38
-rw-r--r--  arch/s390/kvm/sigp.c | 18
-rw-r--r--  arch/s390/kvm/vsie.c | 91
-rw-r--r--  arch/s390/mm/gmap.c | 44
-rw-r--r--  arch/s390/mm/init.c | 7
-rw-r--r--  arch/s390/mm/vmem.c | 6
-rw-r--r--  arch/score/kernel/setup.c | 4
-rw-r--r--  arch/sh/mm/init.c | 10
-rw-r--r--  arch/sparc/include/uapi/asm/poll.h | 28
-rw-r--r--  arch/sparc/mm/init_64.c | 5
-rw-r--r--  arch/um/drivers/mconsole_kern.c | 3
-rw-r--r--  arch/unicore32/include/asm/bitops.h | 2
-rw-r--r--  arch/x86/Kconfig | 4
-rw-r--r--  arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c | 10
-rw-r--r--  arch/x86/entry/entry_32.S | 8
-rw-r--r--  arch/x86/entry/entry_64.S | 7
-rw-r--r--  arch/x86/events/perf_event.h | 3
-rw-r--r--  arch/x86/hyperv/hv_init.c | 123
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 1
-rw-r--r--  arch/x86/include/asm/hardirq.h | 3
-rw-r--r--  arch/x86/include/asm/intel-family.h | 6
-rw-r--r--  arch/x86/include/asm/intel_pmc_ipc.h | 6
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 7
-rw-r--r--  arch/x86/include/asm/kasan.h | 12
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 22
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 32
-rw-r--r--  arch/x86/include/asm/msr-index.h | 2
-rw-r--r--  arch/x86/include/asm/pat.h | 2
-rw-r--r--  arch/x86/include/asm/pmc_core.h | 27
-rw-r--r--  arch/x86/include/asm/svm.h | 3
-rw-r--r--  arch/x86/include/asm/sync_core.h | 28
-rw-r--r--  arch/x86/include/uapi/asm/hyperv.h | 27
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 4
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 3
-rw-r--r--  arch/x86/kernel/apm_32.c | 3
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 66
-rw-r--r--  arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 6
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/irq.c | 9
-rw-r--r--  arch/x86/kernel/kvm.c | 49
-rw-r--r--  arch/x86/kvm/Kconfig | 8
-rw-r--r--  arch/x86/kvm/cpuid.c | 22
-rw-r--r--  arch/x86/kvm/emulate.c | 62
-rw-r--r--  arch/x86/kvm/irq.c | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 25
-rw-r--r--  arch/x86/kvm/lapic.h | 4
-rw-r--r--  arch/x86/kvm/mmu.c | 26
-rw-r--r--  arch/x86/kvm/mmu_audit.c | 2
-rw-r--r--  arch/x86/kvm/svm.c | 1199
-rw-r--r--  arch/x86/kvm/vmx.c | 756
-rw-r--r--  arch/x86/kvm/vmx_shadow_fields.h | 77
-rw-r--r--  arch/x86/kvm/x86.c | 338
-rw-r--r--  arch/x86/kvm/x86.h | 33
-rw-r--r--  arch/x86/mm/init_32.c | 9
-rw-r--r--  arch/x86/mm/init_64.c | 94
-rw-r--r--  arch/x86/mm/pat.c | 19
-rw-r--r--  arch/x86/mm/tlb.c | 6
-rw-r--r--  arch/x86/pci/irq.c | 3
-rw-r--r--  arch/x86/pci/xen.c | 4
-rw-r--r--  arch/x86/xen/p2m.c | 6
-rw-r--r--  arch/x86/xen/xen-head.S | 16
-rw-r--r--  arch/xtensa/include/asm/kasan.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/poll.h | 21
339 files changed, 7125 insertions, 2477 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 467dfa35bf96..76c0b54443b1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -538,16 +538,10 @@ config HAVE_CC_STACKPROTECTOR
- its compiler supports the -fstack-protector option
- it has implemented a stack canary (e.g. __stack_chk_guard)
-config CC_STACKPROTECTOR
- def_bool n
- help
- Set when a stack-protector mode is enabled, so that the build
- can enable kernel-side support for the GCC feature.
-
choice
prompt "Stack Protector buffer overflow detection"
depends on HAVE_CC_STACKPROTECTOR
- default CC_STACKPROTECTOR_NONE
+ default CC_STACKPROTECTOR_AUTO
help
This option turns on the "stack-protector" GCC feature. This
feature puts, at the beginning of functions, a canary value on
@@ -564,7 +558,6 @@ config CC_STACKPROTECTOR_NONE
config CC_STACKPROTECTOR_REGULAR
bool "Regular"
- select CC_STACKPROTECTOR
help
Functions will have the stack-protector canary logic added if they
have an 8-byte or larger character array on the stack.
@@ -578,7 +571,6 @@ config CC_STACKPROTECTOR_REGULAR
config CC_STACKPROTECTOR_STRONG
bool "Strong"
- select CC_STACKPROTECTOR
help
Functions will have the stack-protector canary logic added in any
of the following conditions:
@@ -596,6 +588,12 @@ config CC_STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.
+config CC_STACKPROTECTOR_AUTO
+ bool "Automatic"
+ help
+ If the compiler supports it, the best available stack-protector
+ option will be chosen.
+
endchoice
config THIN_ARCHIVES
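
The Kconfig help text above describes the canary scheme the stack-protector modes enable: a value placed just before the return address on function entry and validated before return. A minimal C sketch of what the compiler instruments, assuming the standard GCC hooks (the explicit checks below are normally emitted by the compiler itself, and the function body is made up for illustration):

	extern unsigned long __stack_chk_guard;	/* randomised per-boot canary */
	extern void __stack_chk_fail(void);	/* called when the canary is clobbered */

	int parse_name(const char *src)
	{
		unsigned long canary = __stack_chk_guard;	/* prologue: stash canary */
		char buf[16];	/* 8+ byte char array: instrumented under "Regular" */

		__builtin_strncpy(buf, src, sizeof(buf) - 1);	/* possible overflow site */
		buf[sizeof(buf) - 1] = '\0';

		if (canary != __stack_chk_guard)	/* epilogue: validate before return */
			__stack_chk_fail();		/* stack smash detected */
		return buf[0];
	}

An overflow of buf that reaches the return address must first overwrite the stashed canary, so the epilogue comparison fails before the corrupted return address can be used.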
diff --git a/arch/alpha/kernel/console.c b/arch/alpha/kernel/console.c
index 8e9a41966881..5476279329a6 100644
--- a/arch/alpha/kernel/console.c
+++ b/arch/alpha/kernel/console.c
@@ -21,6 +21,7 @@
struct pci_controller *pci_vga_hose;
static struct resource alpha_vga = {
.name = "alpha-vga+",
+ .flags = IORESOURCE_IO,
.start = 0x3C0,
.end = 0x3DF
};
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 72c765a3b354..ab930581fc7a 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -780,6 +780,8 @@
reset-gpio = <&gpio4 6 GPIO_ACTIVE_HIGH>; /* 102 */
+ lens-focus = <&ad5820>;
+
port {
csi_cam1: endpoint {
bus-type = <3>; /* CCP2 */
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index ce5ee762ed66..4cab9bb823fb 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -338,6 +338,7 @@ static inline int find_next_bit_le(const void *p, int size, int offset)
#endif
+#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>
/*
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 3d22eb87f919..9003bd19cb70 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -131,7 +131,7 @@ static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
{
unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
- return cpsr_mode > USR_MODE;;
+ return cpsr_mode > USR_MODE;
}
static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index acbf9ec7b396..248b930563e5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -48,6 +48,8 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -306,4 +308,11 @@ static inline void kvm_fpsimd_flush_cpu_state(void) {}
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
+
+static inline bool kvm_arm_harden_branch_predictor(void)
+{
+ /* No way to detect it yet, pretend it is not there. */
+ return false;
+}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index ab20ffa8b9e7..1ab8329e9ff7 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -21,7 +21,6 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/cp15.h>
-#include <asm/kvm_mmu.h>
#include <asm/vfp.h>
#define __hyp_text __section(.hyp.text) notrace
@@ -69,6 +68,8 @@
#define HIFAR __ACCESS_CP15(c6, 4, c0, 2)
#define HPFAR __ACCESS_CP15(c6, 4, c0, 4)
#define ICIALLUIS __ACCESS_CP15(c7, 0, c1, 0)
+#define BPIALLIS __ACCESS_CP15(c7, 0, c1, 6)
+#define ICIMVAU __ACCESS_CP15(c7, 0, c5, 1)
#define ATS1CPR __ACCESS_CP15(c7, 0, c8, 0)
#define TLBIALLIS __ACCESS_CP15(c8, 0, c3, 0)
#define TLBIALL __ACCESS_CP15(c8, 0, c7, 0)
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a2d176a308bd..de1b919404e4 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -37,6 +37,8 @@
#include <linux/highmem.h>
#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/kvm_hyp.h>
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
@@ -83,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
}
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+ pte_val(pte) &= ~L_PTE_XN;
+ return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_SECT_XN;
+ return pmd;
+}
+
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
@@ -93,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
}
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+ return !(pte_val(*pte) & L_PTE_XN);
+}
+
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
@@ -103,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
}
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+ return !(pmd_val(*pmd) & PMD_SECT_XN);
+}
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -126,10 +150,36 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
}
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
- kvm_pfn_t pfn,
- unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+ /*
+ * Clean the dcache to the Point of Coherency.
+ *
+ * We need to do this through a kernel mapping (using the
+ * user-space mapping has proved to be the wrong
+ * solution). For that, we need to kmap one page at a time,
+ * and iterate over the range.
+ */
+
+ VM_BUG_ON(size & ~PAGE_MASK);
+
+ while (size) {
+ void *va = kmap_atomic_pfn(pfn);
+
+ kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+ size -= PAGE_SIZE;
+ pfn++;
+
+ kunmap_atomic(va);
+ }
+}
+
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+ unsigned long size)
{
+ u32 iclsz;
+
/*
* If we are going to insert an instruction page and the icache is
* either VIPT or PIPT, there is a potential problem where the host
@@ -141,23 +191,40 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
*
* VIVT caches are tagged using both the ASID and the VMID and don't
* need any kind of flushing (DDI 0406C.b - Page B3-1392).
- *
- * We need to do this through a kernel mapping (using the
- * user-space mapping has proved to be the wrong
- * solution). For that, we need to kmap one page at a time,
- * and iterate over the range.
*/
VM_BUG_ON(size & ~PAGE_MASK);
+ if (icache_is_vivt_asid_tagged())
+ return;
+
+ if (!icache_is_pipt()) {
+ /* any kind of VIPT cache */
+ __flush_icache_all();
+ return;
+ }
+
+ /*
+ * CTR IminLine contains Log2 of the number of words in the
+ * cache line, so we can get the number of words as
+ * 2 << (IminLine - 1). To get the number of bytes, we
+ * multiply by 4 (the number of bytes in a 32-bit word), and
+ * get 4 << (IminLine).
+ */
+ iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
+
while (size) {
void *va = kmap_atomic_pfn(pfn);
+ void *end = va + PAGE_SIZE;
+ void *addr = va;
- kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+ do {
+ write_sysreg(addr, ICIMVAU);
+ addr += iclsz;
+ } while (addr < end);
- if (icache_is_pipt())
- __cpuc_coherent_user_range((unsigned long)va,
- (unsigned long)va + PAGE_SIZE);
+ dsb(ishst);
+ isb();
size -= PAGE_SIZE;
pfn++;
@@ -165,9 +232,11 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
kunmap_atomic(va);
}
- if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
- /* any kind of VIPT cache */
- __flush_icache_all();
+ /* Check if we need to invalidate the BTB */
+ if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
+ write_sysreg(0, BPIALLIS);
+ dsb(ishst);
+ isb();
}
}
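
The IminLine arithmetic in the comment above is easy to verify in isolation. A self-contained sketch, with a made-up CTR value for the example:

	#include <stdio.h>

	int main(void)
	{
		unsigned int ctr = 0x8444c004;		/* hypothetical CTR read, IminLine = 4 */
		unsigned int iminline = ctr & 0xf;	/* log2(words per I-cache line) */
		unsigned int words = 2u << (iminline - 1);	/* 2 << (IminLine - 1) */
		unsigned int bytes = 4u << iminline;	/* x4 bytes per 32-bit word */

		printf("I-cache line: %u words, %u bytes\n", words, bytes);
		return 0;	/* prints: I-cache line: 16 words, 64 bytes */
	}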
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
deleted file mode 100644
index 6bda945d31fa..000000000000
--- a/arch/arm/include/asm/kvm_psci.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2012 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM_KVM_PSCI_H__
-#define __ARM_KVM_PSCI_H__
-
-#define KVM_ARM_PSCI_0_1 1
-#define KVM_ARM_PSCI_0_2 2
-
-int kvm_psci_version(struct kvm_vcpu *vcpu);
-int kvm_psci_call(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 960d9dc4f380..1f0de808d111 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -10,10 +10,7 @@ extern unsigned long pcibios_min_io;
extern unsigned long pcibios_min_mem;
#define PCIBIOS_MIN_MEM pcibios_min_mem
-static inline int pcibios_assign_all_busses(void)
-{
- return pci_has_flag(PCI_REASSIGN_ALL_RSRC);
-}
+#define pcibios_assign_all_busses() pci_has_flag(PCI_REASSIGN_ALL_BUS)
#ifdef CONFIG_PCI_DOMAINS
static inline int pci_proc_domain(struct pci_bus *bus)
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 150ece66ddf3..a757401129f9 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -102,8 +102,8 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
-#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
+#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 0cd0aefb3a8f..ed46ca69813d 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -527,7 +527,7 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
struct pci_sys_data *sys;
LIST_HEAD(head);
- pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
if (hw->preinit)
hw->preinit();
pcibios_init_hw(parent, hw, &head);
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index cf8bf6bf87c4..910bd8dabb3c 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -21,7 +21,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
-#include <asm/kvm_psci.h>
+#include <kvm/arm_psci.h>
#include <trace/events/kvm.h>
#include "trace.h"
@@ -36,9 +36,9 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
- ret = kvm_psci_call(vcpu);
+ ret = kvm_hvc_call_handler(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -47,7 +47,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ /*
+ * "If an SMC instruction executed at Non-secure EL1 is
+ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
+ * Trap exception, not a Secure Monitor Call exception [...]"
+ *
+ * We need to advance the PC after the trap, as it would
+ * otherwise return to the same address...
+ */
+ vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index 330c9ce34ba5..ae45ae96aac2 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -18,6 +18,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
__asm__(".arch_extension virt");
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 6d810af2d9fd..c0edd450e104 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -19,6 +19,7 @@
*/
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
/**
* Flush per-VMID TLBs
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 9b49867154bf..6b32dc527edc 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -10,7 +10,6 @@ menuconfig ARCH_MVEBU
select ZONE_DMA if ARM_LPAE
select GPIOLIB
select PCI_QUIRKS if PCI
- select OF_ADDRESS_PCI
if ARCH_MVEBU
diff --git a/arch/arm/mach-vt8500/Kconfig b/arch/arm/mach-vt8500/Kconfig
index 1156a585dafc..8841199058ea 100644
--- a/arch/arm/mach-vt8500/Kconfig
+++ b/arch/arm/mach-vt8500/Kconfig
@@ -13,7 +13,6 @@ config ARCH_WM8505
depends on ARCH_MULTI_V5
select ARCH_VT8500
select CPU_ARM926T
- help
config ARCH_WM8750
bool "WonderMedia WM8750"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 53612879fe56..7381eeb7ef8e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -16,6 +16,7 @@ config ARM64
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
+ select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3873dd7b5a32..3c78835bba94 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -116,6 +116,24 @@
.endm
/*
+ * Value prediction barrier
+ */
+ .macro csdb
+ hint #20
+ .endm
+
+/*
+ * Sanitise a 64-bit bounded index wrt speculation, returning zero if out
+ * of bounds.
+ */
+ .macro mask_nospec64, idx, limit, tmp
+ sub \tmp, \idx, \limit
+ bic \tmp, \tmp, \idx
+ and \idx, \idx, \tmp, asr #63
+ csdb
+ .endm
+
+/*
* NOP sequence
*/
.macro nops, num
@@ -418,6 +436,27 @@ alternative_endif
.endm
/*
+ * Macro to perform instruction cache maintenance for the interval
+ * [start, end)
+ *
+ * start, end: virtual addresses describing the region
+ * label: A label to branch to on user fault.
+ * Corrupts: tmp1, tmp2
+ */
+ .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+ icache_line_size \tmp1, \tmp2
+ sub \tmp2, \tmp1, #1
+ bic \tmp2, \start, \tmp2
+9997:
+USER(\label, ic ivau, \tmp2) // invalidate I line PoU
+ add \tmp2, \tmp2, \tmp1
+ cmp \tmp2, \end
+ b.lo 9997b
+ dsb ish
+ isb
+ .endm
+
+/*
* reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
*/
.macro reset_pmuserenr_el0, tmpreg
@@ -514,7 +553,7 @@ alternative_endif
* phys: physical address, preserved
* ttbr: returns the TTBR value
*/
- .macro phys_to_ttbr, phys, ttbr
+ .macro phys_to_ttbr, ttbr, phys
#ifdef CONFIG_ARM64_PA_BITS_52
orr \ttbr, \phys, \phys, lsr #46
and \ttbr, \ttbr, #TTBR_BADDR_MASK_52
@@ -523,6 +562,29 @@ alternative_endif
#endif
.endm
+ .macro phys_to_pte, pte, phys
+#ifdef CONFIG_ARM64_PA_BITS_52
+ /*
+ * We assume \phys is 64K aligned and this is guaranteed by only
+ * supporting this configuration with 64K pages.
+ */
+ orr \pte, \phys, \phys, lsr #36
+ and \pte, \pte, #PTE_ADDR_MASK
+#else
+ mov \pte, \phys
+#endif
+ .endm
+
+ .macro pte_to_phys, phys, pte
+#ifdef CONFIG_ARM64_PA_BITS_52
+ ubfiz \phys, \pte, #(48 - 16 - 12), #16
+ bfxil \phys, \pte, #16, #32
+ lsl \phys, \phys, #16
+#else
+ and \phys, \pte, #PTE_ADDR_MASK
+#endif
+ .endm
+
/**
* Errata workaround prior to disabling the MMU. Insert an ISB immediately prior
* to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
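
The phys_to_pte/pte_to_phys macros above pack bits [51:48] of a 52-bit physical address into PTE bits [15:12], relying on the 64K alignment guaranteed by 64K pages. A C sketch of the round trip, with an assumed mask constant rather than the kernel's exact PTE_ADDR_MASK:

	#include <assert.h>
	#include <stdint.h>

	#define PTE_ADDR_MASK_52	0x0000fffffffff000ULL	/* assumed for the sketch */

	static uint64_t phys_to_pte(uint64_t phys)	/* phys must be 64K aligned */
	{
		/* PA[51:48] shift down into PTE[15:12]; the mask drops the rest */
		return (phys | (phys >> 36)) & PTE_ADDR_MASK_52;
	}

	static uint64_t pte_to_phys(uint64_t pte)
	{
		uint64_t hi = (pte >> 12) & 0xf;		/* recover PA[51:48] */
		uint64_t lo = pte & 0x0000ffffffff0000ULL;	/* PA[47:16], in place */

		return (hi << 48) | lo;
	}

	int main(void)
	{
		uint64_t pa = 0x000f123456780000ULL;	/* 52-bit, 64K-aligned PA */

		assert(pte_to_phys(phys_to_pte(pa)) == pa);	/* lossless round trip */
		return 0;
	}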
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 77651c49ef44..f11518af96a9 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -32,6 +32,7 @@
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
#define psb_csync() asm volatile("hint #17" : : : "memory")
+#define csdb() asm volatile("hint #20" : : : "memory")
#define mb() dsb(sy)
#define rmb() dsb(ld)
@@ -40,6 +41,27 @@
#define dma_rmb() dmb(oshld)
#define dma_wmb() dmb(oshst)
+/*
+ * Generate a mask for array_index_nospec() that is ~0UL when 0 <= idx < sz
+ * and 0 otherwise.
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long idx,
+ unsigned long sz)
+{
+ unsigned long mask;
+
+ asm volatile(
+ " cmp %1, %2\n"
+ " sbc %0, xzr, xzr\n"
+ : "=r" (mask)
+ : "r" (idx), "Ir" (sz)
+ : "cc");
+
+ csdb();
+ return mask;
+}
+
#define __smp_mb() dmb(ish)
#define __smp_rmb() dmb(ishld)
#define __smp_wmb() dmb(ishst)
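
For comparison, the mask this asm sequence computes can be written in portable C. The sketch below mirrors the shape of the generic fallback, assuming idx and sz are below LONG_MAX and that right-shifting a negative long is arithmetic (true on all relevant compilers):

	/* ~0UL when idx < sz, 0 otherwise */
	static inline unsigned long mask_idx_nospec(unsigned long idx, unsigned long sz)
	{
		/* top bit of (idx | (sz - 1 - idx)) is set only when idx >= sz */
		return ~(long)(idx | (sz - 1 - idx)) >> (sizeof(long) * 8 - 1);
	}

	/* Usage: clamp an untrusted index so a misprediction cannot read past sz */
	unsigned char load_nospec(const unsigned char *arr,
				  unsigned long idx, unsigned long sz)
	{
		idx &= mask_idx_nospec(idx, sz);	/* forced to 0 when out of range */
		return arr[idx];
	}

The point of the arm64 asm version is the trailing csdb(), which prevents the CPU from using a predicted value of the mask; the data dependency alone is not enough on all implementations.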
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 955130762a3c..bef9f418f089 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -52,6 +52,12 @@
* - start - virtual start address
* - end - virtual end address
*
+ * invalidate_icache_range(start, end)
+ *
+ * Invalidate the I-cache in the region described by start, end.
+ * - start - virtual start address
+ * - end - virtual end address
+ *
* __flush_cache_user_range(start, end)
*
* Ensure coherency between the I-cache and the D-cache in the
@@ -66,6 +72,7 @@
* - size - region size
*/
extern void flush_icache_range(unsigned long start, unsigned long end);
+extern int invalidate_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
extern void __inval_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5bb2fd4674e7..07fe2479d310 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,9 +48,10 @@ do { \
} while (0)
static inline int
-arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
{
int oldval = 0, ret, tmp;
+ u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
pagefault_disable();
@@ -88,15 +89,17 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
u32 val, tmp;
+ u32 __user *uaddr;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!access_ok(VERIFY_WRITE, _uaddr, sizeof(u32)))
return -EFAULT;
+ uaddr = __uaccess_mask_ptr(_uaddr);
uaccess_enable();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
" prfm pstl1strm, %2\n"
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index e266f80e45b7..8758bb008436 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -12,7 +12,8 @@
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
- * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
+ * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
*/
#define KASAN_SHADOW_START (VA_START)
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
@@ -20,14 +21,16 @@
/*
* This value is used to map an address to the corresponding shadow
* address by the following formula:
- * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
*
- * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
- * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
- * should satisfy the following equation:
- * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
+ * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
+ * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
+ * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
+ * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
+ * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
*/
-#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
+#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
+ (64 - KASAN_SHADOW_SCALE_SHIFT)))
void kasan_init(void);
void kasan_copy_shadow(pgd_t *pgdir);
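
As a worked example of the shadow formula above, every 2^KASAN_SHADOW_SCALE_SHIFT bytes of address space map to one shadow byte. A sketch with generic-KASAN parameters (scale shift 3) and a placeholder offset, not arm64's real value:

	#include <stdint.h>
	#include <stdio.h>

	#define SCALE_SHIFT	3			/* 8 bytes per shadow byte */
	#define SHADOW_OFFSET	0xdffffc0000000000ULL	/* hypothetical offset */

	static uint64_t shadow_of(uint64_t addr)
	{
		return (addr >> SCALE_SHIFT) + SHADOW_OFFSET;
	}

	int main(void)
	{
		uint64_t a = 0xffff000012345678ULL;	/* some kernel VA */

		/* adjacent 8-byte granules of a share a single shadow byte */
		printf("%#llx -> %#llx\n", (unsigned long long)a,
		       (unsigned long long)shadow_of(a));
		return 0;
	}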
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 82386e860dd2..a780f6714b44 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -123,16 +123,8 @@
/*
* Initial memory map attributes.
*/
-#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
-#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
-#else
-#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
-#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
-#endif
+#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4485ae8e98de..596f8e414a4c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -48,6 +48,8 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
@@ -415,4 +417,10 @@ static inline void kvm_arm_vhe_guest_exit(void)
{
local_daif_restore(DAIF_PROCCTX_NOIRQ);
}
+
+static inline bool kvm_arm_harden_branch_predictor(void)
+{
+ return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
+}
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 08d3bb66c8b7..f26f9cd70c72 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -20,7 +20,6 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
#define __hyp_text __section(.hyp.text) notrace
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 72e279dbae5f..9679067a1574 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -173,6 +173,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
}
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+ pte_val(pte) &= ~PTE_S2_XN;
+ return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+ pmd_val(pmd) &= ~PMD_S2_XN;
+ return pmd;
+}
+
static inline void kvm_set_s2pte_readonly(pte_t *pte)
{
pteval_t old_pteval, pteval;
@@ -191,6 +203,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+ return !(pte_val(*pte) & PTE_S2_XN);
+}
+
static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
{
kvm_set_s2pte_readonly((pte_t *)pmd);
@@ -201,6 +218,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
return kvm_s2pte_readonly((pte_t *)pmd);
}
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+ return !(pmd_val(*pmd) & PMD_S2_XN);
+}
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
@@ -230,21 +252,25 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
- kvm_pfn_t pfn,
- unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
void *va = page_address(pfn_to_page(pfn));
kvm_flush_dcache_to_poc(va, size);
+}
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+ unsigned long size)
+{
if (icache_is_aliasing()) {
/* any kind of VIPT cache */
__flush_icache_all();
} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
- flush_icache_range((unsigned long)va,
- (unsigned long)va + size);
+ void *va = page_address(pfn_to_page(pfn));
+
+ invalidate_icache_range((unsigned long)va,
+ (unsigned long)va + size);
}
}
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
deleted file mode 100644
index bc39e557c56c..000000000000
--- a/arch/arm64/include/asm/kvm_psci.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __ARM64_KVM_PSCI_H__
-#define __ARM64_KVM_PSCI_H__
-
-#define KVM_ARM_PSCI_0_1 1
-#define KVM_ARM_PSCI_0_2 2
-
-int kvm_psci_version(struct kvm_vcpu *vcpu);
-int kvm_psci_call(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index d4bae7d6e0d8..50fa96a49792 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -85,7 +85,8 @@
* stack size when KASAN is in use.
*/
#ifdef CONFIG_KASAN
-#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3))
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_THREAD_SHIFT 1
#else
#define KASAN_SHADOW_SIZE (0)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index f42836da8723..cdfe3e657a9e 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -187,9 +187,11 @@
*/
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */
#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 22a926825e3f..108ecad7acc5 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -37,13 +37,11 @@
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
-#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
-#else
-#define PROT_DEFAULT _PROT_DEFAULT
-#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0)
+#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0)
+
+#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
+#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -55,22 +53,22 @@
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
+#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT
-#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
+#define PAGE_KERNEL __pgprot(PROT_NORMAL)
+#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
+#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
+#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6db43ebd648d..fce604e3e599 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -21,6 +21,9 @@
#define TASK_SIZE_64 (UL(1) << VA_BITS)
+#define KERNEL_DS UL(-1)
+#define USER_DS (TASK_SIZE_64 - 1)
+
#ifndef __ASSEMBLY__
/*
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index fdb827c7832f..ebdae15d665d 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -87,8 +87,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
- " and %w1, %w1, #0xffff\n"
- " eor %w1, %w1, %w0, lsr #16\n"
+ " sub %w1, %w1, %3\n"
+ " eor %w1, %w1, %w0\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 59fda5292936..543e11f0f657 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -35,16 +35,20 @@
#include <asm/compiler.h>
#include <asm/extable.h>
-#define KERNEL_DS (-1UL)
#define get_ds() (KERNEL_DS)
-
-#define USER_DS TASK_SIZE_64
#define get_fs() (current_thread_info()->addr_limit)
static inline void set_fs(mm_segment_t fs)
{
current_thread_info()->addr_limit = fs;
+ /*
+ * Prevent a mispredicted conditional call to set_fs from forwarding
+ * the wrong address limit to access_ok under speculation.
+ */
+ dsb(nsh);
+ isb();
+
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
@@ -66,22 +70,32 @@ static inline void set_fs(mm_segment_t fs)
* Returns 1 if the range is valid, 0 otherwise.
*
* This is equivalent to the following test:
- * (u65)addr + (u65)size <= current->addr_limit
- *
- * This needs 65-bit arithmetic.
+ * (u65)addr + (u65)size <= (u65)current->addr_limit + 1
*/
-#define __range_ok(addr, size) \
-({ \
- unsigned long __addr = (unsigned long)(addr); \
- unsigned long flag, roksum; \
- __chk_user_ptr(addr); \
- asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \
- : "=&r" (flag), "=&r" (roksum) \
- : "1" (__addr), "Ir" (size), \
- "r" (current_thread_info()->addr_limit) \
- : "cc"); \
- flag; \
-})
+static inline unsigned long __range_ok(unsigned long addr, unsigned long size)
+{
+ unsigned long limit = current_thread_info()->addr_limit;
+
+ __chk_user_ptr(addr);
+ asm volatile(
+ // A + B <= C + 1 for all A,B,C, in four easy steps:
+ // 1: X = A + B; X' = X % 2^64
+ " adds %0, %0, %2\n"
+ // 2: Set C = 0 if X > 2^64, to guarantee X' > C in step 4
+ " csel %1, xzr, %1, hi\n"
+ // 3: Set X' = ~0 if X >= 2^64. For X == 2^64, this decrements X'
+ // to compensate for the carry flag being set in step 4. For
+ // X > 2^64, X' merely has to remain nonzero, which it does.
+ " csinv %0, %0, xzr, cc\n"
+ // 4: For X < 2^64, this gives us X' - C - 1 <= 0, where the -1
+ // comes from the carry in being clear. Otherwise, we are
+ // testing X' - C == 0, subject to the previous adjustments.
+ " sbcs xzr, %0, %1\n"
+ " cset %0, ls\n"
+ : "+r" (addr), "+r" (limit) : "Ir" (size) : "cc");
+
+ return addr;
+}
/*
* When dealing with data aborts, watchpoints, or instruction traps we may end
@@ -90,7 +104,7 @@ static inline void set_fs(mm_segment_t fs)
*/
#define untagged_addr(addr) sign_extend64(addr, 55)
-#define access_ok(type, addr, size) __range_ok(addr, size)
+#define access_ok(type, addr, size) __range_ok((unsigned long)(addr), size)
#define user_addr_max get_fs
#define _ASM_EXTABLE(from, to) \
@@ -221,6 +235,26 @@ static inline void uaccess_enable_not_uao(void)
}
/*
+ * Sanitise a uaccess pointer such that it becomes NULL if above the
+ * current addr_limit.
+ */
+#define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
+static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
+{
+ void __user *safe_ptr;
+
+ asm volatile(
+ " bics xzr, %1, %2\n"
+ " csel %0, %1, xzr, eq\n"
+ : "=&r" (safe_ptr)
+ : "r" (ptr), "r" (current_thread_info()->addr_limit)
+ : "cc");
+
+ csdb();
+ return safe_ptr;
+}
+
+/*
* The "__xxx" versions of the user access functions do not verify the address
* space - it must have been done previously with a separate "access_ok()"
* call.
@@ -272,28 +306,33 @@ do { \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
} while (0)
-#define __get_user(x, ptr) \
+#define __get_user_check(x, ptr, err) \
({ \
- int __gu_err = 0; \
- __get_user_err((x), (ptr), __gu_err); \
- __gu_err; \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \
+ __p = uaccess_mask_ptr(__p); \
+ __get_user_err((x), __p, (err)); \
+ } else { \
+ (x) = 0; (err) = -EFAULT; \
+ } \
})
#define __get_user_error(x, ptr, err) \
({ \
- __get_user_err((x), (ptr), (err)); \
+ __get_user_check((x), (ptr), (err)); \
(void)0; \
})
-#define get_user(x, ptr) \
+#define __get_user(x, ptr) \
({ \
- __typeof__(*(ptr)) __user *__p = (ptr); \
- might_fault(); \
- access_ok(VERIFY_READ, __p, sizeof(*__p)) ? \
- __get_user((x), __p) : \
- ((x) = 0, -EFAULT); \
+ int __gu_err = 0; \
+ __get_user_check((x), (ptr), __gu_err); \
+ __gu_err; \
})
+#define get_user __get_user
+
#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \
asm volatile( \
"1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \
@@ -336,43 +375,63 @@ do { \
uaccess_disable_not_uao(); \
} while (0)
-#define __put_user(x, ptr) \
+#define __put_user_check(x, ptr, err) \
({ \
- int __pu_err = 0; \
- __put_user_err((x), (ptr), __pu_err); \
- __pu_err; \
+ __typeof__(*(ptr)) __user *__p = (ptr); \
+ might_fault(); \
+ if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \
+ __p = uaccess_mask_ptr(__p); \
+ __put_user_err((x), __p, (err)); \
+ } else { \
+ (err) = -EFAULT; \
+ } \
})
#define __put_user_error(x, ptr, err) \
({ \
- __put_user_err((x), (ptr), (err)); \
+ __put_user_check((x), (ptr), (err)); \
(void)0; \
})
-#define put_user(x, ptr) \
+#define __put_user(x, ptr) \
({ \
- __typeof__(*(ptr)) __user *__p = (ptr); \
- might_fault(); \
- access_ok(VERIFY_WRITE, __p, sizeof(*__p)) ? \
- __put_user((x), __p) : \
- -EFAULT; \
+ int __pu_err = 0; \
+ __put_user_check((x), (ptr), __pu_err); \
+ __pu_err; \
})
+#define put_user __put_user
+
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
-#define raw_copy_from_user __arch_copy_from_user
+#define raw_copy_from_user(to, from, n) \
+({ \
+ __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \
+})
+
extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
-#define raw_copy_to_user __arch_copy_to_user
-extern unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
-extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n);
+#define raw_copy_to_user(to, from, n) \
+({ \
+ __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \
+})
+
+extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
+#define raw_copy_in_user(to, from, n) \
+({ \
+ __arch_copy_in_user(__uaccess_mask_ptr(to), \
+ __uaccess_mask_ptr(from), (n)); \
+})
+
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
-static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
+static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
- n = __clear_user(to, n);
+ n = __arch_clear_user(__uaccess_mask_ptr(to), n);
return n;
}
+#define clear_user __clear_user
extern long strncpy_from_user(char *dest, const char __user *src, long count);
@@ -386,7 +445,7 @@ extern unsigned long __must_check __copy_user_flushcache(void *to, const void __
static inline int __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
{
kasan_check_write(dst, size);
- return __copy_user_flushcache(dst, src, size);
+ return __copy_user_flushcache(dst, __uaccess_mask_ptr(src), size);
}
#endif
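
Two constructs above lend themselves to a standalone illustration: the 65-bit comparison in __range_ok and the branchless pointer sanitisation in __uaccess_mask_ptr. A portable C sketch of both, assuming a compiler with __int128; limit plays the role of addr_limit, which this patch makes an inclusive bound (USER_DS = TASK_SIZE_64 - 1):

	#include <stdint.h>

	/* (u65)addr + (u65)size <= (u65)limit + 1, via 128-bit arithmetic */
	static int range_ok(uint64_t addr, uint64_t size, uint64_t limit)
	{
		return (unsigned __int128)addr + size <= (unsigned __int128)limit + 1;
	}

	/* Model of the bics + csel pair: keep ptr only if it has no bits
	 * above the all-ones limit mask, otherwise yield NULL */
	static void *mask_user_ptr(void *ptr, uintptr_t limit)
	{
		uintptr_t p = (uintptr_t)ptr;
		uintptr_t mask = ((p & ~limit) == 0) ? ~(uintptr_t)0 : 0;

		return (void *)(p & mask);	/* NULL when ptr exceeds limit */
	}

In the real kernel code the selection is done with flag-setting instructions followed by csdb(), so a mispredicted access_ok() cannot feed an out-of-range pointer to the uaccess routines under speculation.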
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 252396a96c78..7b09487ff8fb 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -230,10 +230,10 @@ void __init acpi_boot_table_init(void)
done:
if (acpi_disabled) {
- if (earlycon_init_is_deferred)
+ if (earlycon_acpi_spcr_enable)
early_init_dt_scan_chosen_stdout();
} else {
- parse_spcr(earlycon_init_is_deferred);
+ acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
}
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 67368c7329c0..66be504edb6c 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -37,8 +37,8 @@ EXPORT_SYMBOL(clear_page);
/* user mem (segment) */
EXPORT_SYMBOL(__arch_copy_from_user);
EXPORT_SYMBOL(__arch_copy_to_user);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(raw_copy_in_user);
+EXPORT_SYMBOL(__arch_clear_user);
+EXPORT_SYMBOL(__arch_copy_in_user);
/* physical memory */
EXPORT_SYMBOL(memstart_addr);
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
index 76225c2611ea..e5de33513b5d 100644
--- a/arch/arm64/kernel/bpi.S
+++ b/arch/arm64/kernel/bpi.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
+#include <linux/arm-smccc.h>
.macro ventry target
.rept 31
@@ -53,30 +54,6 @@ ENTRY(__bp_harden_hyp_vecs_start)
vectors __kvm_hyp_vector
.endr
ENTRY(__bp_harden_hyp_vecs_end)
-ENTRY(__psci_hyp_bp_inval_start)
- sub sp, sp, #(8 * 18)
- stp x16, x17, [sp, #(16 * 0)]
- stp x14, x15, [sp, #(16 * 1)]
- stp x12, x13, [sp, #(16 * 2)]
- stp x10, x11, [sp, #(16 * 3)]
- stp x8, x9, [sp, #(16 * 4)]
- stp x6, x7, [sp, #(16 * 5)]
- stp x4, x5, [sp, #(16 * 6)]
- stp x2, x3, [sp, #(16 * 7)]
- stp x0, x1, [sp, #(16 * 8)]
- mov x0, #0x84000000
- smc #0
- ldp x16, x17, [sp, #(16 * 0)]
- ldp x14, x15, [sp, #(16 * 1)]
- ldp x12, x13, [sp, #(16 * 2)]
- ldp x10, x11, [sp, #(16 * 3)]
- ldp x8, x9, [sp, #(16 * 4)]
- ldp x6, x7, [sp, #(16 * 5)]
- ldp x4, x5, [sp, #(16 * 6)]
- ldp x2, x3, [sp, #(16 * 7)]
- ldp x0, x1, [sp, #(16 * 8)]
- add sp, sp, #(8 * 18)
-ENTRY(__psci_hyp_bp_inval_end)
ENTRY(__qcom_hyp_sanitize_link_stack_start)
stp x29, x30, [sp, #-16]!
@@ -85,3 +62,22 @@ ENTRY(__qcom_hyp_sanitize_link_stack_start)
.endr
ldp x29, x30, [sp], #16
ENTRY(__qcom_hyp_sanitize_link_stack_end)
+
+.macro smccc_workaround_1 inst
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ \inst #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+.endm
+
+ENTRY(__smccc_workaround_1_smc_start)
+ smccc_workaround_1 smc
+ENTRY(__smccc_workaround_1_smc_end)
+
+ENTRY(__smccc_workaround_1_hvc_start)
+ smccc_workaround_1 hvc
+ENTRY(__smccc_workaround_1_hvc_end)
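
[Editorial note, not part of the patch: the new smccc_workaround_1 sequence saves only x0-x3 around the call, where the removed PSCI sequence had to spill x0-x17; this appears to lean on SMCCC v1.1's tighter contract, under which the callee preserves x4 and up. The __smccc_workaround_1_*_{start,end} labels bracket snippets that are copied into per-CPU hardened vector slots by __copy_hyp_vect_bpi() in the cpu_errata.c hunk below; a rough C sketch of that consuming side, with install_into_slot() as a hypothetical stand-in:]

    /* Editorial sketch only: copy a bracketed snippet into a vector slot. */
    static void install_into_slot(void *dst, const char *start, const char *end)
    {
    	memcpy(dst, start, end - start);
    	/* new instructions must be visible to the I-side before use */
    	flush_icache_range((unsigned long)dst,
    			   (unsigned long)dst + (end - start));
    }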
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 2a752cb2a0f3..8021b46c9743 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -16,7 +16,7 @@
#include <asm/virt.h>
.text
-.pushsection .idmap.text, "ax"
+.pushsection .idmap.text, "awx"
/*
* __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index ed6881882231..07823595b7f0 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -67,9 +67,12 @@ static int cpu_enable_trap_ctr_access(void *__unused)
DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
#ifdef CONFIG_KVM
-extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
extern char __qcom_hyp_sanitize_link_stack_start[];
extern char __qcom_hyp_sanitize_link_stack_end[];
+extern char __smccc_workaround_1_smc_start[];
+extern char __smccc_workaround_1_smc_end[];
+extern char __smccc_workaround_1_hvc_start[];
+extern char __smccc_workaround_1_hvc_end[];
static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
const char *hyp_vecs_end)
@@ -112,10 +115,12 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
spin_unlock(&bp_lock);
}
#else
-#define __psci_hyp_bp_inval_start NULL
-#define __psci_hyp_bp_inval_end NULL
#define __qcom_hyp_sanitize_link_stack_start NULL
#define __qcom_hyp_sanitize_link_stack_end NULL
+#define __smccc_workaround_1_smc_start NULL
+#define __smccc_workaround_1_smc_end NULL
+#define __smccc_workaround_1_hvc_start NULL
+#define __smccc_workaround_1_hvc_end NULL
static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
@@ -142,17 +147,59 @@ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
__install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
}
+#include <uapi/linux/psci.h>
+#include <linux/arm-smccc.h>
#include <linux/psci.h>
-static int enable_psci_bp_hardening(void *data)
+static void call_smc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void call_hvc_arch_workaround_1(void)
+{
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static int enable_smccc_arch_workaround_1(void *data)
{
const struct arm64_cpu_capabilities *entry = data;
+ bp_hardening_cb_t cb;
+ void *smccc_start, *smccc_end;
+ struct arm_smccc_res res;
+
+ if (!entry->matches(entry, SCOPE_LOCAL_CPU))
+ return 0;
+
+ if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+ return 0;
+
+ switch (psci_ops.conduit) {
+ case PSCI_CONDUIT_HVC:
+ arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+ if (res.a0)
+ return 0;
+ cb = call_hvc_arch_workaround_1;
+ smccc_start = __smccc_workaround_1_hvc_start;
+ smccc_end = __smccc_workaround_1_hvc_end;
+ break;
+
+ case PSCI_CONDUIT_SMC:
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+ if (res.a0)
+ return 0;
+ cb = call_smc_arch_workaround_1;
+ smccc_start = __smccc_workaround_1_smc_start;
+ smccc_end = __smccc_workaround_1_smc_end;
+ break;
+
+ default:
+ return 0;
+ }
- if (psci_ops.get_version)
- install_bp_hardening_cb(entry,
- (bp_hardening_cb_t)psci_ops.get_version,
- __psci_hyp_bp_inval_start,
- __psci_hyp_bp_inval_end);
+ install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
return 0;
}
@@ -333,22 +380,22 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
@@ -362,12 +409,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
- .enable = enable_psci_bp_hardening,
+ .enable = enable_smccc_arch_workaround_1,
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0fb6a3151443..29b1f873e337 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -856,12 +856,23 @@ static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
int __unused)
{
+ char const *str = "command line option";
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
- /* Forced on command line? */
+ /*
+ * For reasons that aren't entirely clear, enabling KPTI on Cavium
+ * ThunderX leads to apparent I-cache corruption of kernel text, which
+ * ends as well as you might imagine. Don't even try.
+ */
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
+ str = "ARM64_WORKAROUND_CAVIUM_27456";
+ __kpti_forced = -1;
+ }
+
+ /* Forced? */
if (__kpti_forced) {
- pr_info_once("kernel page table isolation forced %s by command line option\n",
- __kpti_forced > 0 ? "ON" : "OFF");
+ pr_info_once("kernel page table isolation forced %s by %s\n",
+ __kpti_forced > 0 ? "ON" : "OFF", str);
return __kpti_forced > 0;
}
@@ -881,6 +892,30 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
ID_AA64PFR0_CSV3_SHIFT);
}
+static int kpti_install_ng_mappings(void *__unused)
+{
+ typedef void (kpti_remap_fn)(int, int, phys_addr_t);
+ extern kpti_remap_fn idmap_kpti_install_ng_mappings;
+ kpti_remap_fn *remap_fn;
+
+ static bool kpti_applied = false;
+ int cpu = smp_processor_id();
+
+ if (kpti_applied)
+ return 0;
+
+ remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
+
+ cpu_install_idmap();
+ remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
+ cpu_uninstall_idmap();
+
+ if (!cpu)
+ kpti_applied = true;
+
+ return 0;
+}
+
static int __init parse_kpti(char *str)
{
bool enabled;
@@ -1004,6 +1039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
.def_scope = SCOPE_SYSTEM,
.matches = unmap_kernel_at_el0,
+ .enable = kpti_install_ng_mappings,
},
#endif
{
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b34e717d7597..ec2ee720e33e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -167,10 +167,10 @@ alternative_else_nop_endif
.else
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
- /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+ /* Save the task's original addr_limit and set USER_DS */
ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
- mov x20, #TASK_SIZE_64
+ mov x20, #USER_DS
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
@@ -324,6 +324,10 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
+ /*
+ * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on eret context synchronization
+ * when returning from an IPI handler and when returning to user-space.
+ */
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -378,6 +382,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
* x7 is reserved for the system call number in 32-bit mode.
*/
wsc_nr .req w25 // number of system calls
+xsc_nr .req x25 // number of system calls (zero-extended)
wscno .req w26 // syscall number
xscno .req x26 // syscall number (zero-extended)
stbl .req x27 // syscall table pointer
@@ -766,7 +771,10 @@ el0_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
- enable_daif
+ enable_da_f
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
ct_user_exit
mov x0, x26
mov x1, x25
@@ -824,6 +832,11 @@ el0_irq_naked:
#endif
ct_user_exit
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+ tbz x22, #55, 1f
+ bl do_el0_irq_bp_hardening
+1:
+#endif
irq_handler
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -935,6 +948,7 @@ el0_svc_naked: // compat entry point
b.ne __sys_trace
cmp wscno, wsc_nr // check upper syscall limit
b.hs ni_sys
+ mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number
ldr x16, [stbl, xscno, lsl #3] // address in the syscall table
blr x16 // call sys_* routine
b ret_fast_syscall
@@ -1013,16 +1027,9 @@ alternative_else_nop_endif
orr \tmp, \tmp, #USER_ASID_FLAG
msr ttbr1_el1, \tmp
/*
- * We avoid running the post_ttbr_update_workaround here because the
- * user and kernel ASIDs don't have conflicting mappings, so any
- * "blessing" as described in:
- *
- * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
- *
- * will not hurt correctness. Whilst this may partially defeat the
- * point of using split ASIDs in the first place, it avoids
- * the hit of invalidating the entire I-cache on every return to
- * userspace.
+ * We avoid running the post_ttbr_update_workaround here because
+ * it's only needed by Cavium ThunderX, which requires KPTI to be
+ * disabled.
*/
.endm
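
[Editorial note, not part of the patch: mask_nospec64 above clamps the syscall number under speculation before it indexes the syscall table, so a mispredicted b.hs cannot leak via an out-of-bounds table load. The C-level analogue is the generic array_index_nospec() pattern; a minimal sketch (the arm64 asm additionally ends with a CSDB barrier):]

    /*
     * Editorial sketch only: branchless index clamping.
     * Returns idx when idx < size, 0 otherwise, with no conditional
     * branch for the CPU to speculate past.
     */
    static inline unsigned long index_nospec(unsigned long idx, unsigned long size)
    {
    	/* bit 63 of (idx | (size - 1 - idx)) is set iff idx >= size */
    	unsigned long mask =
    		(unsigned long)((long)(~(idx | (size - 1UL - idx))) >> 63);

    	return idx & mask;
    }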
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ba3ab04788dc..2b6b8b24e5ab 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -148,26 +148,6 @@ preserve_boot_args:
ENDPROC(preserve_boot_args)
/*
- * Macro to arrange a physical address in a page table entry, taking care of
- * 52-bit addresses.
- *
- * Preserves: phys
- * Returns: pte
- */
- .macro phys_to_pte, phys, pte
-#ifdef CONFIG_ARM64_PA_BITS_52
- /*
- * We assume \phys is 64K aligned and this is guaranteed by only
- * supporting this configuration with 64K pages.
- */
- orr \pte, \phys, \phys, lsr #36
- and \pte, \pte, #PTE_ADDR_MASK
-#else
- mov \pte, \phys
-#endif
- .endm
-
-/*
* Macro to create a table entry to the next page.
*
* tbl: page table address
@@ -181,7 +161,7 @@ ENDPROC(preserve_boot_args)
*/
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
add \tmp1, \tbl, #PAGE_SIZE
- phys_to_pte \tmp1, \tmp2
+ phys_to_pte \tmp2, \tmp1
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
lsr \tmp1, \virt, #\shift
sub \ptrs, \ptrs, #1
@@ -207,7 +187,7 @@ ENDPROC(preserve_boot_args)
* Returns: rtbl
*/
.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
-.Lpe\@: phys_to_pte \rtbl, \tmp1
+.Lpe\@: phys_to_pte \tmp1, \rtbl
orr \tmp1, \tmp1, \flags // tmp1 = table entry
str \tmp1, [\tbl, \index, lsl #3]
add \rtbl, \rtbl, \inc // rtbl = pa next level
@@ -475,7 +455,7 @@ ENDPROC(__primary_switched)
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".idmap.text","ax"
+ .section ".idmap.text","awx"
ENTRY(kimage_vaddr)
.quad _text - TEXT_OFFSET
@@ -776,8 +756,8 @@ ENTRY(__enable_mmu)
update_early_cpu_boot_status 0, x1, x2
adrp x1, idmap_pg_dir
adrp x2, swapper_pg_dir
- phys_to_ttbr x1, x3
- phys_to_ttbr x2, x4
+ phys_to_ttbr x3, x1
+ phys_to_ttbr x4, x2
msr ttbr0_el1, x3 // load TTBR0
msr ttbr1_el1, x4 // load TTBR1
isb
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 84f5d52fddda..dd14ab8c9f72 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -34,12 +34,12 @@
* each stage of the walk.
*/
.macro break_before_make_ttbr_switch zero_page, page_table, tmp
- phys_to_ttbr \zero_page, \tmp
+ phys_to_ttbr \tmp, \zero_page
msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
- phys_to_ttbr \page_table, \tmp
+ phys_to_ttbr \tmp, \page_table
msr ttbr1_el1, \tmp
isb
.endm
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 3affca3dd96a..75b220ba73a3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -925,9 +925,8 @@ static void __armv8pmu_probe_pmu(void *info)
pmceid[0] = read_sysreg(pmceid0_el0);
pmceid[1] = read_sysreg(pmceid1_el0);
- bitmap_from_u32array(cpu_pmu->pmceid_bitmap,
- ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid,
- ARRAY_SIZE(pmceid));
+ bitmap_from_arr32(cpu_pmu->pmceid_bitmap,
+ pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
}
static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 10dd16d7902d..bebec8ef9372 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -96,7 +96,7 @@ ENTRY(__cpu_suspend_enter)
ret
ENDPROC(__cpu_suspend_enter)
- .pushsection ".idmap.text", "ax"
+ .pushsection ".idmap.text", "awx"
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
bl __cpu_setup
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5c7f657dd207..d7e3299a7734 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -361,10 +361,16 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
trace_kvm_set_guest_debug(vcpu, dbg->control);
- if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
- return -EINVAL;
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
+ ret = -EINVAL;
+ goto out;
+ }
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
@@ -378,7 +384,10 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
}
- return 0;
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 520b0dad3c62..e5e741bfffe1 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -22,13 +22,14 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <kvm/arm_psci.h>
+
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
-#include <asm/kvm_psci.h>
#include <asm/debug-monitors.h>
#include <asm/traps.h>
@@ -51,7 +52,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_hvc_get_imm(vcpu));
vcpu->stat.hvc_exit_stat++;
- ret = kvm_psci_call(vcpu);
+ ret = kvm_hvc_call_handler(vcpu);
if (ret < 0) {
vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
@@ -62,7 +63,16 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ /*
+ * "If an SMC instruction executed at Non-secure EL1 is
+ * trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
+ * Trap exception, not a Secure Monitor Call exception [...]"
+ *
+ * We need to advance the PC after the trap, as it would
+ * otherwise return to the same address...
+ */
vcpu_set_reg(vcpu, 0, ~0UL);
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
return 1;
}
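
[Editorial note, not part of the patch: the ~0UL placed in the guest's x0 reads as -1 when interpreted as a signed SMCCC/PSCI return code, i.e. the generic "not supported" answer for SMCs the hypervisor declines to forward; the new kvm_skip_instr() call is what stops the guest from re-executing the trapped SMC forever.]

    /* Editorial check only: ~0UL is the all-ones pattern, i.e.
     * (unsigned long)-1, matching PSCI_RET_NOT_SUPPORTED (-1). */
    BUILD_BUG_ON(~0UL != (unsigned long)-1);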
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index e086c6eff8c6..5aa9ccf6db99 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -63,7 +63,7 @@ __do_hyp_init:
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
- phys_to_ttbr x0, x4
+ phys_to_ttbr x4, x0
msr ttbr0_el2, x4
mrs x4, tcr_el1
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index f4363d40e2cd..dabb5cc7b087 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -21,6 +21,7 @@
#include <asm/debug-monitors.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#define read_debug(r,n) read_sysreg(r##n##_el1)
#define write_debug(v,r,n) write_sysreg(v, r##n##_el1)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index e4f37b9dd47c..f36464bd57c5 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
#include <linux/linkage.h>
#include <asm/alternative.h>
@@ -64,10 +65,11 @@ alternative_endif
lsr x0, x1, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
+ ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
- mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest
- cbnz x1, el1_trap // called HVC
+ mrs x1, vttbr_el2 // If vttbr is valid, the guest
+ cbnz x1, el1_hvc_guest // called HVC
/* Here, we're pretty sure the host called HVC. */
ldp x0, x1, [sp], #16
@@ -100,6 +102,20 @@ alternative_endif
eret
+el1_hvc_guest:
+ /*
+ * Fastest possible path for ARM_SMCCC_ARCH_WORKAROUND_1.
+ * The workaround has already been applied on the host,
+ * so let's quickly get back to the guest. We don't bother
+ * restoring x1, as it can be clobbered anyway.
+ */
+ ldr x1, [sp] // Guest's x0
+ eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
+ cbnz w1, el1_trap
+ mov x0, x1
+ add sp, sp, #16
+ eret
+
el1_trap:
/*
* x0: ESR_EC
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 036e1f3d77a6..116252a8d3a5 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -19,9 +19,12 @@
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>
+#include <kvm/arm_psci.h>
+
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
@@ -348,18 +351,6 @@ again:
if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
goto again;
- if (exit_code == ARM_EXCEPTION_TRAP &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) &&
- vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) {
- u64 val = PSCI_RET_NOT_SUPPORTED;
- if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
- val = 2;
-
- vcpu_set_reg(vcpu, 0, val);
- goto again;
- }
-
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index 73464a96c365..131c7772703c 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -16,6 +16,7 @@
*/
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#include <asm/tlbflush.h>
static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 3d69a8d41fa5..21ba0b29621b 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -21,7 +21,7 @@
.text
-/* Prototype: int __clear_user(void *addr, size_t sz)
+/* Prototype: int __arch_clear_user(void *addr, size_t sz)
* Purpose : clear some user memory
* Params : addr - user memory address to clear
* : sz - number of bytes to clear
@@ -29,7 +29,7 @@
*
* Alignment fixed up by hardware.
*/
-ENTRY(__clear_user)
+ENTRY(__arch_clear_user)
uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
@@ -52,7 +52,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
uaccess_disable_not_uao x2, x3
ret
-ENDPROC(__clear_user)
+ENDPROC(__arch_clear_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index fbb090f431a5..54b75deb1d16 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -64,14 +64,15 @@
.endm
end .req x5
-ENTRY(raw_copy_in_user)
+
+ENTRY(__arch_copy_in_user)
uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
uaccess_disable_not_uao x3, x4
mov x0, #0
ret
-ENDPROC(raw_copy_in_user)
+ENDPROC(__arch_copy_in_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 91464e7f77cc..758bde7e2fa6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -60,16 +60,7 @@ user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE
b.lo 1b
dsb ish
- icache_line_size x2, x3
- sub x3, x2, #1
- bic x4, x0, x3
-1:
-USER(9f, ic ivau, x4 ) // invalidate I line PoU
- add x4, x4, x2
- cmp x4, x1
- b.lo 1b
- dsb ish
- isb
+ invalidate_icache_by_line x0, x1, x2, x3, 9f
mov x0, #0
1:
uaccess_ttbr0_disable x1, x2
@@ -81,6 +72,27 @@ ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
/*
+ * invalidate_icache_range(start,end)
+ *
+ * Ensure that the I-cache is invalidated for the specified region.
+ *
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(invalidate_icache_range)
+ uaccess_ttbr0_enable x2, x3, x4
+
+ invalidate_icache_by_line x0, x1, x2, x3, 2f
+ mov x0, xzr
+1:
+ uaccess_ttbr0_disable x1, x2
+ ret
+2:
+ mov x0, #-EFAULT
+ b 1b
+ENDPROC(invalidate_icache_range)
+
+/*
* __flush_dcache_area(kaddr, size)
*
* Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index ce441d29e7f6..f76bb2c3c943 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -240,7 +240,7 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
if (fsc_type == ESR_ELx_FSC_PERM)
return true;
- if (addr < USER_DS && system_uses_ttbr0_pan())
+ if (addr < TASK_SIZE && system_uses_ttbr0_pan())
return fsc_type == ESR_ELx_FSC_FAULT &&
(regs->pstate & PSR_PAN_BIT);
@@ -414,7 +414,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (addr < USER_DS && is_permission_fault(esr, regs, addr)) {
+ if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
@@ -707,6 +707,12 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die("", regs, &info, esr);
}
+asmlinkage void __exception do_el0_irq_bp_hardening(void)
+{
+ /* PC has already been checked in entry.S */
+ arm64_apply_bp_hardening();
+}
+
asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
@@ -731,6 +737,12 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
struct siginfo info;
struct task_struct *tsk = current;
+ if (user_mode(regs)) {
+ if (instruction_pointer(regs) > TASK_SIZE)
+ arm64_apply_bp_hardening();
+ local_irq_enable();
+ }
+
if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
tsk->comm, task_pid_nr(tsk),
@@ -790,6 +802,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
if (interrupts_enabled(regs))
trace_hardirqs_off();
+ if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
+ arm64_apply_bp_hardening();
+
if (!inf->fn(addr, esr, regs)) {
rv = 1;
} else {
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index acba49fb5aac..6e02e6fb4c7b 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -135,7 +135,8 @@ static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
/* The early shadow maps everything to a single page of zeroes */
asmlinkage void __init kasan_early_init(void)
{
- BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE,
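
[Editorial note, not part of the patch: the rewritten BUILD_BUG_ON encodes the same constant as the old 1UL << 61 literal, just derived from the shadow granularity. With KASAN_SHADOW_SCALE_SHIFT == 3 (one shadow byte per 8 bytes of memory, the generic KASAN scale):

    64 - KASAN_SHADOW_SCALE_SHIFT = 64 - 3 = 61, so
    1UL << (64 - KASAN_SHADOW_SCALE_SHIFT) == 1UL << 61

so the check's behaviour is unchanged while no longer hard-coding the shift.]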
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b44992ec9643..4694cda823c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -118,6 +118,10 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
if ((old | new) & PTE_CONT)
return false;
+ /* Transitioning from Global to Non-Global is safe */
+ if (((old ^ new) == PTE_NG) && (new & PTE_NG))
+ return true;
+
return ((old ^ new) & ~mask) == 0;
}
@@ -685,12 +689,14 @@ int kern_addr_valid(unsigned long addr)
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
return vmemmap_populate_basepages(start, end, node);
}
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long addr = start;
unsigned long next;
@@ -725,7 +731,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
return 0;
}
#endif /* CONFIG_ARM64_64K_PAGES */
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
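
[Editorial note, not part of the patch: the pgattr_change_is_safe() hunk above whitelists exactly one live-update direction, global to non-global, which is what the KPTI rewrite in proc.S performs. A worked check of the two-sided predicate (values illustrative):]

    /* Editorial sketch only: how the whitelist test behaves. */
    u64 old = prot;			/* global entry, nG clear          */
    u64 new = prot | PTE_NG;		/* identical entry, nG set         */
    /* (old ^ new) == PTE_NG  -> true: only the nG bit differs          */
    /* (new & PTE_NG) != 0    -> true: and it was set, not cleared      */
    /* the reverse transition (clearing nG) passes the XOR test but
     * fails the second clause, so it stays rejected                    */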
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 9f177aac6390..71baed7e592a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -90,7 +90,7 @@ ENDPROC(cpu_do_suspend)
*
* x0: Address of context pointer
*/
- .pushsection ".idmap.text", "ax"
+ .pushsection ".idmap.text", "awx"
ENTRY(cpu_do_resume)
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
@@ -153,7 +153,7 @@ ENDPROC(cpu_do_resume)
ENTRY(cpu_do_switch_mm)
mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
- phys_to_ttbr x0, x3
+ phys_to_ttbr x3, x0
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
bfi x3, x1, #48, #16 // set the ASID field in TTBR0
#endif
@@ -165,7 +165,18 @@ ENTRY(cpu_do_switch_mm)
b post_ttbr_update_workaround // Back to C code...
ENDPROC(cpu_do_switch_mm)
- .pushsection ".idmap.text", "ax"
+ .pushsection ".idmap.text", "awx"
+
+.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
+ adrp \tmp1, empty_zero_page
+ phys_to_ttbr \tmp2, \tmp1
+ msr ttbr1_el1, \tmp2
+ isb
+ tlbi vmalle1
+ dsb nsh
+ isb
+.endm
+
/*
* void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
*
@@ -175,24 +186,201 @@ ENDPROC(cpu_do_switch_mm)
ENTRY(idmap_cpu_replace_ttbr1)
save_and_disable_daif flags=x2
- adrp x1, empty_zero_page
- phys_to_ttbr x1, x3
+ __idmap_cpu_set_reserved_ttbr1 x1, x3
+
+ phys_to_ttbr x3, x0
msr ttbr1_el1, x3
isb
- tlbi vmalle1
- dsb nsh
+ restore_daif x2
+
+ ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+ .popsection
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ .pushsection ".idmap.text", "awx"
+
+ .macro __idmap_kpti_get_pgtable_ent, type
+ dc cvac, cur_\()\type\()p // Ensure any existing dirty
+ dmb sy // lines are written back before
+ ldr \type, [cur_\()\type\()p] // loading the entry
+ tbz \type, #0, next_\()\type // Skip invalid entries
+ .endm
+
+ .macro __idmap_kpti_put_pgtable_ent_ng, type
+ orr \type, \type, #PTE_NG // Same bit for blocks and pages
+ str \type, [cur_\()\type\()p] // Update the entry and ensure it
+ dc civac, cur_\()\type\()p // is visible to all CPUs.
+ .endm
+
+/*
+ * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
+ *
+ * Called exactly once from stop_machine context by each CPU found during boot.
+ */
+__idmap_kpti_flag:
+ .long 1
+ENTRY(idmap_kpti_install_ng_mappings)
+ cpu .req w0
+ num_cpus .req w1
+ swapper_pa .req x2
+ swapper_ttb .req x3
+ flag_ptr .req x4
+ cur_pgdp .req x5
+ end_pgdp .req x6
+ pgd .req x7
+ cur_pudp .req x8
+ end_pudp .req x9
+ pud .req x10
+ cur_pmdp .req x11
+ end_pmdp .req x12
+ pmd .req x13
+ cur_ptep .req x14
+ end_ptep .req x15
+ pte .req x16
+
+ mrs swapper_ttb, ttbr1_el1
+ adr flag_ptr, __idmap_kpti_flag
+
+ cbnz cpu, __idmap_kpti_secondary
+
+ /* We're the boot CPU. Wait for the others to catch up */
+ sevl
+1: wfe
+ ldaxr w18, [flag_ptr]
+ eor w18, w18, num_cpus
+ cbnz w18, 1b
+
+ /* We need to walk swapper, so turn off the MMU. */
+ pre_disable_mmu_workaround
+ mrs x18, sctlr_el1
+ bic x18, x18, #SCTLR_ELx_M
+ msr sctlr_el1, x18
isb
- phys_to_ttbr x0, x3
- msr ttbr1_el1, x3
+ /* Everybody is enjoying the idmap, so we can rewrite swapper. */
+ /* PGD */
+ mov cur_pgdp, swapper_pa
+ add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
+do_pgd: __idmap_kpti_get_pgtable_ent pgd
+ tbnz pgd, #1, walk_puds
+ __idmap_kpti_put_pgtable_ent_ng pgd
+next_pgd:
+ add cur_pgdp, cur_pgdp, #8
+ cmp cur_pgdp, end_pgdp
+ b.ne do_pgd
+
+ /* Publish the updated tables and nuke all the TLBs */
+ dsb sy
+ tlbi vmalle1is
+ dsb ish
isb
- restore_daif x2
+ /* We're done: fire up the MMU again */
+ mrs x18, sctlr_el1
+ orr x18, x18, #SCTLR_ELx_M
+ msr sctlr_el1, x18
+ isb
+ /* Set the flag to zero to indicate that we're all done */
+ str wzr, [flag_ptr]
ret
-ENDPROC(idmap_cpu_replace_ttbr1)
+
+ /* PUD */
+walk_puds:
+ .if CONFIG_PGTABLE_LEVELS > 3
+ pte_to_phys cur_pudp, pgd
+ add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
+do_pud: __idmap_kpti_get_pgtable_ent pud
+ tbnz pud, #1, walk_pmds
+ __idmap_kpti_put_pgtable_ent_ng pud
+next_pud:
+ add cur_pudp, cur_pudp, 8
+ cmp cur_pudp, end_pudp
+ b.ne do_pud
+ b next_pgd
+ .else /* CONFIG_PGTABLE_LEVELS <= 3 */
+ mov pud, pgd
+ b walk_pmds
+next_pud:
+ b next_pgd
+ .endif
+
+ /* PMD */
+walk_pmds:
+ .if CONFIG_PGTABLE_LEVELS > 2
+ pte_to_phys cur_pmdp, pud
+ add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
+do_pmd: __idmap_kpti_get_pgtable_ent pmd
+ tbnz pmd, #1, walk_ptes
+ __idmap_kpti_put_pgtable_ent_ng pmd
+next_pmd:
+ add cur_pmdp, cur_pmdp, #8
+ cmp cur_pmdp, end_pmdp
+ b.ne do_pmd
+ b next_pud
+ .else /* CONFIG_PGTABLE_LEVELS <= 2 */
+ mov pmd, pud
+ b walk_ptes
+next_pmd:
+ b next_pud
+ .endif
+
+ /* PTE */
+walk_ptes:
+ pte_to_phys cur_ptep, pmd
+ add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
+do_pte: __idmap_kpti_get_pgtable_ent pte
+ __idmap_kpti_put_pgtable_ent_ng pte
+next_pte:
+ add cur_ptep, cur_ptep, #8
+ cmp cur_ptep, end_ptep
+ b.ne do_pte
+ b next_pmd
+
+ /* Secondary CPUs end up here */
+__idmap_kpti_secondary:
+ /* Uninstall swapper before surgery begins */
+ __idmap_cpu_set_reserved_ttbr1 x18, x17
+
+ /* Increment the flag to let the boot CPU know we're ready */
+1: ldxr w18, [flag_ptr]
+ add w18, w18, #1
+ stxr w17, w18, [flag_ptr]
+ cbnz w17, 1b
+
+ /* Wait for the boot CPU to finish messing around with swapper */
+ sevl
+1: wfe
+ ldxr w18, [flag_ptr]
+ cbnz w18, 1b
+
+ /* All done, act like nothing happened */
+ msr ttbr1_el1, swapper_ttb
+ isb
+ ret
+
+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq swapper_ttb
+ .unreq flag_ptr
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
+#endif
/*
* __cpu_setup
@@ -200,7 +388,7 @@ ENDPROC(idmap_cpu_replace_ttbr1)
* Initialise the processor for turning the MMU on. Return in x0 the
* value of the SCTLR_EL1 register.
*/
- .pushsection ".idmap.text", "ax"
+ .pushsection ".idmap.text", "awx"
ENTRY(__cpu_setup)
tlbi vmalle1 // Invalidate local TLB
dsb nsh
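
[Editorial note, not part of the patch: the flag handshake inside idmap_kpti_install_ng_mappings above is a bare-metal rendezvous. __idmap_kpti_flag starts at 1 (the boot CPU counts itself), each secondary increments it and parks on the reserved TTBR1, the boot CPU waits for it to reach num_cpus, rewrites swapper with the MMU off, and finally zeroes the flag to release everyone. A C-level sketch of the same protocol, modelled with kernel atomics instead of the ldxr/stxr loops (names hypothetical):]

    /* Editorial sketch only: the rendezvous protocol. */
    static atomic_t kpti_flag = ATOMIC_INIT(1);	/* boot CPU counts itself */

    static void kpti_rendezvous(int cpu, int num_cpus)
    {
    	if (cpu != 0) {
    		atomic_inc(&kpti_flag);			/* report in ...        */
    		while (atomic_read(&kpti_flag))		/* ... wait for release */
    			cpu_relax();
    		return;
    	}

    	while (atomic_read(&kpti_flag) != num_cpus)	/* gather everyone      */
    		cpu_relax();

    	/* MMU off, set nG on every swapper entry, flush TLBs, MMU on */

    	atomic_set(&kpti_flag, 0);			/* release secondaries  */
    }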
diff --git a/arch/blackfin/include/uapi/asm/poll.h b/arch/blackfin/include/uapi/asm/poll.h
index 3b162f2d2970..cd2f1a78aba5 100644
--- a/arch/blackfin/include/uapi/asm/poll.h
+++ b/arch/blackfin/include/uapi/asm/poll.h
@@ -9,25 +9,8 @@
#ifndef _UAPI__BFIN_POLL_H
#define _UAPI__BFIN_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
diff --git a/arch/cris/arch-v10/drivers/gpio.c b/arch/cris/arch-v10/drivers/gpio.c
index a2986c60aaac..cd0e05d89d42 100644
--- a/arch/cris/arch-v10/drivers/gpio.c
+++ b/arch/cris/arch-v10/drivers/gpio.c
@@ -173,7 +173,7 @@ static __poll_t gpio_poll(struct file *file, poll_table *wait)
if ((data & priv->highalarm) ||
(~data & priv->lowalarm)) {
- mask = POLLIN|POLLRDNORM;
+ mask = EPOLLIN|EPOLLRDNORM;
}
out:
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index 177843c64071..ed1a568a7217 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -666,16 +666,16 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
poll_wait(file, &port->in_wait_q, wait);
/* Some room to write */
if (port->out_count < OUT_BUFFER_SIZE)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* At least an inbufchunk of data */
if (sync_data_avail(port) >= port->inbufchunk)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
DEBUGPOLL(if (mask != prev_mask)
printk(KERN_DEBUG "sync_serial_poll: mask 0x%08X %s %s\n",
mask,
- mask & POLLOUT ? "POLLOUT" : "",
- mask & POLLIN ? "POLLIN" : "");
+ mask & EPOLLOUT ? "POLLOUT" : "",
+ mask & EPOLLIN ? "POLLIN" : "");
prev_mask = mask;
);
return mask;
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index e20e0b9a3a5c..1b0ce8a8af16 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -574,24 +574,24 @@ static __poll_t sync_serial_poll(struct file *file, poll_table *wait)
/* No active transfer, descriptors are available */
if (port->output && !port->tr_running)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* Descriptor and buffer space available. */
if (port->output &&
port->active_tr_descr != port->catch_tr_descr &&
port->out_buf_count < OUT_BUFFER_SIZE)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
/* At least an inbufchunk of data */
if (port->input && sync_data_avail(port) >= port->inbufchunk)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
DEBUGPOLL(
if (mask != prev_mask)
pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
mask,
- mask & POLLOUT ? "POLLOUT" : "",
- mask & POLLIN ? "POLLIN" : "");
+ mask & EPOLLOUT ? "POLLOUT" : "",
+ mask & EPOLLIN ? "POLLIN" : "");
prev_mask = mask;
);
return mask;
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index e69de29bb2d1..f6bfee6c8c1b 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the CRIS port.
+#
+
+CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
+extra-y := vmlinux.lds
+
+obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y += stacktrace.o
+
+obj-$(CONFIG_MODULES) += crisksyms.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_SYSTEM_PROFILER) += profile.o
+
+clean:
+
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 524d47501a23..1b61a7207afb 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -24,6 +24,7 @@
#include <linux/of_fdt.h>
#include <asm/setup.h>
#include <arch/system.h>
+#include <asm/sections.h>
/*
* Setup options
@@ -31,7 +32,6 @@
struct screen_info screen_info;
extern int root_mountflags;
-extern char _etext, _edata, _end;
char __initdata cris_command_line[COMMAND_LINE_SIZE] = { 0, };
diff --git a/arch/frv/include/uapi/asm/poll.h b/arch/frv/include/uapi/asm/poll.h
index a44c8f0ebee7..f55b45f475ec 100644
--- a/arch/frv/include/uapi/asm/poll.h
+++ b/arch/frv/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef _ASM_POLL_H
#define _ASM_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
#undef POLLREMOVE
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index c44f002e8f6b..8fb280e33114 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1670,7 +1670,7 @@ pfm_poll(struct file *filp, poll_table * wait)
PROTECT_CTX(ctx, flags);
if (PFM_CTXQ_EMPTY(ctx) == 0)
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
UNPROTECT_CTX(ctx, flags);
@@ -2610,17 +2610,10 @@ pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
if (pid < 2) return -EPERM;
if (pid != task_pid_vnr(current)) {
-
- read_lock(&tasklist_lock);
-
- p = find_task_by_vpid(pid);
-
/* make sure task cannot go away while we operate on it */
- if (p) get_task_struct(p);
-
- read_unlock(&tasklist_lock);
-
- if (p == NULL) return -ESRCH;
+ p = find_get_task_by_vpid(pid);
+ if (!p)
+ return -ESRCH;
}
ret = pfm_task_incompatible(ctx, p);
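
[Editorial note, not part of the patch: find_get_task_by_vpid() is the helper this hunk switches to; it folds the find-then-pin sequence behind RCU instead of taking tasklist_lock. A sketch of its shape for context; kernel/pid.c is authoritative:]

    struct task_struct *find_get_task_by_vpid(pid_t nr)
    {
    	struct task_struct *task;

    	rcu_read_lock();
    	task = find_task_by_vpid(nr);
    	if (task)
    		get_task_struct(task);	/* pin before dropping RCU */
    	rcu_read_unlock();

    	return task;
    }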
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ac46f0d60b66..7d9bd20319ff 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
return vmemmap_populate_basepages(start, end, node);
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 7af4e05bb61e..18278b448530 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start),
- MEMMAP_EARLY);
+ MEMMAP_EARLY, NULL);
return 0;
}
@@ -509,9 +509,10 @@ void __meminit
memmap_init (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
- if (!vmem_map)
- memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
- else {
+ if (!vmem_map) {
+ memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
+ NULL);
+ } else {
struct page *start;
struct memmap_init_callback_data args;
@@ -647,13 +648,14 @@ mem_init (void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (ret)
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
__func__, ret);
@@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret;
zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (ret)
pr_warn("%s: Problem encountered in __remove_pages() as"
" ret=%d\n", __func__, ret);
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index dda58cfe8c22..93b47b1f6fb4 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -311,7 +311,6 @@ static inline int bfchg_mem_test_and_change_bit(int nr,
* functions.
*/
#if defined(CONFIG_CPU_HAS_NO_BITFIELDS)
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/ffz.h>
#else
@@ -441,6 +440,8 @@ static inline unsigned long ffz(unsigned long word)
#endif
+#include <asm-generic/bitops/find.h>
+
#ifdef __KERNEL__
#if defined(CONFIG_CPU_HAS_NO_BITFIELDS)
diff --git a/arch/m68k/include/uapi/asm/poll.h b/arch/m68k/include/uapi/asm/poll.h
index d8be239e8141..c3e3fcc15e1d 100644
--- a/arch/m68k/include/uapi/asm/poll.h
+++ b/arch/m68k/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef __m68k_POLL_H
#define __m68k_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 256
#include <asm-generic/poll.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ab98569994f0..8128c3b68d6b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -7,8 +7,6 @@ config MIPS
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select ARCH_MIGHT_HAVE_PC_PARPORT
- select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_SUPPORTS_UPROBES
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
@@ -119,12 +117,12 @@ config MIPS_GENERIC
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_RELOCATABLE
select SYS_SUPPORTS_SMARTMIPS
- select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
- select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
- select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
- select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+ select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+ select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
help
Select this to build a kernel which aims to support multiple boards,
@@ -253,6 +251,7 @@ config BCM47XX
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_MIPS16
+ select SYS_SUPPORTS_ZBOOT
select SYS_HAS_EARLY_PRINTK
select USE_GENERIC_EARLY_PRINTK_8250
select GPIOLIB
@@ -341,6 +340,8 @@ config MACH_DECSTATION
config MACH_JAZZ
bool "Jazz family of machines"
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select FW_ARC
select FW_ARC32
select ARCH_MAY_HAVE_PC_FDC
@@ -476,6 +477,8 @@ config MACH_PISTACHIO
config MIPS_MALTA
bool "MIPS Malta board"
select ARCH_MAY_HAVE_PC_FDC
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select BOOT_ELF32
select BOOT_RAW
select BUILTIN_DTB
@@ -613,6 +616,7 @@ config SGI_IP22
bool "SGI IP22 (Indy/Indigo2)"
select FW_ARC
select FW_ARC32
+ select ARCH_MIGHT_HAVE_PC_SERIO
select BOOT_ELF32
select CEVT_R4K
select CSRC_R4K
@@ -675,6 +679,7 @@ config SGI_IP28
bool "SGI IP28 (Indigo2 R10k)"
select FW_ARC
select FW_ARC64
+ select ARCH_MIGHT_HAVE_PC_SERIO
select BOOT_ELF64
select CEVT_R4K
select CSRC_R4K
@@ -824,6 +829,8 @@ config SNI_RM
select FW_ARC32 if CPU_LITTLE_ENDIAN
select FW_SNIPROM if CPU_BIG_ENDIAN
select ARCH_MAY_HAVE_PC_FDC
+ select ARCH_MIGHT_HAVE_PC_PARPORT
+ select ARCH_MIGHT_HAVE_PC_SERIO
select BOOT_ELF32
select CEVT_R4K
select CSRC_R4K
@@ -2326,7 +2333,6 @@ config MIPS_VPE_LOADER_TOM
config MIPS_VPE_APSP_API
bool "Enable support for AP/SP API (RTLX)"
depends on MIPS_VPE_LOADER
- help
config MIPS_VPE_APSP_API_CMP
bool
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 9f6a26d72f9f..d1ca839c3981 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -216,6 +216,12 @@ cflags-$(toolchain-msa) += -DTOOLCHAIN_SUPPORTS_MSA
endif
toolchain-virt := $(call cc-option-yn,$(mips-cflags) -mvirt)
cflags-$(toolchain-virt) += -DTOOLCHAIN_SUPPORTS_VIRT
+# For -mmicromips, use -Wa,-fatal-warnings to catch an unsupported -mxpa,
+# which otherwise only triggers an assembler warning
+xpa-cflags-y := $(mips-cflags)
+xpa-cflags-$(micromips-ase) += -mmicromips -Wa$(comma)-fatal-warnings
+toolchain-xpa := $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
+cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA
#
# Firmware support
@@ -228,7 +234,7 @@ libs-y += arch/mips/fw/lib/
#
# Kernel compression
#
-ifdef SYS_SUPPORTS_ZBOOT
+ifdef CONFIG_SYS_SUPPORTS_ZBOOT
COMPRESSION_FNAME = vmlinuz
else
COMPRESSION_FNAME = vmlinux
diff --git a/arch/mips/bcm47xx/Platform b/arch/mips/bcm47xx/Platform
index 874b7ca4cd11..70783b75fd9d 100644
--- a/arch/mips/bcm47xx/Platform
+++ b/arch/mips/bcm47xx/Platform
@@ -5,3 +5,4 @@ platform-$(CONFIG_BCM47XX) += bcm47xx/
cflags-$(CONFIG_BCM47XX) += \
-I$(srctree)/arch/mips/include/asm/mach-bcm47xx
load-$(CONFIG_BCM47XX) := 0xffffffff80001000
+zload-$(CONFIG_BCM47XX) += 0xffffffff80400000
diff --git a/arch/mips/bcm63xx/boards/Kconfig b/arch/mips/bcm63xx/boards/Kconfig
index 6ff0a7481081..f60d96610ace 100644
--- a/arch/mips/bcm63xx/boards/Kconfig
+++ b/arch/mips/bcm63xx/boards/Kconfig
@@ -7,6 +7,5 @@ choice
config BOARD_BCM963XX
bool "Generic Broadcom 963xx boards"
select SSB
- help
endchoice
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index c675eece389a..adce180f3ee4 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -133,4 +133,8 @@ vmlinuz.srec: vmlinuz
uzImage.bin: vmlinuz.bin FORCE
$(call if_changed,uimage,none)
-clean-files := $(objtree)/vmlinuz $(objtree)/vmlinuz.{32,ecoff,bin,srec}
+clean-files += $(objtree)/vmlinuz
+clean-files += $(objtree)/vmlinuz.32
+clean-files += $(objtree)/vmlinuz.ecoff
+clean-files += $(objtree)/vmlinuz.bin
+clean-files += $(objtree)/vmlinuz.srec
diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile
index 6a31759839b4..5b1361a89e02 100644
--- a/arch/mips/boot/dts/ingenic/Makefile
+++ b/arch/mips/boot/dts/ingenic/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb
+dtb-$(CONFIG_JZ4770_GCW0) += gcw0.dtb
dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb
obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts
new file mode 100644
index 000000000000..35f0291e8d38
--- /dev/null
+++ b/arch/mips/boot/dts/ingenic/gcw0.dts
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "jz4770.dtsi"
+
+/ {
+ compatible = "gcw,zero", "ingenic,jz4770";
+ model = "GCW Zero";
+
+ aliases {
+ serial0 = &uart0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ serial3 = &uart3;
+ };
+
+ chosen {
+ stdout-path = "serial2:57600n8";
+ };
+
+ board {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ otg_phy: otg-phy {
+ compatible = "usb-nop-xceiv";
+ clocks = <&cgu JZ4770_CLK_OTG_PHY>;
+ clock-names = "main_clk";
+ };
+ };
+};
+
+&ext {
+ clock-frequency = <12000000>;
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&cgu {
+ /* Put high-speed peripherals under PLL1, such that we can change the
+ * PLL0 frequency on demand without having to suspend peripherals.
+ * We use a rate of 432 MHz, which is the least common multiple of
+ * 27 MHz (required by TV encoder) and 48 MHz (required by USB host).
+ */
+ assigned-clocks =
+ <&cgu JZ4770_CLK_PLL1>,
+ <&cgu JZ4770_CLK_UHC>;
+ assigned-clock-parents =
+ <0>,
+ <&cgu JZ4770_CLK_PLL1>;
+ assigned-clock-rates =
+ <432000000>;
+};
+
+&uhc {
+ /* The WiFi module is connected to the UHC. */
+ status = "okay";
+};
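
[Editorial note, not part of the patch: the 432 MHz figure in the &cgu comment checks out as the least common multiple claimed:

    27 = 3^3,  48 = 2^4 * 3,  so  lcm(27, 48) = 2^4 * 3^3 = 432

i.e. PLL1 at 432 MHz divides evenly to the 27 MHz TV-encoder clock (/16) and the 48 MHz USB-host clock (/9).]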
diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi
new file mode 100644
index 000000000000..7c2804f3f5f1
--- /dev/null
+++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <dt-bindings/clock/jz4770-cgu.h>
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "ingenic,jz4770";
+
+ cpuintc: interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "mti,cpu-interrupt-controller";
+ };
+
+ intc: interrupt-controller@10001000 {
+ compatible = "ingenic,jz4770-intc";
+ reg = <0x10001000 0x40>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <2>;
+ };
+
+ ext: ext {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ };
+
+ osc32k: osc32k {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+
+ cgu: jz4770-cgu@10000000 {
+ compatible = "ingenic,jz4770-cgu";
+ reg = <0x10000000 0x100>;
+
+ clocks = <&ext>, <&osc32k>;
+ clock-names = "ext", "osc32k";
+
+ #clock-cells = <1>;
+ };
+
+ pinctrl: pin-controller@10010000 {
+ compatible = "ingenic,jz4770-pinctrl";
+ reg = <0x10010000 0x600>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpa: gpio@0 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <0>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <17>;
+ };
+
+ gpb: gpio@1 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <1>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 32 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <16>;
+ };
+
+ gpc: gpio@2 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <2>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 64 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <15>;
+ };
+
+ gpd: gpio@3 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <3>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 96 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <14>;
+ };
+
+ gpe: gpio@4 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <4>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 128 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <13>;
+ };
+
+ gpf: gpio@5 {
+ compatible = "ingenic,jz4770-gpio";
+ reg = <5>;
+
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 160 32>;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <12>;
+ };
+ };
+
+ uart0: serial@10030000 {
+ compatible = "ingenic,jz4770-uart";
+ reg = <0x10030000 0x100>;
+
+ clocks = <&ext>, <&cgu JZ4770_CLK_UART0>;
+ clock-names = "baud", "module";
+
+ interrupt-parent = <&intc>;
+ interrupts = <5>;
+
+ status = "disabled";
+ };
+
+ uart1: serial@10031000 {
+ compatible = "ingenic,jz4770-uart";
+ reg = <0x10031000 0x100>;
+
+ clocks = <&ext>, <&cgu JZ4770_CLK_UART1>;
+ clock-names = "baud", "module";
+
+ interrupt-parent = <&intc>;
+ interrupts = <4>;
+
+ status = "disabled";
+ };
+
+ uart2: serial@10032000 {
+ compatible = "ingenic,jz4770-uart";
+ reg = <0x10032000 0x100>;
+
+ clocks = <&ext>, <&cgu JZ4770_CLK_UART2>;
+ clock-names = "baud", "module";
+
+ interrupt-parent = <&intc>;
+ interrupts = <3>;
+
+ status = "disabled";
+ };
+
+ uart3: serial@10033000 {
+ compatible = "ingenic,jz4770-uart";
+ reg = <0x10033000 0x100>;
+
+ clocks = <&ext>, <&cgu JZ4770_CLK_UART3>;
+ clock-names = "baud", "module";
+
+ interrupt-parent = <&intc>;
+ interrupts = <2>;
+
+ status = "disabled";
+ };
+
+ uhc: uhc@13430000 {
+ compatible = "generic-ohci";
+ reg = <0x13430000 0x1000>;
+
+ clocks = <&cgu JZ4770_CLK_UHC>, <&cgu JZ4770_CLK_UHC_PHY>;
+ assigned-clocks = <&cgu JZ4770_CLK_UHC>;
+ assigned-clock-rates = <48000000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <20>;
+
+ status = "disabled";
+ };
+};
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index a55009edbb29..5e73fe755be6 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -153,7 +153,6 @@ CONFIG_SLIP_COMPRESSED=y
CONFIG_SLIP_SMART=y
CONFIG_SLIP_MODE_SLIP6=y
# CONFIG_INPUT is not set
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_RAW=m
# CONFIG_VT is not set
CONFIG_SERIAL_NONSTANDARD=y
diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig
new file mode 100644
index 000000000000..99ac1fa3b35f
--- /dev/null
+++ b/arch/mips/configs/gcw0_defconfig
@@ -0,0 +1,27 @@
+CONFIG_MACH_INGENIC=y
+CONFIG_JZ4770_GCW0=y
+CONFIG_HIGHMEM=y
+# CONFIG_BOUNCE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_SECCOMP is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_EMBEDDED=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_NETDEVICES=y
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_INGENIC=y
+CONFIG_USB=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_NOP_USB_XCEIV=y
+CONFIG_TMPFS=y
diff --git a/arch/mips/configs/generic/board-ranchu.config b/arch/mips/configs/generic/board-ranchu.config
new file mode 100644
index 000000000000..fee9ad4c5598
--- /dev/null
+++ b/arch/mips/configs/generic/board-ranchu.config
@@ -0,0 +1,30 @@
+CONFIG_VIRT_BOARD_RANCHU=y
+
+CONFIG_BATTERY_GOLDFISH=y
+CONFIG_FB=y
+CONFIG_FB_GOLDFISH=y
+CONFIG_GOLDFISH=y
+CONFIG_STAGING=y
+CONFIG_GOLDFISH_AUDIO=y
+CONFIG_GOLDFISH_PIC=y
+CONFIG_GOLDFISH_PIPE=y
+CONFIG_GOLDFISH_TTY=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GOLDFISH=y
+
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_GOLDFISH_EVENTS=y
+
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_POWER_RESET_SYSCON_POWEROFF=y
+
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index a0d593248668..91a9c13e2c82 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -252,7 +252,6 @@ CONFIG_RT2800PCI=m
CONFIG_WL12XX=m
CONFIG_WL1251=m
# CONFIG_INPUT is not set
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_LIBPS2=m
CONFIG_SERIO_RAW=m
CONFIG_SERIO_ALTERA_PS2=m
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 1e26e58b9dc3..ebff297328ae 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -75,7 +75,6 @@ CONFIG_DE2104X=m
CONFIG_TULIP=m
CONFIG_TULIP_MMIO=y
CONFIG_INPUT_EVDEV=m
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_MACEPS2=y
CONFIG_SERIO_RAW=y
# CONFIG_CONSOLE_TRANSLATIONS is not set
diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 396408404487..df8a9a15ca83 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -312,9 +312,8 @@ CONFIG_HOSTAP_PCI=m
CONFIG_IPW2100=m
CONFIG_IPW2100_MONITOR=y
CONFIG_LIBERTAS=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_MOUSE_PS2_ELANTECH=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 5691673a3327..14df9ef15d40 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -324,9 +324,7 @@ CONFIG_HOSTAP_PCI=m
CONFIG_IPW2100=m
CONFIG_IPW2100_MONITOR=y
CONFIG_LIBERTAS=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
+CONFIG_INPUT_MOUSEDEV=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig
index e9cadb37d684..25092e344574 100644
--- a/arch/mips/configs/malta_kvm_guest_defconfig
+++ b/arch/mips/configs/malta_kvm_guest_defconfig
@@ -326,9 +326,7 @@ CONFIG_HOSTAP_PCI=m
CONFIG_IPW2100=m
CONFIG_IPW2100_MONITOR=y
CONFIG_LIBERTAS=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
+CONFIG_INPUT_MOUSEDEV=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index 77145ecaa23b..210bf609f785 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -126,6 +126,7 @@ CONFIG_PCNET32=y
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
# CONFIG_VT is not set
CONFIG_LEGACY_PTY_COUNT=4
CONFIG_SERIAL_8250=y
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index cc2687cfdc13..e5934aa98397 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -126,6 +126,7 @@ CONFIG_PCNET32=y
# CONFIG_NET_VENDOR_TOSHIBA is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
# CONFIG_VT is not set
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index d8c8f5fb8918..cb2ca11c1789 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -127,6 +127,7 @@ CONFIG_PCNET32=y
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
# CONFIG_VT is not set
CONFIG_LEGACY_PTY_COUNT=4
CONFIG_SERIAL_8250=y
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 04827bc9f87f..be29fcec69fc 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -130,6 +130,7 @@ CONFIG_PCNET32=y
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
# CONFIG_VT is not set
CONFIG_LEGACY_PTY_COUNT=4
CONFIG_SERIAL_8250=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index 7ea7c0ba2666..40462d4c90a0 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -125,6 +125,7 @@ CONFIG_PCNET32=y
# CONFIG_NET_VENDOR_TOSHIBA is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_WLAN is not set
+CONFIG_INPUT_MOUSEDEV=y
# CONFIG_VT is not set
CONFIG_LEGACY_PTY_COUNT=16
CONFIG_SERIAL_8250=y
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index 2942610e4082..4e50176cb3df 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -321,9 +321,8 @@ CONFIG_HOSTAP_PCI=m
CONFIG_IPW2100=m
CONFIG_IPW2100_MONITOR=y
CONFIG_LIBERTAS=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_MOUSE_PS2_ELANTECH=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_POWER_RESET=y
diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index 7357248b3d7a..e8e1dd8e0e99 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig
@@ -399,7 +399,6 @@ CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_EVBUG=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_SERPORT=m
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=m
diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index 1e18fd7de209..c4477a4d40c1 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig
@@ -332,7 +332,6 @@ CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_EVBUG=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_SERPORT=m
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_RAW=m
diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig
index 81b5eb89446c..e73cdb08fc6e 100644
--- a/arch/mips/configs/pnx8335_stb225_defconfig
+++ b/arch/mips/configs/pnx8335_stb225_defconfig
@@ -49,7 +49,6 @@ CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_EVBUG=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
# CONFIG_VT_CONSOLE is not set
CONFIG_SERIAL_PNX8XXX=y
CONFIG_SERIAL_PNX8XXX_CONSOLE=y
diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index c724bdd6a7e6..1edd8430ad61 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -65,7 +65,6 @@ CONFIG_NET_ETHERNET=y
CONFIG_MII=y
CONFIG_SB1250_MAC=y
# CONFIG_INPUT is not set
-# CONFIG_SERIO_I8042 is not set
CONFIG_SERIO_RAW=m
# CONFIG_VT is not set
# CONFIG_HW_RANDOM is not set
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 52e0286a1612..2ff3b17bfab1 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -49,4 +49,14 @@ config FIT_IMAGE_FDT_XILFPGA
Enable this to include the FDT for the MIPSfpga platform
from Imagination Technologies in the FIT kernel image.
+config VIRT_BOARD_RANCHU
+ bool "Support Ranchu platform for Android emulator"
+ help
+ This enables support for the Ranchu platform used by the Android emulator.
+
+ The Ranchu platform consists of a set of virtual devices. It allows the
+ Android emulator to emulate a variety of virtual machine configurations.
+ The Android emulator is based on QEMU, which provides support for the
+ same set of virtual devices.
+
endif
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index 874967363dbb..5c31e0c4697d 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -15,3 +15,4 @@ obj-y += proc.o
obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o
obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o
obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o
diff --git a/arch/mips/generic/board-ranchu.c b/arch/mips/generic/board-ranchu.c
new file mode 100644
index 000000000000..59a8c18fa2cc
--- /dev/null
+++ b/arch/mips/generic/board-ranchu.c
@@ -0,0 +1,93 @@
+/*
+ * Support code for virtual Ranchu board for MIPS.
+ *
+ * Author: Miodrag Dinic <miodrag.dinic@mips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/of_address.h>
+#include <linux/types.h>
+
+#include <asm/machine.h>
+#include <asm/mipsregs.h>
+#include <asm/time.h>
+
+#define GOLDFISH_TIMER_LOW 0x00
+#define GOLDFISH_TIMER_HIGH 0x04
+
+static __init u64 read_rtc_time(void __iomem *base)
+{
+ u32 time_low;
+ u32 time_high;
+
+ /*
+ * Reading the low address latches the high value
+ * as well, so there is no risk of reading an
+ * inaccurate high value.
+ */
+ time_low = readl(base + GOLDFISH_TIMER_LOW);
+ time_high = readl(base + GOLDFISH_TIMER_HIGH);
+
+ return ((u64)time_high << 32) | time_low;
+}
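For contrast, a split 64-bit counter without this hardware latch would need the classic high/low/high retry loop. A minimal sketch of what the latching behaviour saves (hypothetical helper, same register offsets as above):

```c
/* Sketch: the retry pattern required WITHOUT hardware latching. */
static u64 hypothetical_read_split_counter(void __iomem *base)
{
	u32 hi, lo;

	do {
		hi = readl(base + GOLDFISH_TIMER_HIGH);
		lo = readl(base + GOLDFISH_TIMER_LOW);
		/* Retry if the high word rolled over between the reads */
	} while (hi != readl(base + GOLDFISH_TIMER_HIGH));

	return ((u64)hi << 32) | lo;
}
```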
+
+static __init unsigned int ranchu_measure_hpt_freq(void)
+{
+ u64 rtc_start, rtc_current, rtc_delta;
+ unsigned int start, count;
+ struct device_node *np;
+ void __iomem *rtc_base;
+
+ np = of_find_compatible_node(NULL, NULL, "google,goldfish-rtc");
+ if (!np)
+ panic("%s(): Failed to find 'google,goldfish-rtc' dt node!",
+ __func__);
+
+ rtc_base = of_iomap(np, 0);
+ if (!rtc_base)
+ panic("%s(): Failed to ioremap Goldfish RTC base!", __func__);
+
+ /*
+ * Poll the nanosecond resolution RTC for one
+ * second to calibrate the CPU frequency.
+ */
+ rtc_start = read_rtc_time(rtc_base);
+ start = read_c0_count();
+
+ do {
+ rtc_current = read_rtc_time(rtc_base);
+ rtc_delta = rtc_current - rtc_start;
+ } while (rtc_delta < NSEC_PER_SEC);
+
+ count = read_c0_count() - start;
+
+ /*
+ * Make sure the frequency is a round number. Without
+ * this correction, the returned value could vary
+ * between successive emulator runs.
+ *
+ * TODO: Set this value using device tree.
+ */
+ count += 5000;
+ count -= count % 10000;
+
+ iounmap(rtc_base);
+
+ return count;
+}
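A quick worked example of the rounding correction above, with a hypothetical raw count chosen purely for illustration:

```c
/*
 * Assume a raw measurement of 199996700 cycles per second:
 *
 *	count += 5000;			// 200001700
 *	count -= count % 10000;		// 200001700 - 1700 = 200000000
 *
 * so the reported frequency settles on a stable 200 MHz across runs.
 */
```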
+
+static const struct of_device_id ranchu_of_match[] __initconst = {
+ {
+ .compatible = "mti,ranchu",
+ },
+ {}
+};
+
+MIPS_MACHINE(ranchu) = {
+ .matches = ranchu_of_match,
+ .measure_hpt_freq = ranchu_measure_hpt_freq,
+};
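For context, the generic MIPS platform code selects this machine by matching the FDT root node against ranchu_of_match. A minimal sketch of that check, using the libfdt helper the kernel already wraps (the function name is hypothetical):

```c
#include <linux/libfdt.h>
#include <linux/types.h>

/* Sketch: true when the board DT root carries compatible = "mti,ranchu" */
static bool hypothetical_fdt_is_ranchu(const void *fdt)
{
	/* fdt_node_check_compatible() returns 0 on a match */
	return fdt_node_check_compatible(fdt, 0, "mti,ranchu") == 0;
}
```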
diff --git a/arch/mips/generic/irq.c b/arch/mips/generic/irq.c
index 394f8161e462..cb7fdaeef426 100644
--- a/arch/mips/generic/irq.c
+++ b/arch/mips/generic/irq.c
@@ -22,10 +22,10 @@ int get_c0_fdc_int(void)
{
int mips_cpu_fdc_irq;
- if (cpu_has_veic)
- panic("Unimplemented!");
- else if (mips_gic_present())
+ if (mips_gic_present())
mips_cpu_fdc_irq = gic_get_c0_fdc_int();
+ else if (cpu_has_veic)
+ panic("Unimplemented!");
else if (cp0_fdc_irq >= 0)
mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
else
@@ -38,10 +38,10 @@ int get_c0_perfcount_int(void)
{
int mips_cpu_perf_irq;
- if (cpu_has_veic)
- panic("Unimplemented!");
- else if (mips_gic_present())
+ if (mips_gic_present())
mips_cpu_perf_irq = gic_get_c0_perfcount_int();
+ else if (cpu_has_veic)
+ panic("Unimplemented!");
else if (cp0_perfcount_irq >= 0)
mips_cpu_perf_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
else
@@ -54,10 +54,10 @@ unsigned int get_c0_compare_int(void)
{
int mips_cpu_timer_irq;
- if (cpu_has_veic)
- panic("Unimplemented!");
- else if (mips_gic_present())
+ if (mips_gic_present())
mips_cpu_timer_irq = gic_get_c0_compare_int();
+ else if (cpu_has_veic)
+ panic("Unimplemented!");
else
mips_cpu_timer_irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq;
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index e26a093bb17a..a301a8f4bc66 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -79,6 +79,8 @@ enum loongson_machine_type {
*/
#define MACH_INGENIC_JZ4730 0 /* JZ4730 SOC */
#define MACH_INGENIC_JZ4740 1 /* JZ4740 SOC */
+#define MACH_INGENIC_JZ4770 2 /* JZ4770 SOC */
+#define MACH_INGENIC_JZ4780 3 /* JZ4780 SOC */
extern char *system_type;
const char *get_system_type(void);
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 77cad232a1c6..e8161e4dfde7 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -110,7 +110,7 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst,
*/
static inline __sum16 csum_fold(__wsum csum)
{
- u32 sum = (__force u32)csum;;
+ u32 sum = (__force u32)csum;
sum += (sum << 16);
csum = (sum < csum);
diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h
index 4f69f08717f6..8c286bedff3e 100644
--- a/arch/mips/include/asm/mach-loongson64/boot_param.h
+++ b/arch/mips/include/asm/mach-loongson64/boot_param.h
@@ -4,7 +4,7 @@
#define SYSTEM_RAM_LOW 1
#define SYSTEM_RAM_HIGH 2
-#define MEM_RESERVED 3
+#define SYSTEM_RAM_RESERVED 3
#define PCI_IO 4
#define PCI_MEM 5
#define LOONGSON_CFG_REG 6
diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h
index e0d9b373d415..f83879dadd1e 100644
--- a/arch/mips/include/asm/machine.h
+++ b/arch/mips/include/asm/machine.h
@@ -52,7 +52,7 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt)
if (!mach->matches)
return NULL;
- for (match = mach->matches; match->compatible; match++) {
+ for (match = mach->matches; match->compatible[0]; match++) {
if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0)
return match;
}
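The fixed loop condition works because struct of_device_id embeds compatible as a char array rather than a pointer, so a match table's sentinel entry contains an empty string, never NULL. A sketch of the terminator shape this relies on:

```c
#include <linux/init.h>
#include <linux/mod_devicetable.h>

static const struct of_device_id example_matches[] __initconst = {
	{ .compatible = "mti,ranchu" },
	{ }	/* sentinel: compatible[0] == '\0', not a NULL pointer */
};
```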
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 6b1f1ad0542c..858752dac337 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1181,6 +1181,89 @@ static inline int mm_insn_16bit(u16 insn)
#endif
/*
+ * parse_r var, r - Helper assembler macro for parsing register names.
+ *
+ * This converts the register name in $n form provided in \r to the
+ * corresponding register number, which is assigned to the variable \var. This
+ * is needed to encode instructions explicitly in inline assembly where the
+ * registers are chosen by the compiler in $n form, avoiding the use of fixed
+ * register numbers.
+ *
+ * It also allows newer instructions (not implemented by the assembler) to be
+ * transparently implemented using assembler macros, instead of needing separate
+ * cases depending on toolchain support.
+ *
+ * Simple usage example:
+ * __asm__ __volatile__("parse_r __rt, %0\n\t"
+ * ".insn\n\t"
+ * "# di %0\n\t"
+ * ".word (0x41606000 | (__rt << 16))"
+ * : "=r" (status);
+ */
+
+/* Match an individual register number and assign to \var */
+#define _IFC_REG(n) \
+ ".ifc \\r, $" #n "\n\t" \
+ "\\var = " #n "\n\t" \
+ ".endif\n\t"
+
+__asm__(".macro parse_r var r\n\t"
+ "\\var = -1\n\t"
+ _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3)
+ _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7)
+ _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11)
+ _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
+ _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
+ _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
+ _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
+ _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
+ ".iflt \\var\n\t"
+ ".error \"Unable to parse register name \\r\"\n\t"
+ ".endif\n\t"
+ ".endm");
+
+#undef _IFC_REG
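Making the usage example from the comment above compilable, as a sketch (the wrapper name is hypothetical; the di encoding is the one given in the comment):

```c
static inline unsigned int hypothetical_di(void)
{
	unsigned int status;

	__asm__ __volatile__("parse_r __rt, %0\n\t"
			     ".insn\n\t"
			     "# di	%0\n\t"
			     ".word	(0x41606000 | (__rt << 16))"
			     : "=r" (status));
	return status;
}
```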
+
+/*
+ * C macros for generating assembler macros for common instruction formats.
+ *
+ * The names of the operands can be chosen by the caller, and the encoding of
+ * register operand \<Rn> is assigned to __<Rn> where it can be accessed from
+ * the ENC encodings.
+ */
+
+/* Instructions with no operands */
+#define _ASM_MACRO_0(OP, ENC) \
+ __asm__(".macro " #OP "\n\t" \
+ ENC \
+ ".endm")
+
+/* Instructions with 2 register operands */
+#define _ASM_MACRO_2R(OP, R1, R2, ENC) \
+ __asm__(".macro " #OP " " #R1 ", " #R2 "\n\t" \
+ "parse_r __" #R1 ", \\" #R1 "\n\t" \
+ "parse_r __" #R2 ", \\" #R2 "\n\t" \
+ ENC \
+ ".endm")
+
+/* Instructions with 3 register operands */
+#define _ASM_MACRO_3R(OP, R1, R2, R3, ENC) \
+ __asm__(".macro " #OP " " #R1 ", " #R2 ", " #R3 "\n\t" \
+ "parse_r __" #R1 ", \\" #R1 "\n\t" \
+ "parse_r __" #R2 ", \\" #R2 "\n\t" \
+ "parse_r __" #R3 ", \\" #R3 "\n\t" \
+ ENC \
+ ".endm")
+
+/* Instructions with 2 register operands and 1 optional select operand */
+#define _ASM_MACRO_2R_1S(OP, R1, R2, SEL3, ENC) \
+ __asm__(".macro " #OP " " #R1 ", " #R2 ", " #SEL3 " = 0\n\t" \
+ "parse_r __" #R1 ", \\" #R1 "\n\t" \
+ "parse_r __" #R2 ", \\" #R2 "\n\t" \
+ ENC \
+ ".endm")
+
+/*
* TLB Invalidate Flush
*/
static inline void tlbinvf(void)
@@ -1245,14 +1328,14 @@ do { \
* Macros to access the system control coprocessor
*/
-#define __read_32bit_c0_register(source, sel) \
+#define ___read_32bit_c0_register(source, sel, vol) \
({ unsigned int __res; \
if (sel == 0) \
- __asm__ __volatile__( \
+ __asm__ vol( \
"mfc0\t%0, " #source "\n\t" \
: "=r" (__res)); \
else \
- __asm__ __volatile__( \
+ __asm__ vol( \
".set\tmips32\n\t" \
"mfc0\t%0, " #source ", " #sel "\n\t" \
".set\tmips0\n\t" \
@@ -1260,18 +1343,18 @@ do { \
__res; \
})
-#define __read_64bit_c0_register(source, sel) \
+#define ___read_64bit_c0_register(source, sel, vol) \
({ unsigned long long __res; \
if (sizeof(unsigned long) == 4) \
- __res = __read_64bit_c0_split(source, sel); \
+ __res = __read_64bit_c0_split(source, sel, vol); \
else if (sel == 0) \
- __asm__ __volatile__( \
+ __asm__ vol( \
".set\tmips3\n\t" \
"dmfc0\t%0, " #source "\n\t" \
".set\tmips0" \
: "=r" (__res)); \
else \
- __asm__ __volatile__( \
+ __asm__ vol( \
".set\tmips64\n\t" \
"dmfc0\t%0, " #source ", " #sel "\n\t" \
".set\tmips0" \
@@ -1279,6 +1362,18 @@ do { \
__res; \
})
+#define __read_32bit_c0_register(source, sel) \
+ ___read_32bit_c0_register(source, sel, __volatile__)
+
+#define __read_const_32bit_c0_register(source, sel) \
+ ___read_32bit_c0_register(source, sel,)
+
+#define __read_64bit_c0_register(source, sel) \
+ ___read_64bit_c0_register(source, sel, __volatile__)
+
+#define __read_const_64bit_c0_register(source, sel) \
+ ___read_64bit_c0_register(source, sel,)
+
#define __write_32bit_c0_register(register, sel, value) \
do { \
if (sel == 0) \
@@ -1316,6 +1411,11 @@ do { \
(unsigned long) __read_32bit_c0_register(reg, sel) : \
(unsigned long) __read_64bit_c0_register(reg, sel))
+#define __read_const_ulong_c0_register(reg, sel) \
+ ((sizeof(unsigned long) == 4) ? \
+ (unsigned long) __read_const_32bit_c0_register(reg, sel) : \
+ (unsigned long) __read_const_64bit_c0_register(reg, sel))
+
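The const variants deliberately drop __volatile__, so the compiler may hoist or combine reads of registers whose value never changes, such as PRId (switched over below). A hedged sketch of the payoff; the company-ID mask and value follow the standard PRId encoding but are assumed here for illustration:

```c
/*
 * The read may be hoisted out of loops and merged with other PRId
 * reads because the const accessor is no longer volatile.
 */
static inline int hypothetical_cpu_is_mti(void)
{
	return (read_c0_prid() & 0xff0000) == 0x010000;
}
```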
#define __write_ulong_c0_register(reg, sel, val) \
do { \
if (sizeof(unsigned long) == 4) \
@@ -1346,14 +1446,14 @@ do { \
* These versions are only needed for systems with more than 38 bits of
* physical address space running the 32-bit kernel. That's none atm :-)
*/
-#define __read_64bit_c0_split(source, sel) \
+#define __read_64bit_c0_split(source, sel, vol) \
({ \
unsigned long long __val; \
unsigned long __flags; \
\
local_irq_save(__flags); \
if (sel == 0) \
- __asm__ __volatile__( \
+ __asm__ vol( \
".set\tmips64\n\t" \
"dmfc0\t%L0, " #source "\n\t" \
"dsra\t%M0, %L0, 32\n\t" \
@@ -1361,7 +1461,7 @@ do { \
".set\tmips0" \
: "=r" (__val)); \
else \
- __asm__ __volatile__( \
+ __asm__ vol( \
".set\tmips64\n\t" \
"dmfc0\t%L0, " #source ", " #sel "\n\t" \
"dsra\t%M0, %L0, 32\n\t" \
@@ -1404,37 +1504,43 @@ do { \
local_irq_restore(__flags); \
} while (0)
-#define __readx_32bit_c0_register(source) \
+#ifndef TOOLCHAIN_SUPPORTS_XPA
+_ASM_MACRO_2R_1S(mfhc0, rt, rs, sel,
+ _ASM_INSN_IF_MIPS(0x40400000 | __rt << 16 | __rs << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x000000f4 | __rt << 21 | __rs << 16 | \\sel << 11));
+_ASM_MACRO_2R_1S(mthc0, rt, rd, sel,
+ _ASM_INSN_IF_MIPS(0x40c00000 | __rt << 16 | __rd << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x000002f4 | __rt << 21 | __rd << 16 | \\sel << 11));
+#define _ASM_SET_XPA ""
+#else /* !TOOLCHAIN_SUPPORTS_XPA */
+#define _ASM_SET_XPA ".set\txpa\n\t"
+#endif
+
+#define __readx_32bit_c0_register(source, sel) \
({ \
unsigned int __res; \
\
__asm__ __volatile__( \
" .set push \n" \
- " .set noat \n" \
" .set mips32r2 \n" \
- " # mfhc0 $1, %1 \n" \
- _ASM_INSN_IF_MIPS(0x40410000 | ((%1 & 0x1f) << 11)) \
- _ASM_INSN32_IF_MM(0x002000f4 | ((%1 & 0x1f) << 16)) \
- " move %0, $1 \n" \
+ _ASM_SET_XPA \
+ " mfhc0 %0, " #source ", %1 \n" \
" .set pop \n" \
: "=r" (__res) \
- : "i" (source)); \
+ : "i" (sel)); \
__res; \
})
-#define __writex_32bit_c0_register(register, value) \
+#define __writex_32bit_c0_register(register, sel, value) \
do { \
__asm__ __volatile__( \
" .set push \n" \
- " .set noat \n" \
" .set mips32r2 \n" \
- " move $1, %0 \n" \
- " # mthc0 $1, %1 \n" \
- _ASM_INSN_IF_MIPS(0x40c10000 | ((%1 & 0x1f) << 11)) \
- _ASM_INSN32_IF_MM(0x002002f4 | ((%1 & 0x1f) << 16)) \
+ _ASM_SET_XPA \
+ " mthc0 %z0, " #register ", %1 \n" \
" .set pop \n" \
: \
- : "r" (value), "i" (register)); \
+ : "Jr" (value), "i" (sel)); \
} while (0)
#define read_c0_index() __read_32bit_c0_register($0, 0)
@@ -1446,14 +1552,14 @@ do { \
#define read_c0_entrylo0() __read_ulong_c0_register($2, 0)
#define write_c0_entrylo0(val) __write_ulong_c0_register($2, 0, val)
-#define readx_c0_entrylo0() __readx_32bit_c0_register(2)
-#define writex_c0_entrylo0(val) __writex_32bit_c0_register(2, val)
+#define readx_c0_entrylo0() __readx_32bit_c0_register($2, 0)
+#define writex_c0_entrylo0(val) __writex_32bit_c0_register($2, 0, val)
#define read_c0_entrylo1() __read_ulong_c0_register($3, 0)
#define write_c0_entrylo1(val) __write_ulong_c0_register($3, 0, val)
-#define readx_c0_entrylo1() __readx_32bit_c0_register(3)
-#define writex_c0_entrylo1(val) __writex_32bit_c0_register(3, val)
+#define readx_c0_entrylo1() __readx_32bit_c0_register($3, 0)
+#define writex_c0_entrylo1(val) __writex_32bit_c0_register($3, 0, val)
#define read_c0_conf() __read_32bit_c0_register($3, 0)
#define write_c0_conf(val) __write_32bit_c0_register($3, 0, val)
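A note on the constraints above: "Jr" accepts an integer zero and the %z operand modifier then substitutes register $zero, so clearing a register needs no scratch register. A hedged usage sketch with the accessors just defined:

```c
static inline void hypothetical_clear_entrylo_hi(void)
{
	/* Each line can emit a single "mthc0 $zero, $n, 0" */
	writex_c0_entrylo0(0);
	writex_c0_entrylo1(0);
}
```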
@@ -1541,7 +1647,7 @@ do { \
#define read_c0_epc() __read_ulong_c0_register($14, 0)
#define write_c0_epc(val) __write_ulong_c0_register($14, 0, val)
-#define read_c0_prid() __read_32bit_c0_register($15, 0)
+#define read_c0_prid() __read_const_32bit_c0_register($15, 0)
#define read_c0_cmgcrbase() __read_ulong_c0_register($15, 3)
@@ -1830,18 +1936,44 @@ do { \
* Macros to access the guest system control coprocessor
*/
-#ifdef TOOLCHAIN_SUPPORTS_VIRT
+#ifndef TOOLCHAIN_SUPPORTS_VIRT
+_ASM_MACRO_2R_1S(mfgc0, rt, rs, sel,
+ _ASM_INSN_IF_MIPS(0x40600000 | __rt << 16 | __rs << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x000004fc | __rt << 21 | __rs << 16 | \\sel << 11));
+_ASM_MACRO_2R_1S(dmfgc0, rt, rs, sel,
+ _ASM_INSN_IF_MIPS(0x40600100 | __rt << 16 | __rs << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x580004fc | __rt << 21 | __rs << 16 | \\sel << 11));
+_ASM_MACRO_2R_1S(mtgc0, rt, rd, sel,
+ _ASM_INSN_IF_MIPS(0x40600200 | __rt << 16 | __rd << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x000006fc | __rt << 21 | __rd << 16 | \\sel << 11));
+_ASM_MACRO_2R_1S(dmtgc0, rt, rd, sel,
+ _ASM_INSN_IF_MIPS(0x40600300 | __rt << 16 | __rd << 11 | \\sel)
+ _ASM_INSN32_IF_MM(0x580006fc | __rt << 21 | __rd << 16 | \\sel << 11));
+_ASM_MACRO_0(tlbgp, _ASM_INSN_IF_MIPS(0x42000010)
+ _ASM_INSN32_IF_MM(0x0000017c));
+_ASM_MACRO_0(tlbgr, _ASM_INSN_IF_MIPS(0x42000009)
+ _ASM_INSN32_IF_MM(0x0000117c));
+_ASM_MACRO_0(tlbgwi, _ASM_INSN_IF_MIPS(0x4200000a)
+ _ASM_INSN32_IF_MM(0x0000217c));
+_ASM_MACRO_0(tlbgwr, _ASM_INSN_IF_MIPS(0x4200000e)
+ _ASM_INSN32_IF_MM(0x0000317c));
+_ASM_MACRO_0(tlbginvf, _ASM_INSN_IF_MIPS(0x4200000c)
+ _ASM_INSN32_IF_MM(0x0000517c));
+#define _ASM_SET_VIRT ""
+#else /* !TOOLCHAIN_SUPPORTS_VIRT */
+#define _ASM_SET_VIRT ".set\tvirt\n\t"
+#endif
#define __read_32bit_gc0_register(source, sel) \
({ int __res; \
__asm__ __volatile__( \
".set\tpush\n\t" \
".set\tmips32r2\n\t" \
- ".set\tvirt\n\t" \
- "mfgc0\t%0, $%1, %2\n\t" \
+ _ASM_SET_VIRT \
+ "mfgc0\t%0, " #source ", %1\n\t" \
".set\tpop" \
: "=r" (__res) \
- : "i" (source), "i" (sel)); \
+ : "i" (sel)); \
__res; \
})
@@ -1850,11 +1982,11 @@ do { \
__asm__ __volatile__( \
".set\tpush\n\t" \
".set\tmips64r2\n\t" \
- ".set\tvirt\n\t" \
- "dmfgc0\t%0, $%1, %2\n\t" \
+ _ASM_SET_VIRT \
+ "dmfgc0\t%0, " #source ", %1\n\t" \
".set\tpop" \
: "=r" (__res) \
- : "i" (source), "i" (sel)); \
+ : "i" (sel)); \
__res; \
})
@@ -1863,11 +1995,11 @@ do { \
__asm__ __volatile__( \
".set\tpush\n\t" \
".set\tmips32r2\n\t" \
- ".set\tvirt\n\t" \
- "mtgc0\t%z0, $%1, %2\n\t" \
+ _ASM_SET_VIRT \
+ "mtgc0\t%z0, " #register ", %1\n\t" \
".set\tpop" \
: : "Jr" ((unsigned int)(value)), \
- "i" (register), "i" (sel)); \
+ "i" (sel)); \
} while (0)
#define __write_64bit_gc0_register(register, sel, value) \
@@ -1875,75 +2007,13 @@ do { \
__asm__ __volatile__( \
".set\tpush\n\t" \
".set\tmips64r2\n\t" \
- ".set\tvirt\n\t" \
- "dmtgc0\t%z0, $%1, %2\n\t" \
+ _ASM_SET_VIRT \
+ "dmtgc0\t%z0, " #register ", %1\n\t" \
".set\tpop" \
: : "Jr" (value), \
- "i" (register), "i" (sel)); \
+ "i" (sel)); \
} while (0)
-#else /* TOOLCHAIN_SUPPORTS_VIRT */
-
-#define __read_32bit_gc0_register(source, sel) \
-({ int __res; \
- __asm__ __volatile__( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "# mfgc0\t$1, $%1, %2\n\t" \
- _ASM_INSN_IF_MIPS(0x40610000 | %1 << 11 | %2) \
- _ASM_INSN32_IF_MM(0x002004fc | %1 << 16 | %2 << 11) \
- "move\t%0, $1\n\t" \
- ".set\tpop" \
- : "=r" (__res) \
- : "i" (source), "i" (sel)); \
- __res; \
-})
-
-#define __read_64bit_gc0_register(source, sel) \
-({ unsigned long long __res; \
- __asm__ __volatile__( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "# dmfgc0\t$1, $%1, %2\n\t" \
- _ASM_INSN_IF_MIPS(0x40610100 | %1 << 11 | %2) \
- _ASM_INSN32_IF_MM(0x582004fc | %1 << 16 | %2 << 11) \
- "move\t%0, $1\n\t" \
- ".set\tpop" \
- : "=r" (__res) \
- : "i" (source), "i" (sel)); \
- __res; \
-})
-
-#define __write_32bit_gc0_register(register, sel, value) \
-do { \
- __asm__ __volatile__( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "move\t$1, %z0\n\t" \
- "# mtgc0\t$1, $%1, %2\n\t" \
- _ASM_INSN_IF_MIPS(0x40610200 | %1 << 11 | %2) \
- _ASM_INSN32_IF_MM(0x002006fc | %1 << 16 | %2 << 11) \
- ".set\tpop" \
- : : "Jr" ((unsigned int)(value)), \
- "i" (register), "i" (sel)); \
-} while (0)
-
-#define __write_64bit_gc0_register(register, sel, value) \
-do { \
- __asm__ __volatile__( \
- ".set\tpush\n\t" \
- ".set\tnoat\n\t" \
- "move\t$1, %z0\n\t" \
- "# dmtgc0\t$1, $%1, %2\n\t" \
- _ASM_INSN_IF_MIPS(0x40610300 | %1 << 11 | %2) \
- _ASM_INSN32_IF_MM(0x582006fc | %1 << 16 | %2 << 11) \
- ".set\tpop" \
- : : "Jr" (value), \
- "i" (register), "i" (sel)); \
-} while (0)
-
-#endif /* !TOOLCHAIN_SUPPORTS_VIRT */
-
#define __read_ulong_gc0_register(reg, sel) \
((sizeof(unsigned long) == 4) ? \
(unsigned long) __read_32bit_gc0_register(reg, sel) : \
@@ -1957,207 +2027,207 @@ do { \
__write_64bit_gc0_register(reg, sel, val); \
} while (0)
-#define read_gc0_index() __read_32bit_gc0_register(0, 0)
-#define write_gc0_index(val) __write_32bit_gc0_register(0, 0, val)
+#define read_gc0_index() __read_32bit_gc0_register($0, 0)
+#define write_gc0_index(val) __write_32bit_gc0_register($0, 0, val)
-#define read_gc0_entrylo0() __read_ulong_gc0_register(2, 0)
-#define write_gc0_entrylo0(val) __write_ulong_gc0_register(2, 0, val)
+#define read_gc0_entrylo0() __read_ulong_gc0_register($2, 0)
+#define write_gc0_entrylo0(val) __write_ulong_gc0_register($2, 0, val)
-#define read_gc0_entrylo1() __read_ulong_gc0_register(3, 0)
-#define write_gc0_entrylo1(val) __write_ulong_gc0_register(3, 0, val)
+#define read_gc0_entrylo1() __read_ulong_gc0_register($3, 0)
+#define write_gc0_entrylo1(val) __write_ulong_gc0_register($3, 0, val)
-#define read_gc0_context() __read_ulong_gc0_register(4, 0)
-#define write_gc0_context(val) __write_ulong_gc0_register(4, 0, val)
+#define read_gc0_context() __read_ulong_gc0_register($4, 0)
+#define write_gc0_context(val) __write_ulong_gc0_register($4, 0, val)
-#define read_gc0_contextconfig() __read_32bit_gc0_register(4, 1)
-#define write_gc0_contextconfig(val) __write_32bit_gc0_register(4, 1, val)
+#define read_gc0_contextconfig() __read_32bit_gc0_register($4, 1)
+#define write_gc0_contextconfig(val) __write_32bit_gc0_register($4, 1, val)
-#define read_gc0_userlocal() __read_ulong_gc0_register(4, 2)
-#define write_gc0_userlocal(val) __write_ulong_gc0_register(4, 2, val)
+#define read_gc0_userlocal() __read_ulong_gc0_register($4, 2)
+#define write_gc0_userlocal(val) __write_ulong_gc0_register($4, 2, val)
-#define read_gc0_xcontextconfig() __read_ulong_gc0_register(4, 3)
-#define write_gc0_xcontextconfig(val) __write_ulong_gc0_register(4, 3, val)
+#define read_gc0_xcontextconfig() __read_ulong_gc0_register($4, 3)
+#define write_gc0_xcontextconfig(val) __write_ulong_gc0_register($4, 3, val)
-#define read_gc0_pagemask() __read_32bit_gc0_register(5, 0)
-#define write_gc0_pagemask(val) __write_32bit_gc0_register(5, 0, val)
+#define read_gc0_pagemask() __read_32bit_gc0_register($5, 0)
+#define write_gc0_pagemask(val) __write_32bit_gc0_register($5, 0, val)
-#define read_gc0_pagegrain() __read_32bit_gc0_register(5, 1)
-#define write_gc0_pagegrain(val) __write_32bit_gc0_register(5, 1, val)
+#define read_gc0_pagegrain() __read_32bit_gc0_register($5, 1)
+#define write_gc0_pagegrain(val) __write_32bit_gc0_register($5, 1, val)
-#define read_gc0_segctl0() __read_ulong_gc0_register(5, 2)
-#define write_gc0_segctl0(val) __write_ulong_gc0_register(5, 2, val)
+#define read_gc0_segctl0() __read_ulong_gc0_register($5, 2)
+#define write_gc0_segctl0(val) __write_ulong_gc0_register($5, 2, val)
-#define read_gc0_segctl1() __read_ulong_gc0_register(5, 3)
-#define write_gc0_segctl1(val) __write_ulong_gc0_register(5, 3, val)
+#define read_gc0_segctl1() __read_ulong_gc0_register($5, 3)
+#define write_gc0_segctl1(val) __write_ulong_gc0_register($5, 3, val)
-#define read_gc0_segctl2() __read_ulong_gc0_register(5, 4)
-#define write_gc0_segctl2(val) __write_ulong_gc0_register(5, 4, val)
+#define read_gc0_segctl2() __read_ulong_gc0_register($5, 4)
+#define write_gc0_segctl2(val) __write_ulong_gc0_register($5, 4, val)
-#define read_gc0_pwbase() __read_ulong_gc0_register(5, 5)
-#define write_gc0_pwbase(val) __write_ulong_gc0_register(5, 5, val)
+#define read_gc0_pwbase() __read_ulong_gc0_register($5, 5)
+#define write_gc0_pwbase(val) __write_ulong_gc0_register($5, 5, val)
-#define read_gc0_pwfield() __read_ulong_gc0_register(5, 6)
-#define write_gc0_pwfield(val) __write_ulong_gc0_register(5, 6, val)
+#define read_gc0_pwfield() __read_ulong_gc0_register($5, 6)
+#define write_gc0_pwfield(val) __write_ulong_gc0_register($5, 6, val)
-#define read_gc0_pwsize() __read_ulong_gc0_register(5, 7)
-#define write_gc0_pwsize(val) __write_ulong_gc0_register(5, 7, val)
+#define read_gc0_pwsize() __read_ulong_gc0_register($5, 7)
+#define write_gc0_pwsize(val) __write_ulong_gc0_register($5, 7, val)
-#define read_gc0_wired() __read_32bit_gc0_register(6, 0)
-#define write_gc0_wired(val) __write_32bit_gc0_register(6, 0, val)
+#define read_gc0_wired() __read_32bit_gc0_register($6, 0)
+#define write_gc0_wired(val) __write_32bit_gc0_register($6, 0, val)
-#define read_gc0_pwctl() __read_32bit_gc0_register(6, 6)
-#define write_gc0_pwctl(val) __write_32bit_gc0_register(6, 6, val)
-
-#define read_gc0_hwrena() __read_32bit_gc0_register(7, 0)
-#define write_gc0_hwrena(val) __write_32bit_gc0_register(7, 0, val)
-
-#define read_gc0_badvaddr() __read_ulong_gc0_register(8, 0)
-#define write_gc0_badvaddr(val) __write_ulong_gc0_register(8, 0, val)
-
-#define read_gc0_badinstr() __read_32bit_gc0_register(8, 1)
-#define write_gc0_badinstr(val) __write_32bit_gc0_register(8, 1, val)
-
-#define read_gc0_badinstrp() __read_32bit_gc0_register(8, 2)
-#define write_gc0_badinstrp(val) __write_32bit_gc0_register(8, 2, val)
-
-#define read_gc0_count() __read_32bit_gc0_register(9, 0)
-
-#define read_gc0_entryhi() __read_ulong_gc0_register(10, 0)
-#define write_gc0_entryhi(val) __write_ulong_gc0_register(10, 0, val)
-
-#define read_gc0_compare() __read_32bit_gc0_register(11, 0)
-#define write_gc0_compare(val) __write_32bit_gc0_register(11, 0, val)
-
-#define read_gc0_status() __read_32bit_gc0_register(12, 0)
-#define write_gc0_status(val) __write_32bit_gc0_register(12, 0, val)
-
-#define read_gc0_intctl() __read_32bit_gc0_register(12, 1)
-#define write_gc0_intctl(val) __write_32bit_gc0_register(12, 1, val)
-
-#define read_gc0_cause() __read_32bit_gc0_register(13, 0)
-#define write_gc0_cause(val) __write_32bit_gc0_register(13, 0, val)
-
-#define read_gc0_epc() __read_ulong_gc0_register(14, 0)
-#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val)
-
-#define read_gc0_prid() __read_32bit_gc0_register(15, 0)
-
-#define read_gc0_ebase() __read_32bit_gc0_register(15, 1)
-#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val)
-
-#define read_gc0_ebase_64() __read_64bit_gc0_register(15, 1)
-#define write_gc0_ebase_64(val) __write_64bit_gc0_register(15, 1, val)
-
-#define read_gc0_config() __read_32bit_gc0_register(16, 0)
-#define read_gc0_config1() __read_32bit_gc0_register(16, 1)
-#define read_gc0_config2() __read_32bit_gc0_register(16, 2)
-#define read_gc0_config3() __read_32bit_gc0_register(16, 3)
-#define read_gc0_config4() __read_32bit_gc0_register(16, 4)
-#define read_gc0_config5() __read_32bit_gc0_register(16, 5)
-#define read_gc0_config6() __read_32bit_gc0_register(16, 6)
-#define read_gc0_config7() __read_32bit_gc0_register(16, 7)
-#define write_gc0_config(val) __write_32bit_gc0_register(16, 0, val)
-#define write_gc0_config1(val) __write_32bit_gc0_register(16, 1, val)
-#define write_gc0_config2(val) __write_32bit_gc0_register(16, 2, val)
-#define write_gc0_config3(val) __write_32bit_gc0_register(16, 3, val)
-#define write_gc0_config4(val) __write_32bit_gc0_register(16, 4, val)
-#define write_gc0_config5(val) __write_32bit_gc0_register(16, 5, val)
-#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val)
-#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val)
-
-#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0)
-#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val)
-
-#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0)
-#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1)
-#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2)
-#define read_gc0_watchlo3() __read_ulong_gc0_register(18, 3)
-#define read_gc0_watchlo4() __read_ulong_gc0_register(18, 4)
-#define read_gc0_watchlo5() __read_ulong_gc0_register(18, 5)
-#define read_gc0_watchlo6() __read_ulong_gc0_register(18, 6)
-#define read_gc0_watchlo7() __read_ulong_gc0_register(18, 7)
-#define write_gc0_watchlo0(val) __write_ulong_gc0_register(18, 0, val)
-#define write_gc0_watchlo1(val) __write_ulong_gc0_register(18, 1, val)
-#define write_gc0_watchlo2(val) __write_ulong_gc0_register(18, 2, val)
-#define write_gc0_watchlo3(val) __write_ulong_gc0_register(18, 3, val)
-#define write_gc0_watchlo4(val) __write_ulong_gc0_register(18, 4, val)
-#define write_gc0_watchlo5(val) __write_ulong_gc0_register(18, 5, val)
-#define write_gc0_watchlo6(val) __write_ulong_gc0_register(18, 6, val)
-#define write_gc0_watchlo7(val) __write_ulong_gc0_register(18, 7, val)
-
-#define read_gc0_watchhi0() __read_32bit_gc0_register(19, 0)
-#define read_gc0_watchhi1() __read_32bit_gc0_register(19, 1)
-#define read_gc0_watchhi2() __read_32bit_gc0_register(19, 2)
-#define read_gc0_watchhi3() __read_32bit_gc0_register(19, 3)
-#define read_gc0_watchhi4() __read_32bit_gc0_register(19, 4)
-#define read_gc0_watchhi5() __read_32bit_gc0_register(19, 5)
-#define read_gc0_watchhi6() __read_32bit_gc0_register(19, 6)
-#define read_gc0_watchhi7() __read_32bit_gc0_register(19, 7)
-#define write_gc0_watchhi0(val) __write_32bit_gc0_register(19, 0, val)
-#define write_gc0_watchhi1(val) __write_32bit_gc0_register(19, 1, val)
-#define write_gc0_watchhi2(val) __write_32bit_gc0_register(19, 2, val)
-#define write_gc0_watchhi3(val) __write_32bit_gc0_register(19, 3, val)
-#define write_gc0_watchhi4(val) __write_32bit_gc0_register(19, 4, val)
-#define write_gc0_watchhi5(val) __write_32bit_gc0_register(19, 5, val)
-#define write_gc0_watchhi6(val) __write_32bit_gc0_register(19, 6, val)
-#define write_gc0_watchhi7(val) __write_32bit_gc0_register(19, 7, val)
-
-#define read_gc0_xcontext() __read_ulong_gc0_register(20, 0)
-#define write_gc0_xcontext(val) __write_ulong_gc0_register(20, 0, val)
-
-#define read_gc0_perfctrl0() __read_32bit_gc0_register(25, 0)
-#define write_gc0_perfctrl0(val) __write_32bit_gc0_register(25, 0, val)
-#define read_gc0_perfcntr0() __read_32bit_gc0_register(25, 1)
-#define write_gc0_perfcntr0(val) __write_32bit_gc0_register(25, 1, val)
-#define read_gc0_perfcntr0_64() __read_64bit_gc0_register(25, 1)
-#define write_gc0_perfcntr0_64(val) __write_64bit_gc0_register(25, 1, val)
-#define read_gc0_perfctrl1() __read_32bit_gc0_register(25, 2)
-#define write_gc0_perfctrl1(val) __write_32bit_gc0_register(25, 2, val)
-#define read_gc0_perfcntr1() __read_32bit_gc0_register(25, 3)
-#define write_gc0_perfcntr1(val) __write_32bit_gc0_register(25, 3, val)
-#define read_gc0_perfcntr1_64() __read_64bit_gc0_register(25, 3)
-#define write_gc0_perfcntr1_64(val) __write_64bit_gc0_register(25, 3, val)
-#define read_gc0_perfctrl2() __read_32bit_gc0_register(25, 4)
-#define write_gc0_perfctrl2(val) __write_32bit_gc0_register(25, 4, val)
-#define read_gc0_perfcntr2() __read_32bit_gc0_register(25, 5)
-#define write_gc0_perfcntr2(val) __write_32bit_gc0_register(25, 5, val)
-#define read_gc0_perfcntr2_64() __read_64bit_gc0_register(25, 5)
-#define write_gc0_perfcntr2_64(val) __write_64bit_gc0_register(25, 5, val)
-#define read_gc0_perfctrl3() __read_32bit_gc0_register(25, 6)
-#define write_gc0_perfctrl3(val) __write_32bit_gc0_register(25, 6, val)
-#define read_gc0_perfcntr3() __read_32bit_gc0_register(25, 7)
-#define write_gc0_perfcntr3(val) __write_32bit_gc0_register(25, 7, val)
-#define read_gc0_perfcntr3_64() __read_64bit_gc0_register(25, 7)
-#define write_gc0_perfcntr3_64(val) __write_64bit_gc0_register(25, 7, val)
-
-#define read_gc0_errorepc() __read_ulong_gc0_register(30, 0)
-#define write_gc0_errorepc(val) __write_ulong_gc0_register(30, 0, val)
-
-#define read_gc0_kscratch1() __read_ulong_gc0_register(31, 2)
-#define read_gc0_kscratch2() __read_ulong_gc0_register(31, 3)
-#define read_gc0_kscratch3() __read_ulong_gc0_register(31, 4)
-#define read_gc0_kscratch4() __read_ulong_gc0_register(31, 5)
-#define read_gc0_kscratch5() __read_ulong_gc0_register(31, 6)
-#define read_gc0_kscratch6() __read_ulong_gc0_register(31, 7)
-#define write_gc0_kscratch1(val) __write_ulong_gc0_register(31, 2, val)
-#define write_gc0_kscratch2(val) __write_ulong_gc0_register(31, 3, val)
-#define write_gc0_kscratch3(val) __write_ulong_gc0_register(31, 4, val)
-#define write_gc0_kscratch4(val) __write_ulong_gc0_register(31, 5, val)
-#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val)
-#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val)
+#define read_gc0_pwctl() __read_32bit_gc0_register($6, 6)
+#define write_gc0_pwctl(val) __write_32bit_gc0_register($6, 6, val)
+
+#define read_gc0_hwrena() __read_32bit_gc0_register($7, 0)
+#define write_gc0_hwrena(val) __write_32bit_gc0_register($7, 0, val)
+
+#define read_gc0_badvaddr() __read_ulong_gc0_register($8, 0)
+#define write_gc0_badvaddr(val) __write_ulong_gc0_register($8, 0, val)
+
+#define read_gc0_badinstr() __read_32bit_gc0_register($8, 1)
+#define write_gc0_badinstr(val) __write_32bit_gc0_register($8, 1, val)
+
+#define read_gc0_badinstrp() __read_32bit_gc0_register($8, 2)
+#define write_gc0_badinstrp(val) __write_32bit_gc0_register($8, 2, val)
+
+#define read_gc0_count() __read_32bit_gc0_register($9, 0)
+
+#define read_gc0_entryhi() __read_ulong_gc0_register($10, 0)
+#define write_gc0_entryhi(val) __write_ulong_gc0_register($10, 0, val)
+
+#define read_gc0_compare() __read_32bit_gc0_register($11, 0)
+#define write_gc0_compare(val) __write_32bit_gc0_register($11, 0, val)
+
+#define read_gc0_status() __read_32bit_gc0_register($12, 0)
+#define write_gc0_status(val) __write_32bit_gc0_register($12, 0, val)
+
+#define read_gc0_intctl() __read_32bit_gc0_register($12, 1)
+#define write_gc0_intctl(val) __write_32bit_gc0_register($12, 1, val)
+
+#define read_gc0_cause() __read_32bit_gc0_register($13, 0)
+#define write_gc0_cause(val) __write_32bit_gc0_register($13, 0, val)
+
+#define read_gc0_epc() __read_ulong_gc0_register($14, 0)
+#define write_gc0_epc(val) __write_ulong_gc0_register($14, 0, val)
+
+#define read_gc0_prid() __read_32bit_gc0_register($15, 0)
+
+#define read_gc0_ebase() __read_32bit_gc0_register($15, 1)
+#define write_gc0_ebase(val) __write_32bit_gc0_register($15, 1, val)
+
+#define read_gc0_ebase_64() __read_64bit_gc0_register($15, 1)
+#define write_gc0_ebase_64(val) __write_64bit_gc0_register($15, 1, val)
+
+#define read_gc0_config() __read_32bit_gc0_register($16, 0)
+#define read_gc0_config1() __read_32bit_gc0_register($16, 1)
+#define read_gc0_config2() __read_32bit_gc0_register($16, 2)
+#define read_gc0_config3() __read_32bit_gc0_register($16, 3)
+#define read_gc0_config4() __read_32bit_gc0_register($16, 4)
+#define read_gc0_config5() __read_32bit_gc0_register($16, 5)
+#define read_gc0_config6() __read_32bit_gc0_register($16, 6)
+#define read_gc0_config7() __read_32bit_gc0_register($16, 7)
+#define write_gc0_config(val) __write_32bit_gc0_register($16, 0, val)
+#define write_gc0_config1(val) __write_32bit_gc0_register($16, 1, val)
+#define write_gc0_config2(val) __write_32bit_gc0_register($16, 2, val)
+#define write_gc0_config3(val) __write_32bit_gc0_register($16, 3, val)
+#define write_gc0_config4(val) __write_32bit_gc0_register($16, 4, val)
+#define write_gc0_config5(val) __write_32bit_gc0_register($16, 5, val)
+#define write_gc0_config6(val) __write_32bit_gc0_register($16, 6, val)
+#define write_gc0_config7(val) __write_32bit_gc0_register($16, 7, val)
+
+#define read_gc0_lladdr() __read_ulong_gc0_register($17, 0)
+#define write_gc0_lladdr(val) __write_ulong_gc0_register($17, 0, val)
+
+#define read_gc0_watchlo0() __read_ulong_gc0_register($18, 0)
+#define read_gc0_watchlo1() __read_ulong_gc0_register($18, 1)
+#define read_gc0_watchlo2() __read_ulong_gc0_register($18, 2)
+#define read_gc0_watchlo3() __read_ulong_gc0_register($18, 3)
+#define read_gc0_watchlo4() __read_ulong_gc0_register($18, 4)
+#define read_gc0_watchlo5() __read_ulong_gc0_register($18, 5)
+#define read_gc0_watchlo6() __read_ulong_gc0_register($18, 6)
+#define read_gc0_watchlo7() __read_ulong_gc0_register($18, 7)
+#define write_gc0_watchlo0(val) __write_ulong_gc0_register($18, 0, val)
+#define write_gc0_watchlo1(val) __write_ulong_gc0_register($18, 1, val)
+#define write_gc0_watchlo2(val) __write_ulong_gc0_register($18, 2, val)
+#define write_gc0_watchlo3(val) __write_ulong_gc0_register($18, 3, val)
+#define write_gc0_watchlo4(val) __write_ulong_gc0_register($18, 4, val)
+#define write_gc0_watchlo5(val) __write_ulong_gc0_register($18, 5, val)
+#define write_gc0_watchlo6(val) __write_ulong_gc0_register($18, 6, val)
+#define write_gc0_watchlo7(val) __write_ulong_gc0_register($18, 7, val)
+
+#define read_gc0_watchhi0() __read_32bit_gc0_register($19, 0)
+#define read_gc0_watchhi1() __read_32bit_gc0_register($19, 1)
+#define read_gc0_watchhi2() __read_32bit_gc0_register($19, 2)
+#define read_gc0_watchhi3() __read_32bit_gc0_register($19, 3)
+#define read_gc0_watchhi4() __read_32bit_gc0_register($19, 4)
+#define read_gc0_watchhi5() __read_32bit_gc0_register($19, 5)
+#define read_gc0_watchhi6() __read_32bit_gc0_register($19, 6)
+#define read_gc0_watchhi7() __read_32bit_gc0_register($19, 7)
+#define write_gc0_watchhi0(val) __write_32bit_gc0_register($19, 0, val)
+#define write_gc0_watchhi1(val) __write_32bit_gc0_register($19, 1, val)
+#define write_gc0_watchhi2(val) __write_32bit_gc0_register($19, 2, val)
+#define write_gc0_watchhi3(val) __write_32bit_gc0_register($19, 3, val)
+#define write_gc0_watchhi4(val) __write_32bit_gc0_register($19, 4, val)
+#define write_gc0_watchhi5(val) __write_32bit_gc0_register($19, 5, val)
+#define write_gc0_watchhi6(val) __write_32bit_gc0_register($19, 6, val)
+#define write_gc0_watchhi7(val) __write_32bit_gc0_register($19, 7, val)
+
+#define read_gc0_xcontext() __read_ulong_gc0_register($20, 0)
+#define write_gc0_xcontext(val) __write_ulong_gc0_register($20, 0, val)
+
+#define read_gc0_perfctrl0() __read_32bit_gc0_register($25, 0)
+#define write_gc0_perfctrl0(val) __write_32bit_gc0_register($25, 0, val)
+#define read_gc0_perfcntr0() __read_32bit_gc0_register($25, 1)
+#define write_gc0_perfcntr0(val) __write_32bit_gc0_register($25, 1, val)
+#define read_gc0_perfcntr0_64() __read_64bit_gc0_register($25, 1)
+#define write_gc0_perfcntr0_64(val) __write_64bit_gc0_register($25, 1, val)
+#define read_gc0_perfctrl1() __read_32bit_gc0_register($25, 2)
+#define write_gc0_perfctrl1(val) __write_32bit_gc0_register($25, 2, val)
+#define read_gc0_perfcntr1() __read_32bit_gc0_register($25, 3)
+#define write_gc0_perfcntr1(val) __write_32bit_gc0_register($25, 3, val)
+#define read_gc0_perfcntr1_64() __read_64bit_gc0_register($25, 3)
+#define write_gc0_perfcntr1_64(val) __write_64bit_gc0_register($25, 3, val)
+#define read_gc0_perfctrl2() __read_32bit_gc0_register($25, 4)
+#define write_gc0_perfctrl2(val) __write_32bit_gc0_register($25, 4, val)
+#define read_gc0_perfcntr2() __read_32bit_gc0_register($25, 5)
+#define write_gc0_perfcntr2(val) __write_32bit_gc0_register($25, 5, val)
+#define read_gc0_perfcntr2_64() __read_64bit_gc0_register($25, 5)
+#define write_gc0_perfcntr2_64(val) __write_64bit_gc0_register($25, 5, val)
+#define read_gc0_perfctrl3() __read_32bit_gc0_register($25, 6)
+#define write_gc0_perfctrl3(val) __write_32bit_gc0_register($25, 6, val)
+#define read_gc0_perfcntr3() __read_32bit_gc0_register($25, 7)
+#define write_gc0_perfcntr3(val) __write_32bit_gc0_register($25, 7, val)
+#define read_gc0_perfcntr3_64() __read_64bit_gc0_register($25, 7)
+#define write_gc0_perfcntr3_64(val) __write_64bit_gc0_register($25, 7, val)
+
+#define read_gc0_errorepc() __read_ulong_gc0_register($30, 0)
+#define write_gc0_errorepc(val) __write_ulong_gc0_register($30, 0, val)
+
+#define read_gc0_kscratch1() __read_ulong_gc0_register($31, 2)
+#define read_gc0_kscratch2() __read_ulong_gc0_register($31, 3)
+#define read_gc0_kscratch3() __read_ulong_gc0_register($31, 4)
+#define read_gc0_kscratch4() __read_ulong_gc0_register($31, 5)
+#define read_gc0_kscratch5() __read_ulong_gc0_register($31, 6)
+#define read_gc0_kscratch6() __read_ulong_gc0_register($31, 7)
+#define write_gc0_kscratch1(val) __write_ulong_gc0_register($31, 2, val)
+#define write_gc0_kscratch2(val) __write_ulong_gc0_register($31, 3, val)
+#define write_gc0_kscratch3(val) __write_ulong_gc0_register($31, 4, val)
+#define write_gc0_kscratch4(val) __write_ulong_gc0_register($31, 5, val)
+#define write_gc0_kscratch5(val) __write_ulong_gc0_register($31, 6, val)
+#define write_gc0_kscratch6(val) __write_ulong_gc0_register($31, 7, val)
/* Cavium OCTEON (cnMIPS) */
-#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6)
-#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val)
+#define read_gc0_cvmcount() __read_ulong_gc0_register($9, 6)
+#define write_gc0_cvmcount(val) __write_ulong_gc0_register($9, 6, val)
-#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7)
-#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val)
+#define read_gc0_cvmctl() __read_64bit_gc0_register($9, 7)
+#define write_gc0_cvmctl(val) __write_64bit_gc0_register($9, 7, val)
-#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7)
-#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val)
+#define read_gc0_cvmmemctl() __read_64bit_gc0_register($11, 7)
+#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register($11, 7, val)
-#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6)
-#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val)
+#define read_gc0_cvmmemctl2() __read_64bit_gc0_register($16, 6)
+#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register($16, 6, val)
/*
* Macros to access the floating point coprocessor control registers
@@ -2581,8 +2651,6 @@ static inline void tlb_write_random(void)
".set reorder");
}
-#ifdef TOOLCHAIN_SUPPORTS_VIRT
-
/*
* Guest TLB operations.
*
@@ -2593,7 +2661,7 @@ static inline void guest_tlb_probe(void)
__asm__ __volatile__(
".set push\n\t"
".set noreorder\n\t"
- ".set virt\n\t"
+ _ASM_SET_VIRT
"tlbgp\n\t"
".set pop");
}
@@ -2603,7 +2671,7 @@ static inline void guest_tlb_read(void)
__asm__ __volatile__(
".set push\n\t"
".set noreorder\n\t"
- ".set virt\n\t"
+ _ASM_SET_VIRT
"tlbgr\n\t"
".set pop");
}
@@ -2613,7 +2681,7 @@ static inline void guest_tlb_write_indexed(void)
__asm__ __volatile__(
".set push\n\t"
".set noreorder\n\t"
- ".set virt\n\t"
+ _ASM_SET_VIRT
"tlbgwi\n\t"
".set pop");
}
@@ -2623,7 +2691,7 @@ static inline void guest_tlb_write_random(void)
__asm__ __volatile__(
".set push\n\t"
".set noreorder\n\t"
- ".set virt\n\t"
+ _ASM_SET_VIRT
"tlbgwr\n\t"
".set pop");
}
@@ -2636,63 +2704,11 @@ static inline void guest_tlbinvf(void)
__asm__ __volatile__(
".set push\n\t"
".set noreorder\n\t"
- ".set virt\n\t"
+ _ASM_SET_VIRT
"tlbginvf\n\t"
".set pop");
}
-#else /* TOOLCHAIN_SUPPORTS_VIRT */
-
-/*
- * Guest TLB operations.
- *
- * It is responsibility of the caller to take care of any TLB hazards.
- */
-static inline void guest_tlb_probe(void)
-{
- __asm__ __volatile__(
- "# tlbgp\n\t"
- _ASM_INSN_IF_MIPS(0x42000010)
- _ASM_INSN32_IF_MM(0x0000017c));
-}
-
-static inline void guest_tlb_read(void)
-{
- __asm__ __volatile__(
- "# tlbgr\n\t"
- _ASM_INSN_IF_MIPS(0x42000009)
- _ASM_INSN32_IF_MM(0x0000117c));
-}
-
-static inline void guest_tlb_write_indexed(void)
-{
- __asm__ __volatile__(
- "# tlbgwi\n\t"
- _ASM_INSN_IF_MIPS(0x4200000a)
- _ASM_INSN32_IF_MM(0x0000217c));
-}
-
-static inline void guest_tlb_write_random(void)
-{
- __asm__ __volatile__(
- "# tlbgwr\n\t"
- _ASM_INSN_IF_MIPS(0x4200000e)
- _ASM_INSN32_IF_MM(0x0000317c));
-}
-
-/*
- * Guest TLB Invalidate Flush
- */
-static inline void guest_tlbinvf(void)
-{
- __asm__ __volatile__(
- "# tlbginvf\n\t"
- _ASM_INSN_IF_MIPS(0x4200000c)
- _ASM_INSN32_IF_MM(0x0000517c));
-}
-
-#endif /* !TOOLCHAIN_SUPPORTS_VIRT */
-
/*
* Manipulate bits in a register.
*/
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index b1845102f8f9..b4f9577ed96a 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -160,7 +160,23 @@ static inline void init_msa_upper(void)
_init_msa_upper();
}
-#ifdef TOOLCHAIN_SUPPORTS_MSA
+#ifndef TOOLCHAIN_SUPPORTS_MSA
+/*
+ * Define assembler macros using .word for the c[ft]cmsa instructions in order
+ * to allow compilation with toolchains that do not support MSA. Once all
+ * toolchains in use support MSA these can be removed.
+ */
+_ASM_MACRO_2R(cfcmsa, rd, cs,
+ _ASM_INSN_IF_MIPS(0x787e0019 | __cs << 11 | __rd << 6)
+ _ASM_INSN32_IF_MM(0x587e0016 | __cs << 11 | __rd << 6));
+_ASM_MACRO_2R(ctcmsa, cd, rs,
+ _ASM_INSN_IF_MIPS(0x783e0019 | __rs << 11 | __cd << 6)
+ _ASM_INSN32_IF_MM(0x583e0016 | __rs << 11 | __cd << 6));
+#define _ASM_SET_MSA ""
+#else /* TOOLCHAIN_SUPPORTS_MSA */
+#define _ASM_SET_MSA ".set\tfp=64\n\t" \
+ ".set\tmsa\n\t"
+#endif
#define __BUILD_MSA_CTL_REG(name, cs) \
static inline unsigned int read_msa_##name(void) \
@@ -168,8 +184,7 @@ static inline unsigned int read_msa_##name(void) \
unsigned int reg; \
__asm__ __volatile__( \
" .set push\n" \
- " .set fp=64\n" \
- " .set msa\n" \
+ _ASM_SET_MSA \
" cfcmsa %0, $" #cs "\n" \
" .set pop\n" \
: "=r"(reg)); \
@@ -180,52 +195,12 @@ static inline void write_msa_##name(unsigned int val) \
{ \
__asm__ __volatile__( \
" .set push\n" \
- " .set fp=64\n" \
- " .set msa\n" \
+ _ASM_SET_MSA \
" ctcmsa $" #cs ", %0\n" \
" .set pop\n" \
: : "r"(val)); \
}
-#else /* !TOOLCHAIN_SUPPORTS_MSA */
-
-/*
- * Define functions using .word for the c[ft]cmsa instructions in order to
- * allow compilation with toolchains that do not support MSA. Once all
- * toolchains in use support MSA these can be removed.
- */
-
-#define __BUILD_MSA_CTL_REG(name, cs) \
-static inline unsigned int read_msa_##name(void) \
-{ \
- unsigned int reg; \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noat\n" \
- " # cfcmsa $1, $%1\n" \
- _ASM_INSN_IF_MIPS(0x787e0059 | %1 << 11) \
- _ASM_INSN32_IF_MM(0x587e0056 | %1 << 11) \
- " move %0, $1\n" \
- " .set pop\n" \
- : "=r"(reg) : "i"(cs)); \
- return reg; \
-} \
- \
-static inline void write_msa_##name(unsigned int val) \
-{ \
- __asm__ __volatile__( \
- " .set push\n" \
- " .set noat\n" \
- " move $1, %0\n" \
- " # ctcmsa $%1, $1\n" \
- _ASM_INSN_IF_MIPS(0x783e0819 | %1 << 6) \
- _ASM_INSN32_IF_MM(0x583e0816 | %1 << 6) \
- " .set pop\n" \
- : : "r"(val), "i"(cs)); \
-}
-
-#endif /* !TOOLCHAIN_SUPPORTS_MSA */
-
__BUILD_MSA_CTL_REG(ir, 0)
__BUILD_MSA_CTL_REG(csr, 1)
__BUILD_MSA_CTL_REG(access, 2)
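Each __BUILD_MSA_CTL_REG() invocation generates a read_msa_<name>()/write_msa_<name>() pair. A hedged usage sketch; the Cause field position (bits 17:12, mirroring the FPU FCSR layout) is assumed here for illustration:

```c
/* Sketch: acknowledge any pending MSA floating point exception causes. */
static inline void hypothetical_clear_msa_causes(void)
{
	write_msa_csr(read_msa_csr() & ~0x3f000);	/* clear Cause[17:12] */
}
```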
diff --git a/arch/mips/include/uapi/asm/poll.h b/arch/mips/include/uapi/asm/poll.h
index 3173f8917128..ad289d7b7434 100644
--- a/arch/mips/include/uapi/asm/poll.h
+++ b/arch/mips/include/uapi/asm/poll.h
@@ -2,25 +2,8 @@
#ifndef __ASM_POLL_H
#define __ASM_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)0x0100
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 0x0100
#include <asm-generic/poll.h>
diff --git a/arch/mips/jazz/setup.c b/arch/mips/jazz/setup.c
index e4374a5651ce..448fd41792e4 100644
--- a/arch/mips/jazz/setup.c
+++ b/arch/mips/jazz/setup.c
@@ -32,22 +32,22 @@ static struct resource jazz_io_resources[] = {
.start = 0x00,
.end = 0x1f,
.name = "dma1",
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
}, {
.start = 0x40,
.end = 0x5f,
.name = "timer",
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
}, {
.start = 0x80,
.end = 0x8f,
.name = "dma page reg",
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
}, {
.start = 0xc0,
.end = 0xdf,
.name = "dma2",
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
}
};
diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig
index 643af2012e14..4dd0c446ecec 100644
--- a/arch/mips/jz4740/Kconfig
+++ b/arch/mips/jz4740/Kconfig
@@ -8,6 +8,10 @@ config JZ4740_QI_LB60
bool "Qi Hardware Ben NanoNote"
select MACH_JZ4740
+config JZ4770_GCW0
+ bool "Game Consoles Worldwide GCW Zero"
+ select MACH_JZ4770
+
config JZ4780_CI20
bool "MIPS Creator CI20"
select MACH_JZ4780
@@ -18,6 +22,12 @@ config MACH_JZ4740
bool
select SYS_HAS_CPU_MIPS32_R1
+config MACH_JZ4770
+ bool
+ select MIPS_CPU_SCACHE
+ select SYS_HAS_CPU_MIPS32_R2
+ select SYS_SUPPORTS_HIGHMEM
+
config MACH_JZ4780
bool
select MIPS_CPU_SCACHE
diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c
index 47e857194ce6..eb9f2f97bedb 100644
--- a/arch/mips/jz4740/prom.c
+++ b/arch/mips/jz4740/prom.c
@@ -20,33 +20,12 @@
#include <linux/serial_reg.h>
#include <asm/bootinfo.h>
+#include <asm/fw/fw.h>
#include <asm/mach-jz4740/base.h>
-static __init void jz4740_init_cmdline(int argc, char *argv[])
-{
- unsigned int count = COMMAND_LINE_SIZE - 1;
- int i;
- char *dst = &(arcs_cmdline[0]);
- char *src;
-
- for (i = 1; i < argc && count; ++i) {
- src = argv[i];
- while (*src && count) {
- *dst++ = *src++;
- --count;
- }
- *dst++ = ' ';
- }
- if (i > 1)
- --dst;
-
- *dst = 0;
-}
-
void __init prom_init(void)
{
- jz4740_init_cmdline((int)fw_arg0, (char **)fw_arg1);
- mips_machtype = MACH_INGENIC_JZ4740;
+ fw_init_cmdline();
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c
index 6d0152321819..afb40f8bce96 100644
--- a/arch/mips/jz4740/setup.c
+++ b/arch/mips/jz4740/setup.c
@@ -53,6 +53,16 @@ static void __init jz4740_detect_mem(void)
add_memory_region(0, size, BOOT_MEM_RAM);
}
+static unsigned long __init get_board_mach_type(const void *fdt)
+{
+ if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4780"))
+ return MACH_INGENIC_JZ4780;
+ if (!fdt_node_check_compatible(fdt, 0, "ingenic,jz4770"))
+ return MACH_INGENIC_JZ4770;
+
+ return MACH_INGENIC_JZ4740;
+}
+
void __init plat_mem_setup(void)
{
int offset;
@@ -63,6 +73,8 @@ void __init plat_mem_setup(void)
offset = fdt_path_offset(__dtb_start, "/memory");
if (offset < 0)
jz4740_detect_mem();
+
+ mips_machtype = get_board_mach_type(__dtb_start);
}
void __init device_tree_init(void)
@@ -75,10 +87,14 @@ void __init device_tree_init(void)
const char *get_system_type(void)
{
- if (IS_ENABLED(CONFIG_MACH_JZ4780))
+ switch (mips_machtype) {
+ case MACH_INGENIC_JZ4780:
return "JZ4780";
-
- return "JZ4740";
+ case MACH_INGENIC_JZ4770:
+ return "JZ4770";
+ default:
+ return "JZ4740";
+ }
}
void __init arch_init_irq(void)
diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c
index bb1ad5119da4..2ca9160f642a 100644
--- a/arch/mips/jz4740/time.c
+++ b/arch/mips/jz4740/time.c
@@ -113,7 +113,7 @@ static struct clock_event_device jz4740_clockevent = {
#ifdef CONFIG_MACH_JZ4740
.irq = JZ4740_IRQ_TCU0,
#endif
-#ifdef CONFIG_MACH_JZ4780
+#if defined(CONFIG_MACH_JZ4770) || defined(CONFIG_MACH_JZ4780)
.irq = JZ4780_IRQ_TCU2,
#endif
};
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index b79ed9af9886..e48f6c0a9e4a 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -399,7 +399,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs)
*
* @regs: Pointer to pt_regs
* @insn: branch instruction to decode
- * @returns: -EFAULT on error and forces SIGILL, and on success
+ * Return: -EFAULT on error and forces SIGILL, and on success
* returns 0 or BRANCH_LIKELY_TAKEN as appropriate after
* evaluating the branch.
*
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index e68e6e04063a..1025f937ab0e 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -388,15 +388,16 @@ LEAF(mips_cps_boot_vpes)
#elif defined(CONFIG_MIPS_MT)
- .set push
- .set MIPS_ISA_LEVEL_RAW
- .set mt
-
/* If the core doesn't support MT then return */
has_mt t0, 5f
/* Enter VPE configuration state */
+ .set push
+ .set MIPS_ISA_LEVEL_RAW
+ .set mt
dvpe
+ .set pop
+
PTR_LA t1, 1f
jr.hb t1
nop
@@ -422,6 +423,10 @@ LEAF(mips_cps_boot_vpes)
mtc0 t0, CP0_VPECONTROL
ehb
+ .set push
+ .set MIPS_ISA_LEVEL_RAW
+ .set mt
+
/* Skip the VPE if its TC is not halted */
mftc0 t0, CP0_TCHALT
beqz t0, 2f
@@ -495,6 +500,8 @@ LEAF(mips_cps_boot_vpes)
ehb
evpe
+ .set pop
+
/* Check whether this VPE is meant to be running */
li t0, 1
sll t0, t0, a1
@@ -509,7 +516,7 @@ LEAF(mips_cps_boot_vpes)
1: jr.hb t0
nop
-2: .set pop
+2:
#endif /* CONFIG_MIPS_MT_SMP */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 99285be0e088..7f3dfdbc3657 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -361,7 +361,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
* If we fail to get the stack address of the non-leaf function's
* ra, stop the function graph tracer and return
*/
- if (parent_ra_addr == 0)
+ if (parent_ra_addr == NULL)
goto out;
#endif
/* *parent_ra_addr = return_hooker; */
diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c
index 19c88d770054..fcf9af492d60 100644
--- a/arch/mips/kernel/mips-cpc.c
+++ b/arch/mips/kernel/mips-cpc.c
@@ -10,6 +10,8 @@
#include <linux/errno.h>
#include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/spinlock.h>
#include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
phys_addr_t __weak mips_cpc_default_phys_base(void)
{
+ struct device_node *cpc_node;
+ struct resource res;
+ int err;
+
+ cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+ if (cpc_node) {
+ err = of_address_to_resource(cpc_node, 0, &res);
+ if (!err)
+ return res.start;
+ }
+
return 0;
}
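A sketch of the canonical form of the lookup added above, assuming the caller is allowed to drop the node when done: of_find_compatible_node() returns the node with an elevated reference count, which is normally released with of_node_put() once the resource has been read.

#include <linux/of.h>
#include <linux/of_address.h>

static phys_addr_t cpc_base_from_dt(void)	/* hypothetical helper */
{
	struct device_node *np;
	struct resource res;
	phys_addr_t base = 0;

	np = of_find_compatible_node(NULL, NULL, "mti,mips-cpc");
	if (np) {
		if (!of_address_to_resource(np, 0, &res))
			base = res.start;
		of_node_put(np);	/* drop the reference from the lookup */
	}

	return base;
}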
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index bbb0f4770c0d..18c509c59f33 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -349,11 +349,11 @@ static __poll_t file_poll(struct file *file, poll_table *wait)
/* data available to read? */
if (rtlx_read_poll(minor, 0))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
/* space to write */
if (rtlx_write_poll(minor))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
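The POLLIN/POLLOUT to EPOLLIN/EPOLLOUT renames here follow the kernel-wide __poll_t conversion: the EPOLL* constants carry fixed, architecture-independent values, and the __poll_t type lets sparse verify that poll methods only ever combine those bits. A minimal sketch of a poll method in this style (the wait queue and both predicates are hypothetical):

#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/wait.h>

extern bool demo_data_ready(void);		/* hypothetical predicate */
extern bool demo_space_available(void);		/* hypothetical predicate */

static DECLARE_WAIT_QUEUE_HEAD(demo_wait);	/* hypothetical queue */

static __poll_t demo_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(file, &demo_wait, wait);
	if (demo_data_ready())
		mask |= EPOLLIN | EPOLLRDNORM;
	if (demo_space_available())
		mask |= EPOLLOUT | EPOLLWRNORM;

	return mask;
}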
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 702c678de116..85bc601e9a0d 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -826,25 +826,6 @@ static void __init arch_mem_init(char **cmdline_p)
struct memblock_region *reg;
extern void plat_mem_setup(void);
- /* call board setup routine */
- plat_mem_setup();
-
- /*
- * Make sure all kernel memory is in the maps. The "UP" and
- * "DOWN" are opposite for initdata since if it crosses over
- * into another memory section you don't want that to be
- * freed when the initdata is freed.
- */
- arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
- PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
- BOOT_MEM_RAM);
- arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
- PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
- BOOT_MEM_INIT_RAM);
-
- pr_info("Determined physical RAM map:\n");
- print_memory_map();
-
#if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE)
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
#else
@@ -872,6 +853,26 @@ static void __init arch_mem_init(char **cmdline_p)
}
#endif
#endif
+
+ /* call board setup routine */
+ plat_mem_setup();
+
+ /*
+ * Make sure all kernel memory is in the maps. The "UP" and
+ * "DOWN" are opposite for initdata since if it crosses over
+ * into another memory section you don't want that to be
+ * freed when the initdata is freed.
+ */
+ arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
+ PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
+ BOOT_MEM_RAM);
+ arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
+ PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
+ BOOT_MEM_INIT_RAM);
+
+ pr_info("Determined physical RAM map:\n");
+ print_memory_map();
+
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index ecc1a853f48d..03f1026ad148 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -439,8 +439,6 @@ void play_dead(void)
pr_debug("CPU%d going offline\n", cpu);
if (cpu_has_mipsmt || cpu_has_vp) {
- core = cpu_core(&cpu_data[cpu]);
-
/* Look for another online VPE within the core */
for_each_online_cpu(cpu_death_sibling) {
if (!cpus_are_siblings(cpu, cpu_death_sibling))
diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c
index 19fcab7348b1..0e61a5b7647f 100644
--- a/arch/mips/kernel/watch.c
+++ b/arch/mips/kernel/watch.c
@@ -18,27 +18,24 @@
void mips_install_watch_registers(struct task_struct *t)
{
struct mips3264_watch_reg_state *watches = &t->thread.watch.mips3264;
+ unsigned int watchhi = MIPS_WATCHHI_G | /* Trap all ASIDs */
+ MIPS_WATCHHI_IRW; /* Clear result bits */
+
switch (current_cpu_data.watch_reg_use_cnt) {
default:
BUG();
case 4:
write_c0_watchlo3(watches->watchlo[3]);
- /* Write 1 to the I, R, and W bits to clear them, and
- 1 to G so all ASIDs are trapped. */
- write_c0_watchhi3(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
- watches->watchhi[3]);
+ write_c0_watchhi3(watchhi | watches->watchhi[3]);
case 3:
write_c0_watchlo2(watches->watchlo[2]);
- write_c0_watchhi2(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
- watches->watchhi[2]);
+ write_c0_watchhi2(watchhi | watches->watchhi[2]);
case 2:
write_c0_watchlo1(watches->watchlo[1]);
- write_c0_watchhi1(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
- watches->watchhi[1]);
+ write_c0_watchhi1(watchhi | watches->watchhi[1]);
case 1:
write_c0_watchlo0(watches->watchlo[0]);
- write_c0_watchhi0(MIPS_WATCHHI_G | MIPS_WATCHHI_IRW |
- watches->watchhi[0]);
+ write_c0_watchhi0(watchhi | watches->watchhi[0]);
}
}
@@ -51,21 +48,19 @@ void mips_read_watch_registers(void)
{
struct mips3264_watch_reg_state *watches =
&current->thread.watch.mips3264;
+ unsigned int watchhi_mask = MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW;
+
switch (current_cpu_data.watch_reg_use_cnt) {
default:
BUG();
case 4:
- watches->watchhi[3] = (read_c0_watchhi3() &
- (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
+ watches->watchhi[3] = (read_c0_watchhi3() & watchhi_mask);
case 3:
- watches->watchhi[2] = (read_c0_watchhi2() &
- (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
+ watches->watchhi[2] = (read_c0_watchhi2() & watchhi_mask);
case 2:
- watches->watchhi[1] = (read_c0_watchhi1() &
- (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
+ watches->watchhi[1] = (read_c0_watchhi1() & watchhi_mask);
case 1:
- watches->watchhi[0] = (read_c0_watchhi0() &
- (MIPS_WATCHHI_MASK | MIPS_WATCHHI_IRW));
+ watches->watchhi[0] = (read_c0_watchhi0() & watchhi_mask);
}
if (current_cpu_data.watch_reg_use_cnt == 1 &&
(watches->watchhi[0] & MIPS_WATCHHI_IRW) == 0) {
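The switches above rely on deliberate top-down fall-through: a CPU that reports N watch register pairs enters at case N and writes pairs N-1 down to 0, each ORing in the common watchhi bits hoisted out by this hunk. A generic sketch of the idiom, with write_pair() as a hypothetical stand-in for the write_c0_watch*() accessors:

extern void write_pair(int idx, unsigned long lo, unsigned int hi);	/* hypothetical */

static void write_watch_pairs(unsigned int count, const unsigned long *lo,
			      const unsigned int *hi, unsigned int hi_common)
{
	switch (count) {
	case 4:
		write_pair(3, lo[3], hi_common | hi[3]);
		/* fall through */
	case 3:
		write_pair(2, lo[2], hi_common | hi[2]);
		/* fall through */
	case 2:
		write_pair(1, lo[1], hi_common | hi[1]);
		/* fall through */
	case 1:
		write_pair(0, lo[0], hi_common | hi[0]);
	}
}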
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index b17447ce8873..76b93a9c8c9b 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
select MMU_NOTIFIER
select SRCU
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 75fdeaa8c62f..2549fdd27ee1 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -446,6 +446,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r = -EINTR;
+ vcpu_load(vcpu);
+
kvm_sigset_activate(vcpu);
if (vcpu->mmio_needed) {
@@ -480,6 +482,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
out:
kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
return r;
}
@@ -900,6 +903,26 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_mips_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+ kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
+ irq.irq);
+
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -907,56 +930,54 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
void __user *argp = (void __user *)arg;
long r;
+ vcpu_load(vcpu);
+
switch (ioctl) {
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
- return -EFAULT;
+ break;
if (ioctl == KVM_SET_ONE_REG)
- return kvm_mips_set_reg(vcpu, &reg);
+ r = kvm_mips_set_reg(vcpu, &reg);
else
- return kvm_mips_get_reg(vcpu, &reg);
+ r = kvm_mips_get_reg(vcpu, &reg);
+ break;
}
case KVM_GET_REG_LIST: {
struct kvm_reg_list __user *user_list = argp;
struct kvm_reg_list reg_list;
unsigned n;
+ r = -EFAULT;
if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
n = reg_list.n;
reg_list.n = kvm_mips_num_regs(vcpu);
if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
- return -EFAULT;
+ break;
+ r = -E2BIG;
if (n < reg_list.n)
- return -E2BIG;
- return kvm_mips_copy_reg_indices(vcpu, user_list->reg);
- }
- case KVM_INTERRUPT:
- {
- struct kvm_mips_interrupt irq;
-
- if (copy_from_user(&irq, argp, sizeof(irq)))
- return -EFAULT;
- kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
- irq.irq);
-
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
break;
- }
+ r = kvm_mips_copy_reg_indices(vcpu, user_list->reg);
+ break;
+ }
case KVM_ENABLE_CAP: {
struct kvm_enable_cap cap;
+ r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
- return -EFAULT;
+ break;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
default:
r = -ENOIOCTLCMD;
}
+
+ vcpu_put(vcpu);
return r;
}
@@ -1145,6 +1166,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
vcpu->arch.gprs[i] = regs->gpr[i];
vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
@@ -1152,6 +1175,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.lo = regs->lo;
vcpu->arch.pc = regs->pc;
+ vcpu_put(vcpu);
return 0;
}
@@ -1159,6 +1183,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
regs->gpr[i] = vcpu->arch.gprs[i];
@@ -1166,6 +1192,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->lo = vcpu->arch.lo;
regs->pc = vcpu->arch.pc;
+ vcpu_put(vcpu);
return 0;
}
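Selecting HAVE_KVM_VCPU_ASYNC_IOCTL (see the Kconfig hunk above) makes generic KVM offer vcpu ioctls to an arch hook before any serialization, which is what allows KVM_INTERRUPT to be injected without waiting for the vcpu mutex. A simplified sketch of the dispatch shape this assumes, not the exact kvm_main.c code:

static long vcpu_ioctl(struct file *filp, unsigned int ioctl,
		       unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	long r;

	/* Fast path: may run while the vcpu is executing guest code */
	r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
	if (r != -ENOIOCTLCMD)
		return r;

	/* Slow path: serialized ioctls such as KVM_{GET,SET}_ONE_REG */
	if (mutex_lock_killable(&vcpu->mutex))
		return -EINTR;
	r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	mutex_unlock(&vcpu->mutex);

	return r;
}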
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 6f109bb54cdb..bc2fdbfa8223 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -17,7 +17,6 @@ config LEMOTE_FULOONG2E
select I8259
select ISA
select IRQ_MIPS_CPU
- select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_HIGHMEM
@@ -49,7 +48,6 @@ config LEMOTE_MACH2F
select ISA
select SYS_HAS_CPU_LOONGSON2F
select SYS_HAS_EARLY_PRINTK
- select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_HIGHMEM
select SYS_SUPPORTS_LITTLE_ENDIAN
diff --git a/arch/mips/loongson64/common/mem.c b/arch/mips/loongson64/common/mem.c
index b01d52473da8..c549e525fc11 100644
--- a/arch/mips/loongson64/common/mem.c
+++ b/arch/mips/loongson64/common/mem.c
@@ -79,7 +79,7 @@ void __init prom_init_memory(void)
(u64)loongson_memmap->map[i].mem_size << 20,
BOOT_MEM_RAM);
break;
- case MEM_RESERVED:
+ case SYSTEM_RAM_RESERVED:
add_memory_region(loongson_memmap->map[i].mem_start,
(u64)loongson_memmap->map[i].mem_size << 20,
BOOT_MEM_RESERVED);
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index f17ef520799a..9717106de4a5 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -166,7 +166,7 @@ static void __init szmem(unsigned int node)
memblock_add_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn), node);
break;
- case MEM_RESERVED:
+ case SYSTEM_RAM_RESERVED:
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
(u32)node_id, mem_type, mem_start, mem_size);
add_memory_region((node_id << 44) + mem_start,
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index da6c1c0c30c1..62deb025970b 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -451,7 +451,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->cp0_epc + dec_insn.pc_inc +
dec_insn.next_pc_inc;
}
- /* Fall through */
+ /* fall through */
case jr_op:
/* For R6, JR already emulated in jalr_op */
if (NO_R6EMU && insn.r_format.func == jr_op)
@@ -471,10 +471,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case bltzl_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bltz_op:
if ((long)regs->regs[insn.i_format.rs] < 0)
*contpc = regs->cp0_epc +
@@ -494,10 +495,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case bgezl_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bgez_op:
if ((long)regs->regs[insn.i_format.rs] >= 0)
*contpc = regs->cp0_epc +
@@ -512,11 +514,12 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
break;
case jalx_op:
set_isa16_mode(bit);
+ /* fall through */
case jal_op:
regs->regs[31] = regs->cp0_epc +
dec_insn.pc_inc +
dec_insn.next_pc_inc;
- /* Fall through */
+ /* fall through */
case j_op:
*contpc = regs->cp0_epc + dec_insn.pc_inc;
*contpc >>= 28;
@@ -528,6 +531,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case beql_op:
if (NO_R6EMU)
break;
+ /* fall through */
case beq_op:
if (regs->regs[insn.i_format.rs] ==
regs->regs[insn.i_format.rt])
@@ -542,6 +546,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bnel_op:
if (NO_R6EMU)
break;
+ /* fall through */
case bne_op:
if (regs->regs[insn.i_format.rs] !=
regs->regs[insn.i_format.rt])
@@ -556,6 +561,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case blezl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
+ /* fall through */
case blez_op:
/*
@@ -593,6 +599,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
case bgtzl_op:
if (!insn.i_format.rt && NO_R6EMU)
break;
+ /* fall through */
case bgtz_op:
/*
* Compact branches for R6 for the
@@ -729,7 +736,8 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
return 1;
}
- /* R2/R6 compatible cop1 instruction. Fall through */
+ /* R2/R6 compatible cop1 instruction */
+ /* fall through */
case cop2_op:
case cop1x_op:
if (insn.i_format.rs == bc_op) {
@@ -1190,7 +1198,8 @@ emul:
if (!cpu_has_mips_r6 || delay_slot(xcp))
return SIGILL;
- cond = likely = 0;
+ likely = 0;
+ cond = 0;
fpr = &current->thread.fpu.fpr[MIPSInst_RT(ir)];
bit0 = get_fpr32(fpr, 0) & 0x1;
switch (MIPSInst_RS(ir)) {
@@ -1220,14 +1229,14 @@ emul:
case bcfl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* Fall through */
+ /* fall through */
case bcf_op:
cond = !cond;
break;
case bctl_op:
if (cpu_has_mips_2_3_4_5_r)
likely = 1;
- /* Fall through */
+ /* fall through */
case bct_op:
break;
}
@@ -1353,7 +1362,8 @@ branch_common:
return SIGILL;
/* a real fpu computation instruction */
- if ((sig = fpu_emu(xcp, ctx, ir)))
+ sig = fpu_emu(xcp, ctx, ir);
+ if (sig)
return sig;
}
break;
diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c
index 8954ef031f84..678de20e4cb1 100644
--- a/arch/mips/math-emu/dp_add.c
+++ b/arch/mips/math-emu/dp_add.c
@@ -104,8 +104,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
-
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c
index f4746f7c5f63..3063ae3ab3b9 100644
--- a/arch/mips/math-emu/dp_div.c
+++ b/arch/mips/math-emu/dp_div.c
@@ -103,6 +103,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index 5bec64f2884e..d1f984b40344 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index a287b23818d8..f98b96135c8d 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -96,6 +96,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -224,6 +225,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 7ad79ed411f5..7ea2f8222026 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -16,7 +16,7 @@
/* 128 bits shift right logical with rounding. */
-void srl128(u64 *hptr, u64 *lptr, int count)
+static void srl128(u64 *hptr, u64 *lptr, int count)
{
u64 low;
@@ -157,6 +157,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
if (zc == IEEE754_CLASS_INF)
@@ -173,7 +174,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
- /* fall through to real computations */
+ /* continue to real computations */
}
/* Finally get to do some computation */
@@ -201,9 +202,6 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
* Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
*/
- /* 32 * 32 => 64 */
-#define DPXMULT(x, y) ((u64)(x) * (u64)y)
-
lxm = xm;
hxm = xm >> 32;
lym = ym;
diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c
index 60c8bfe40947..c34a6cdf1b25 100644
--- a/arch/mips/math-emu/dp_mul.c
+++ b/arch/mips/math-emu/dp_mul.c
@@ -101,6 +101,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
DPDNORMY;
@@ -128,9 +129,6 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
* Multiply 64 bits xm, ym to give high 64 bits rm with stickiness.
*/
- /* 32 * 32 => 64 */
-#define DPXMULT(x, y) ((u64)(x) * (u64)y)
-
lxm = xm;
hxm = xm >> 32;
lym = ym;
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c
index cea907b83146..1d26c92e5295 100644
--- a/arch/mips/math-emu/dp_sqrt.c
+++ b/arch/mips/math-emu/dp_sqrt.c
@@ -91,7 +91,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
scalx -= 256;
}
- y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
+ x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT);
+ y = x;
/* magic initial approximation to almost 8 sig. bits */
yh = y.bits >> 32;
@@ -108,7 +109,8 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
/* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */
/* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */
- z = t = ieee754dp_mul(y, y);
+ t = ieee754dp_mul(y, y);
+ z = t;
t.bexp += 0x001;
t = ieee754dp_add(t, z);
z = ieee754dp_mul(ieee754dp_sub(x, z), y);
@@ -140,7 +142,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x)
switch (oldcsr.rm) {
case FPU_CSR_RU:
y.bits += 1;
- /* drop through */
+ /* fall through */
case FPU_CSR_RN:
t.bits += 1;
break;
diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c
index fc17a781b9ae..3cc48b86519b 100644
--- a/arch/mips/math-emu/dp_sub.c
+++ b/arch/mips/math-emu/dp_sub.c
@@ -106,7 +106,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
/* normalize ym,ye */
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index 9ba023004eb6..a56707b75282 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -55,6 +55,9 @@ static inline int ieee754dp_finite(union ieee754dp x)
#define XDPSRS1(v) \
(((v) >> 1) | ((v) & 1))
+/* 32bit * 32bit => 64bit unsigned integer multiplication */
+#define DPXMULT(x, y) ((u64)(x) * (u64)y)
+
/* convert denormal to normalized with extended exponent */
#define DPDNORMx(m,e) \
while ((m >> DP_FBITS) == 0) { m <<= 1; e--; }
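DPXMULT() widens a single 32x32 multiply to 64 bits; the callers in dp_mul.c and dp_maddf.c split each 64-bit mantissa into 32-bit halves and combine four such partial products into a 64x64->128 result. A self-contained sketch of that combination in plain C:

#include <stdint.h>

static void mul64to128(uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
	uint64_t lx = (uint32_t)x, hx = x >> 32;
	uint64_t ly = (uint32_t)y, hy = y >> 32;
	uint64_t ll = lx * ly;	/* DPXMULT(lx, ly), and so on */
	uint64_t lh = lx * hy;
	uint64_t hl = hx * ly;
	uint64_t hh = hx * hy;
	/* middle column: carry out of ll plus the low halves of lh and hl */
	uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;

	*lo = (mid << 32) | (uint32_t)ll;
	*hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
}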
diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c
index c55c0c00bca8..51dced9fbdaf 100644
--- a/arch/mips/math-emu/sp_add.c
+++ b/arch/mips/math-emu/sp_add.c
@@ -104,8 +104,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
-
- /* FALL THROUGH */
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c
index 23587b31ca87..5d2904960eb8 100644
--- a/arch/mips/math-emu/sp_div.c
+++ b/arch/mips/math-emu/sp_div.c
@@ -103,6 +103,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index 5060e8fdcb0b..36a50f9082d1 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c
@@ -46,7 +46,8 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
case IEEE754_CLASS_SNAN:
x = ieee754dp_nanxcpt(x);
EXPLODEXDP;
- /* Fall through. */
+ /* fall through */
+
case IEEE754_CLASS_QNAN:
y = ieee754sp_nan_fdp(xs, xm);
if (!ieee754_csr.nan2008) {
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 74a5a00d2f22..22019ed691df 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index c51385f46b09..feaec3985cca 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -96,6 +96,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
@@ -224,6 +225,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index f823338dbb65..07ba675401e2 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -126,6 +126,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
if (zc == IEEE754_CLASS_INF)
@@ -142,7 +143,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* fall through to real computations */
+ /* continue to real computations */
}
/* Finally get to do some computation */
diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c
index 4015101fbc37..fde71e293ec4 100644
--- a/arch/mips/math-emu/sp_mul.c
+++ b/arch/mips/math-emu/sp_mul.c
@@ -101,6 +101,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_sqrt.c b/arch/mips/math-emu/sp_sqrt.c
index 67059c33a250..9cc83f012342 100644
--- a/arch/mips/math-emu/sp_sqrt.c
+++ b/arch/mips/math-emu/sp_sqrt.c
@@ -82,7 +82,8 @@ union ieee754sp ieee754sp_sqrt(union ieee754sp x)
/* generate sqrt(x) bit by bit */
ix += ix;
- q = s = 0; /* q = sqrt(x) */
+ s = 0;
+ q = 0; /* q = sqrt(x) */
r = 0x01000000; /* r = moving bit from right to left */
while (r != 0) {
diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c
index dc998ed47295..9f2ff72c3d6b 100644
--- a/arch/mips/math-emu/sp_sub.c
+++ b/arch/mips/math-emu/sp_sub.c
@@ -106,6 +106,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
+ /* fall through */
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
SPDNORMY;
diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c
index a2450c7e452a..bca5ac995801 100644
--- a/arch/mips/math-emu/sp_tlong.c
+++ b/arch/mips/math-emu/sp_tlong.c
@@ -20,7 +20,6 @@
*/
#include "ieee754sp.h"
-#include "ieee754dp.h"
s64 ieee754sp_tlong(union ieee754sp x)
{
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 237532e89919..dcafa43613b6 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -370,11 +370,6 @@ static void mips_dma_sync_sg_for_device(struct device *dev,
}
}
-static int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
static int mips_dma_supported(struct device *dev, u64 mask)
{
return plat_dma_supported(dev, mask);
@@ -401,7 +396,6 @@ static const struct dma_map_ops mips_default_dma_map_ops = {
.sync_single_for_device = mips_dma_sync_single_for_device,
.sync_sg_for_cpu = mips_dma_sync_sg_for_cpu,
.sync_sg_for_device = mips_dma_sync_sg_for_device,
- .mapping_error = mips_dma_mapping_error,
.dma_supported = mips_dma_supported,
.cache_sync = mips_dma_cache_sync,
};
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 548acb7f8557..394673991bab 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -16,6 +16,7 @@
#include <asm/mmu_context.h>
#include <asm/r4kcache.h>
#include <asm/mips-cps.h>
+#include <asm/bootinfo.h>
/*
* MIPS32/MIPS64 L2 cache handling
@@ -220,6 +221,14 @@ static inline int __init mips_sc_probe(void)
else
return 0;
+ /*
+ * According to config2 the cache would be 5-way, but that is
+ * contradicted by all documentation.
+ */
+ if (current_cpu_type() == CPU_JZRISC &&
+ mips_machtype == MACH_INGENIC_JZ4770)
+ c->scache.ways = 4;
+
c->scache.waysize = c->scache.sets * c->scache.linesz;
c->scache.waybit = __ffs(c->scache.waysize);
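For reference, the two fields derived at the end of this hunk, worked through with hypothetical geometry (the real sets and linesz come from the config2 probing earlier in the function):

/*
 * Hypothetical geometry: 1024 sets, 32-byte lines, 4 ways (as forced
 * above for the JZ4770):
 *
 *	waysize = sets * linesz  = 1024 * 32 = 32768	(32 KiB per way)
 *	waybit  = __ffs(waysize) = 15			(lowest set bit)
 *
 * Total L2 size is then waysize * ways = 128 KiB.
 */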
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index de34adb76157..7b63914d2e58 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -47,31 +47,31 @@ static struct resource standard_io_resources[] = {
.name = "dma1",
.start = 0x00,
.end = 0x1f,
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
},
{
.name = "timer",
.start = 0x40,
.end = 0x5f,
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
},
{
.name = "keyboard",
.start = 0x60,
.end = 0x6f,
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
},
{
.name = "dma page reg",
.start = 0x80,
.end = 0x8f,
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
},
{
.name = "dma2",
.start = 0xc0,
.end = 0xdf,
- .flags = IORESOURCE_BUSY
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY
},
};
diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c
index 8b937300fb7f..fd26fadc8617 100644
--- a/arch/mips/txx9/rbtx4939/setup.c
+++ b/arch/mips/txx9/rbtx4939/setup.c
@@ -186,7 +186,7 @@ static void __init rbtx4939_update_ioc_pen(void)
#define RBTX4939_MAX_7SEGLEDS 8
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
static u8 led_val[RBTX4939_MAX_7SEGLEDS];
struct rbtx4939_led_data {
struct led_classdev cdev;
@@ -261,7 +261,7 @@ static inline void rbtx4939_led_setup(void)
static void __rbtx4939_7segled_putc(unsigned int pos, unsigned char val)
{
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
unsigned long flags;
local_irq_save(flags);
/* bit7: reserved for LED class */
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 60fae03dac79..3d4ec88f1db1 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -152,7 +152,6 @@ menu "Advanced setup"
config ADVANCED_OPTIONS
bool "Prompt for advanced kernel configuration options"
- help
comment "Default settings for advanced configuration options are used"
depends on !ADVANCED_OPTIONS
diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts
index 36ccdf05837d..56f4b5df6d65 100644
--- a/arch/nios2/boot/dts/3c120_devboard.dts
+++ b/arch/nios2/boot/dts/3c120_devboard.dts
@@ -29,7 +29,7 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu: cpu@0x0 {
+ cpu: cpu@0 {
device_type = "cpu";
compatible = "altr,nios2-1.0";
reg = <0x00000000>;
@@ -69,7 +69,7 @@
compatible = "altr,avalon", "simple-bus";
bus-frequency = <125000000>;
- pb_cpu_to_io: bridge@0x8000000 {
+ pb_cpu_to_io: bridge@8000000 {
compatible = "simple-bus";
reg = <0x08000000 0x00800000>;
#address-cells = <1>;
@@ -83,7 +83,7 @@
<0x00008000 0x08008000 0x00000020>,
<0x00400000 0x08400000 0x00000020>;
- timer_1ms: timer@0x400000 {
+ timer_1ms: timer@400000 {
compatible = "altr,timer-1.0";
reg = <0x00400000 0x00000020>;
interrupt-parent = <&cpu>;
@@ -91,7 +91,7 @@
clock-frequency = <125000000>;
};
- timer_0: timer@0x8000 {
+ timer_0: timer@8000 {
compatible = "altr,timer-1.0";
reg = < 0x00008000 0x00000020 >;
interrupt-parent = < &cpu >;
@@ -99,14 +99,14 @@
clock-frequency = < 125000000 >;
};
- jtag_uart: serial@0x4d50 {
+ jtag_uart: serial@4d50 {
compatible = "altr,juart-1.0";
reg = <0x00004d50 0x00000008>;
interrupt-parent = <&cpu>;
interrupts = <1>;
};
- tse_mac: ethernet@0x4000 {
+ tse_mac: ethernet@4000 {
compatible = "altr,tse-1.0";
reg = <0x00004000 0x00000400>,
<0x00004400 0x00000040>,
@@ -133,7 +133,7 @@
};
};
- uart: serial@0x4c80 {
+ uart: serial@4c80 {
compatible = "altr,uart-1.0";
reg = <0x00004c80 0x00000020>;
interrupt-parent = <&cpu>;
@@ -143,7 +143,7 @@
};
};
- cfi_flash_64m: flash@0x0 {
+ cfi_flash_64m: flash@0 {
compatible = "cfi-flash";
reg = <0x00000000 0x04000000>;
bank-width = <2>;
diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig
index 8b2a30b3b34f..c601c8ff1ae6 100644
--- a/arch/nios2/configs/10m50_defconfig
+++ b/arch/nios2/configs/10m50_defconfig
@@ -33,7 +33,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig
index 9451940678a0..fce33588d55c 100644
--- a/arch/nios2/configs/3c120_defconfig
+++ b/arch/nios2/configs/3c120_defconfig
@@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9d3329811cc1..73ce5dd07642 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,6 +141,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
+ select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 30a155c0a6b0..c615abdce119 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -16,6 +16,7 @@
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 949d691094a4..67c5475311ee 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
* keeping the prototype consistent across the two formats.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index, unsigned long hidx,
+ int offset)
{
return (hidx << H_PAGE_F_GIX_SHIFT) &
(H_PAGE_F_SECOND | H_PAGE_F_GIX);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 338b7da468ce..3bcf269f8f55 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -45,7 +45,7 @@
* generic accessors and iterators here
*/
#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
{
real_pte_t rpte;
unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
*/
smp_rmb();
- hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ hidxp = (unsigned long *)(ptep + offset);
rpte.hidx = *hidxp;
return rpte;
}
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
* expected to modify the PTE bits accordingly and commit the PTE to memory.
*/
static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
- unsigned int subpg_index, unsigned long hidx)
+ unsigned int subpg_index,
+ unsigned long hidx, int offset)
{
- unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+ unsigned long *hidxp = (unsigned long *)(ptep + offset);
rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
*hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
}
#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
(sizeof(unsigned long) << PMD_INDEX_SIZE))
#else
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
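The doubled table sizes above reserve a second half of each table for slot information, one unsigned long per entry. A worked sizing example with a hypothetical index size of 9 (512 entries of 8 bytes each):

/*
 *	base  = sizeof(pud_t)         << 9 = 512 * 8 = 4096 bytes
 *	slots = sizeof(unsigned long) << 9 = 512 * 8 = 4096 bytes
 *
 * so with CONFIG_HUGETLB_PAGE the PUD table allocation grows from
 * 4 KiB to 8 KiB, and likewise for the PMD table with THP/hugetlb.
 */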
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0920eff731b3..935adcd92a81 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -23,7 +23,8 @@
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+ defined(CONFIG_PPC_64K_PAGES)
/*
* only with hash 64k do we need to use the second half of the pmd
* page table to store a pointer to the deposited pgtable_t
@@ -33,6 +34,16 @@
#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE
#endif
/*
+ * We store the slot details in the second half of the page table.
+ * Increase the pud-level table size so that hugetlb PTEs can be
+ * stored at the pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
+#endif
+/*
* Define the address range of the kernel non-linear virtual area
*/
#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 1fcfa425cefa..4746bc68d446 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
+ pgd_t *pgd;
+
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
+
+ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ memset(pgd, 0, PGD_TABLE_SIZE);
+
+ return pgd;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+ kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
* ahead and flush the page walk cache
*/
flush_tlb_pgtable(tlb, address);
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+ pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 51017726d495..a6b9f1d74600 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
extern unsigned long __pud_index_size;
extern unsigned long __pgd_index_size;
extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
#define PTE_INDEX_SIZE __pte_index_size
#define PMD_INDEX_SIZE __pmd_index_size
#define PUD_INDEX_SIZE __pud_index_size
#define PGD_INDEX_SIZE __pgd_index_size
#define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
/*
* Because of use of pte fragments and THP, size of page table
* are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
*/
#ifndef __real_pte
-#define __real_pte(e,p) ((real_pte_t){(e)})
+#define __real_pte(e, p, o) ((real_pte_t){(e)})
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 176dfb73d42c..471b2274fbeb 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXC_HV, SOFTEN_TEST_HV, bitmask)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
- MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 88e5e8f17e98..855e17d158b1 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -30,6 +30,16 @@
#define PACA_IRQ_PMI 0x40
/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE)
+#endif
+
+/*
* flags for paca->irq_soft_mask
*/
#define IRQS_ENABLED 0
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
static inline void may_hard_irq_enable(void)
{
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+ if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
__hard_irq_enable();
}
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 9dcbfa6bbb91..d8b1e8e7e035 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
return false;
}
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
#endif /* CONFIG_KEXEC_CORE */
#endif /* ! __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 9a667007bff8..376ae803b69c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -249,10 +249,8 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
extern int kvmppc_hcall_impl_pr(unsigned long cmd);
extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
-extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu);
-extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 735cfa35298a..998f7b7aaa9e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -122,13 +122,13 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
lphi = (l >> 16) & 0xf;
switch ((l >> 12) & 0xf) {
case 0:
- return !lphi ? 24 : -1; /* 16MB */
+ return !lphi ? 24 : 0; /* 16MB */
break;
case 1:
return 16; /* 64kB */
break;
case 3:
- return !lphi ? 34 : -1; /* 16GB */
+ return !lphi ? 34 : 0; /* 16GB */
break;
case 7:
return (16 << 8) + 12; /* 64kB in 4kB */
@@ -140,7 +140,7 @@ static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
return (24 << 8) + 12; /* 16MB in 4kB */
break;
}
- return -1;
+ return 0;
}
static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
@@ -159,7 +159,11 @@ static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l
static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
{
- return 1ul << kvmppc_hpte_actual_page_shift(v, r);
+ int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+ if (shift)
+ return 1ul << shift;
+ return 0;
}
static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
@@ -232,7 +236,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;
- if (b_pgshift == 12) {
+ if (b_pgshift <= 12) {
if (a_pgshift > 12) {
sllp = (a_pgshift == 16) ? 5 : 4;
rb |= sllp << 5; /* AP field */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..1f53b562726f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -690,6 +690,7 @@ struct kvm_vcpu_arch {
u8 mmio_vsx_offset;
u8 mmio_vsx_copy_type;
u8 mmio_vsx_tx_sx_enabled;
+ u8 mmio_vmx_copy_nums;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
@@ -709,6 +710,7 @@ struct kvm_vcpu_arch {
u8 ceded;
u8 prodded;
u8 doorbell_request;
+ u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
struct swait_queue_head *wqp;
@@ -738,8 +740,11 @@ struct kvm_vcpu_arch {
struct kvmppc_icp *icp; /* XICS presentation controller */
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
- u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_esc_on; /* Is the escalation irq enabled ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+ u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
+ u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -800,6 +805,7 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
#define KVM_MMIO_REG_VSX 0x0080
+#define KVM_MMIO_REG_VMX 0x00c0
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9db18287b5f4..7765a800ddae 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -81,6 +81,10 @@ extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_load128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rt, int is_default_endian);
+extern int kvmppc_handle_store128_by2x64(struct kvm_run *run,
+ struct kvm_vcpu *vcpu, unsigned int rs, int is_default_endian);
extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes,
int is_default_endian);
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
new file mode 100644
index 000000000000..6e20bb5c74ea
--- /dev/null
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ /*
+ * Only need the full barrier when switching between processes.
+ * Barrier when switching from kernel to userspace is not
+ * required here, given that it is implied by mmdrop(). Barrier
+ * when switching from userspace to kernel is not needed after
+ * store to rq->curr.
+ */
+ if (likely(!(atomic_read(&next->membarrier_state) &
+ (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+ MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+ return;
+
+ /*
+ * The membarrier system call requires a full memory barrier
+ * after storing to rq->curr, before going back to user-space.
+ */
+ smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
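A sketch of the call site this hook assumes, simplified and with an illustrative function name (not the actual powerpc switch_mm implementation): the barrier has to run after rq->curr has been updated and before the incoming task returns to user space.

static void switch_mm_sketch(struct mm_struct *prev, struct mm_struct *next,
			     struct task_struct *tsk)
{
	if (prev != next)
		membarrier_arch_switch_mm(prev, next, tsk);

	/* ... architecture-specific MMU context switch follows ... */
}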
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 504a3c36ce5c..03bbd1149530 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
#ifndef __ASSEMBLY__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index abddf5830ad5..5c5f75d005ad 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -27,6 +27,7 @@
#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
#endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
* Define the address range of the kernel non-linear virtual area
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 24c73f5575ee..94bd1bf2c873 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1076,6 +1076,7 @@ enum {
/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
enum {
OPAL_XIVE_VP_ENABLED = 0x00000001,
+ OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
};
/* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index ab5c1588b487..f1083bcf449c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -156,6 +156,12 @@
#define OP_31_XOP_LFDX 599
#define OP_31_XOP_LFDUX 631
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX 103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX 231
+
#define OP_LWZ 32
#define OP_STFS 52
#define OP_STFSU 53
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 88187c285c70..593248110902 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
static inline int early_cpu_to_node(int cpu)
{
int nid;
@@ -82,6 +87,7 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
extern int start_topology_update(void);
extern int stop_topology_update(void);
extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
#else
static inline int start_topology_update(void)
{
@@ -95,6 +101,10 @@ static inline int prrn_is_enabled(void)
{
return 0;
}
+static inline int find_and_online_cpu_nid(int cpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 7624e22f5045..8d1a2792484f 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+extern bool xive_native_has_single_escalation(void);
#else
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 637b7263cb86..833ed9a16adf 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -632,6 +632,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 88b84ac76b53..ea5eb91b836e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -520,6 +520,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
@@ -739,6 +740,9 @@ int main(void)
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
arch.xive_cam_word));
DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+ DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+ DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
+ DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index cc649809885e..2b9df0040d6b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -756,14 +756,14 @@ int eeh_restore_vf_config(struct pci_dn *pdn)
eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
2, devctl);
- /* Disable Completion Timeout */
+ /* Disable Completion Timeout if possible */
eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
4, &cap2);
- if (cap2 & 0x10) {
+ if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
eeh_ops->read_config(pdn,
edev->pcie_cap + PCI_EXP_DEVCTL2,
4, &cap2);
- cap2 |= 0x10;
+ cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
eeh_ops->write_config(pdn,
edev->pcie_cap + PCI_EXP_DEVCTL2,
4, cap2);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ee832d344a5a..9b6e653e501a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -943,6 +943,8 @@ kernel_dbg_exc:
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable
+ * it. Hard disabling (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK,
+ * so keep these in sync.
*/
.macro masked_interrupt_book3e paca_irq full_mask
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 243d072a225a..3ac87e53b3da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
* triggered and won't automatically refire.
* - If it was a HMI we return immediately since we handled it in realmode
* and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
* This is called with r10 containing the value to OR to the paca field.
*/
#define MASKED_INTERRUPT(_H) \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt: \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
b MASKED_DEC_HANDLER_LABEL; \
-1: andi. r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI); \
- bne 2f; \
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \
+ beq 2f; \
mfspr r10,SPRN_##_H##SRR1; \
xori r10,r10,MSR_EE; /* clear MSR_EE */ \
mtspr SPRN_##_H##SRR1,r10; \
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 208e623b2557..446c79611d56 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -362,8 +362,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
*/
static int pci_read_irq_line(struct pci_dev *pci_dev)
{
- struct of_phandle_args oirq;
- unsigned int virq;
+ int virq;
pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
@@ -371,7 +370,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
memset(&oirq, 0xff, sizeof(oirq));
#endif
/* Try to get a mapping from the device-tree */
- if (of_irq_parse_pci(pci_dev, &oirq)) {
+ virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+ if (virq <= 0) {
u8 line, pin;
/* If that fails, let's fall back to what is in the config
@@ -395,11 +395,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
virq = irq_create_mapping(NULL, line);
if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
- } else {
- pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
- oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
-
- virq = irq_create_of_mapping(&oirq);
}
if (!virq) {
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 2d71269e7dc1..cf47b1aec4c2 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices);
*/
void pci_hp_add_devices(struct pci_bus *bus)
{
- int slotno, mode, pass, max;
+ int slotno, mode, max;
struct pci_dev *dev;
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
@@ -133,13 +133,17 @@ void pci_hp_add_devices(struct pci_bus *bus)
pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
pcibios_setup_bus_devices(bus);
max = bus->busn_res.start;
- for (pass = 0; pass < 2; pass++) {
- list_for_each_entry(dev, &bus->devices, bus_list) {
- if (pci_is_bridge(dev))
- max = pci_scan_bridge(bus, dev,
- max, pass);
- }
- }
+ /*
+ * Scan bridges that are already configured. We don't touch
+ * them unless they are misconfigured; reconfiguration is
+ * done in the second scan below.
+ */
+ for_each_pci_bridge(dev, bus)
+ max = pci_scan_bridge(bus, dev, max, 0);
+
+ /* Scan bridges that need to be reconfigured */
+ for_each_pci_bridge(dev, bus)
+ max = pci_scan_bridge(bus, dev, max, 1);
}
pcibios_finish_adding_to_bus(bus);
}
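The two-pass conversion relies on the for_each_pci_bridge() helper; its definition in include/linux/pci.h at this point in the series is, roughly:

	#define for_each_pci_bridge(dev, bus)				\
		list_for_each_entry(dev, &bus->devices, bus_list)	\
			if (!pci_is_bridge(dev)) {} else

so each pass still walks bus->devices, just with the bridge filter folded into the macro.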
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 1d817f4d97d9..85ad2f78b889 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -96,7 +96,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
reg = of_get_property(node, "reg", NULL);
if (!reg)
continue;
- dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+ dev = pci_get_domain_bus_and_slot(0, pci_bus,
+ ((reg[0] >> 8) & 0xff));
if (!dev || !dev->subordinate) {
pci_dev_put(dev);
continue;
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 20ceec4a5f5e..98f04725def7 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -369,11 +369,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
pcibios_setup_bus_devices(bus);
/* Now scan child busses */
- list_for_each_entry(dev, &bus->devices, bus_list) {
- if (pci_is_bridge(dev)) {
- of_scan_pci_bridge(dev);
- }
- }
+ for_each_pci_bridge(dev, bus)
+ of_scan_pci_bridge(dev);
}
/**
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index fc600a8b1e77..f915db93cd42 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -392,7 +392,7 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
{
poll_wait(file, &rtas_log_wait, wait);
if (rtas_log_size)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
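The POLLIN/POLLRDNORM to EPOLLIN/EPOLLRDNORM switch matches the __poll_t return type already present in the signature; a minimal sketch of the shape (my_wait and my_data_ready are hypothetical names):

	static __poll_t my_dev_poll(struct file *file, poll_table *wait)
	{
		poll_wait(file, &my_wait, wait);
		return my_data_ready ? (EPOLLIN | EPOLLRDNORM) : 0;
	}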
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index b12b8eb39c29..68a0e9d5b440 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -22,6 +22,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_EVENTFD
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select SRCU
select KVM_VFIO
select IRQ_BYPASS_MANAGER
@@ -68,7 +69,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
- select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_SERIES || PPC_POWERNV)
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 72d977e30952..234531d1bee1 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -484,19 +484,33 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = kvmppc_get_pc(vcpu);
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = kvmppc_get_ctr(vcpu);
@@ -518,6 +532,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -525,6 +540,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
kvmppc_set_pc(vcpu, regs->pc);
kvmppc_set_cr(vcpu, regs->cr);
kvmppc_set_ctr(vcpu, regs->ctr);
@@ -545,6 +562,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -737,7 +755,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
+ vcpu_load(vcpu);
vcpu->guest_debug = dbg->control;
+ vcpu_put(vcpu);
return 0;
}
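All of these hunks apply one pattern — pin the vcpu around the arch-level handler, consistent with the generic KVM code no longer doing the load/put for these ioctls. A sketch with a hypothetical op:

	int kvm_arch_vcpu_ioctl_do_op(struct kvm_vcpu *vcpu, struct kvm_op *op)
	{
		int ret;

		vcpu_load(vcpu);	/* pin vcpu state onto this CPU */
		ret = vcpu->kvm->arch.kvm_ops->do_op(vcpu, op);
		vcpu_put(vcpu);
		return ret;
	}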
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b73dbc9e797d..ef243fed2f2b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1269,6 +1269,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Nothing to do */
goto out;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = be64_to_cpu(hptep[1]);
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ }
+
/* Unmap */
rev = &old->rev[idx];
guest_rpte = rev->guest_rpte;
@@ -1298,7 +1303,6 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Reload PTE after unmap */
vpte = be64_to_cpu(hptep[0]);
-
BUG_ON(vpte & HPTE_V_VALID);
BUG_ON(!(vpte & HPTE_V_ABSENT));
@@ -1307,6 +1311,12 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ rpte = hpte_new_to_old_r(rpte);
+ }
+
pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
@@ -1337,17 +1347,17 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
}
new_pteg = hash & new_hash_mask;
- if (vpte & HPTE_V_SECONDARY) {
- BUG_ON(~pteg != (hash & old_hash_mask));
- new_pteg = ~new_pteg;
- } else {
- BUG_ON(pteg != (hash & old_hash_mask));
- }
+ if (vpte & HPTE_V_SECONDARY)
+ new_pteg = ~hash & new_hash_mask;
new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
new_hptep = (__be64 *)(new->virt + (new_idx << 4));
replace_vpte = be64_to_cpu(new_hptep[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+ }
if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
BUG_ON(new->order >= old->order);
@@ -1363,6 +1373,11 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
/* Discard the previous HPTE */
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = hpte_old_to_new_r(vpte, rpte);
+ vpte = hpte_old_to_new_v(vpte);
+ }
+
new_hptep[1] = cpu_to_be64(rpte);
new->rev[new_idx].guest_rpte = guest_rpte;
/* No need for a barrier, since new HPT isn't active */
@@ -1380,12 +1395,6 @@ static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
unsigned long i;
int rc;
- /*
- * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
- * that POWER9 uses, and could well hit a BUG_ON on POWER9.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- return -EIO;
for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
rc = resize_hpt_rehash_hpte(resize, i);
if (rc != 0)
@@ -1416,6 +1425,9 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
synchronize_srcu_expedited(&kvm->srcu);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}
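The recurring POWER9 pattern in this function: normalize a new-format HPTE to the old layout before rehashing, then convert back before writing it into the new HPT. A condensed sketch of the hunks above:

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {	/* POWER9 */
		vpte = hpte_new_to_old_v(vpte, rpte);
		rpte = hpte_new_to_old_r(rpte);
	}
	/* ... compute pshift/avpn/hash from old-format vpte/rpte ... */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		rpte = hpte_old_to_new_r(vpte, rpte);
		vpte = hpte_old_to_new_v(vpte);
	}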
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 58618f644c56..0c854816e653 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -573,7 +573,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
j = i + 1;
if (npages) {
set_dirty_bits(map, i, npages);
- i = j + npages;
+ j = i + npages;
}
}
return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e4f70c33fbc7..89707354c2ef 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -116,6 +116,9 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* If set, the threads on each CPU core have to be in the same MMU mode */
+static bool no_mixing_hpt_and_radix;
+
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -1003,8 +1006,6 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *tvcpu;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- return EMULATE_FAIL;
if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
return RESUME_GUEST;
if (get_op(inst) != 31)
@@ -1054,6 +1055,7 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
}
+/* Called with vcpu->arch.vcore->lock held */
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -1174,7 +1176,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
swab32(vcpu->arch.emul_inst) :
vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_debug_inst(run, vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
} else {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1189,8 +1194,13 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
r = EMULATE_FAIL;
- if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
+ cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* Need vcore unlocked to call kvmppc_get_last_inst */
+ spin_unlock(&vcpu->arch.vcore->lock);
r = kvmppc_emulate_doorbell_instr(vcpu);
+ spin_lock(&vcpu->arch.vcore->lock);
+ }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
@@ -1495,6 +1505,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ *val = get_reg_val(id, vcpu->arch.dec_expires +
+ vcpu->arch.vcore->tb_offset);
+ break;
default:
r = -EINVAL;
break;
@@ -1722,6 +1736,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ vcpu->arch.dec_expires = set_reg_val(id, *val) -
+ vcpu->arch.vcore->tb_offset;
+ break;
default:
r = -EINVAL;
break;
@@ -2376,8 +2394,8 @@ static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
static bool subcore_config_ok(int n_subcores, int n_threads)
{
/*
- * POWER9 "SMT4" cores are permanently in what is effectively a 4-way split-core
- * mode, with one thread per subcore.
+ * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+ * split-core mode, with one thread per subcore.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
return n_subcores <= 4 && n_threads == 1;
@@ -2413,8 +2431,8 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
if (!cpu_has_feature(CPU_FTR_ARCH_207S))
return false;
- /* POWER9 currently requires all threads to be in the same MMU mode */
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ /* Some POWER9 chips require all threads to be in the same MMU mode */
+ if (no_mixing_hpt_and_radix &&
kvm_is_radix(vc->kvm) != kvm_is_radix(cip->vc[0]->kvm))
return false;
@@ -2677,9 +2695,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
* On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host.
+ * this is a HPT guest on a radix host machine where the
+ * CPU threads may not be in different MMU modes.
*/
- hpt_on_radix = radix_enabled() && !kvm_is_radix(vc->kvm);
+ hpt_on_radix = no_mixing_hpt_and_radix && radix_enabled() &&
+ !kvm_is_radix(vc->kvm);
if (((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) ||
(hpt_on_radix && vc->kvm->arch.threads_indep)) {
@@ -2829,7 +2849,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
if (!thr0_done)
kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
}
/*
@@ -2932,13 +2951,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
+ preempt_enable();
+
for (sub = 0; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
post_guest_process(pvc, pvc == vc);
}
spin_lock(&vc->lock);
- preempt_enable();
out:
vc->vcore_state = VCORE_INACTIVE;
@@ -2985,7 +3005,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
{
if (!xive_enabled())
return false;
- return vcpu->arch.xive_saved_state.pipr <
+ return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
vcpu->arch.xive_saved_state.cppr;
}
#else
@@ -3174,17 +3194,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* this thread straight away and have it join in.
*/
if (!signal_pending(current)) {
- if (vc->vcore_state == VCORE_PIGGYBACK) {
- if (spin_trylock(&vc->lock)) {
- if (vc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(vc)) {
- kvmppc_create_dtl_entry(vcpu, vc);
- kvmppc_start_thread(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
- }
- spin_unlock(&vc->lock);
- }
- } else if (vc->vcore_state == VCORE_RUNNING &&
+ if ((vc->vcore_state == VCORE_PIGGYBACK ||
+ vc->vcore_state == VCORE_RUNNING) &&
!VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
@@ -4446,6 +4457,19 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
+
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ no_mixing_hpt_and_radix = true;
+ }
+
return r;
}
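How the PVR test above decodes (a sketch; the meaning of the 0xe000 variant split is inferred from this check, not spelled out in the patch):

	/*
	 * pvr >> 16     - processor family, compared against PVR_POWER9
	 * pvr & 0xe000  - chip variant: 0x0000 affected below DD2.02,
	 *                 0x2000 affected below DD1.01 (assumption)
	 * pvr & 0x0fff  - DD level, e.g. 0x202 == DD2.2
	 */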
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7886b313d135..f31f357b8c5a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -413,10 +413,11 @@ FTR_SECTION_ELSE
/* On P9 we use the split_info for coordinating LPCR changes */
lwz r4, KVM_SPLIT_DO_SET(r6)
cmpwi r4, 0
- beq 63f
+ beq 1f
mr r3, r6
bl kvmhv_p9_set_lpcr
nop
+1:
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
@@ -617,13 +618,6 @@ kvmppc_hv_entry:
lbz r0, KVM_RADIX(r9)
cmpwi cr7, r0, 0
- /* Clear out SLB if hash */
- bne cr7, 2f
- li r6,0
- slbmte r6,r6
- slbia
- ptesync
-2:
/*
* POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies,
@@ -738,19 +732,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
10: cmpdi r4, 0
beq kvmppc_primary_no_guest
kvmppc_got_guest:
-
- /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
- lwz r5,VCPU_SLB_MAX(r4)
- cmpwi r5,0
- beq 9f
- mtctr r5
- addi r6,r4,VCPU_SLB
-1: ld r8,VCPU_SLB_E(r6)
- ld r9,VCPU_SLB_V(r6)
- slbmte r9,r8
- addi r6,r6,VCPU_SLB_SIZE
- bdnz 1b
-9:
/* Increment yield count if they have a VPA */
ld r3, VCPU_VPA(r4)
cmpdi r3, 0
@@ -957,7 +938,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -1018,6 +998,29 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
+ /* For hash guest, clear out and reload the SLB */
+ ld r6, VCPU_KVM(r4)
+ lbz r0, KVM_RADIX(r6)
+ cmpwi r0, 0
+ bne 9f
+ li r6, 0
+ slbmte r6, r6
+ slbia
+ ptesync
+
+ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+ lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
@@ -1031,8 +1034,53 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li r9, TM_QW1_OS + TM_WORD2
stwcix r11,r9,r10
li r9, 1
- stw r9, VCPU_XIVE_PUSHED(r4)
+ stb r9, VCPU_XIVE_PUSHED(r4)
eieio
+
+ /*
+ * We clear the irq_pending flag. There is a small chance of a
+ * race vs. the escalation interrupt happening on another
+ * processor setting it again, but the only consequence is to
+ * cause a spurious wakeup on the next H_CEDE, which is not an
+ * issue.
+ */
+ li r0,0
+ stb r0, VCPU_IRQ_PENDING(r4)
+
+ /*
+ * In single escalation mode, if the escalation interrupt is
+ * on, we mask it.
+ */
+ lbz r0, VCPU_XIVE_ESC_ON(r4)
+ cmpwi r0,0
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_RADDR(r4)
+ li r9, XIVE_ESB_SET_PQ_01
+ ldcix r0, r10, r9
+ sync
+
+ /* We have a possible subtle race here: The escalation interrupt might
+ * have fired and be on its way to the host queue while we mask it,
+ * and if we unmask it early enough (re-cede right away), there is
+ * a theoretical possibility that it fires again, thus landing in the
+ * target queue more than once, which is a big no-no.
+ *
+ * Fortunately, solving this is rather easy. If the above load setting
+ * PQ to 01 returns a previous value where P is set, then we know the
+ * escalation interrupt is somewhere on its way to the host. In that
+ * case we simply don't clear the xive_esc_on flag below. It will
+ * eventually be cleared by the handler for the escalation interrupt.
+ *
+ * Then, when doing a cede, we check that flag again before re-enabling
+ * the escalation interrupt, and if set, we abort the cede.
+ */
+ andi. r0, r0, XIVE_ESB_VAL_P
+ bne- 1f
+
+ /* Now P is 0, we can clear the flag */
+ li r0, 0
+ stb r0, VCPU_XIVE_ESC_ON(r4)
+1:
no_xive:
#endif /* CONFIG_KVM_XICS */
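A C-level sketch of the masking dance above, using the xive_vm_esb_load() helper that appears later in this patch (xd would be the escalation interrupt's xive_irq_data): the ESB load returns the old PQ bits while setting PQ=01.

	u64 old = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);

	if (!(old & XIVE_ESB_VAL_P))
		vcpu->arch.xive_esc_on = false;	/* nothing in flight, safe */
	/* else: an escalation is already headed to the host queue; leave
	 * the flag set and let the escalation handler clear it. */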
@@ -1193,7 +1241,7 @@ hdec_soon:
addi r3, r4, VCPU_TB_RMEXIT
bl kvmhv_accumulate_time
#endif
- b guest_exit_cont
+ b guest_bypass
/******************************************************************************
* *
@@ -1423,15 +1471,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
blt deliver_guest_interrupt
guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+ /* Save more register state */
+ mfdar r6
+ mfdsisr r7
+ std r6, VCPU_DAR(r9)
+ stw r7, VCPU_DSISR(r9)
+ /* don't overwrite fault_dar/fault_dsisr if HDSI */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
+ beq mc_cont
+ std r6, VCPU_FAULT_DAR(r9)
+ stw r7, VCPU_FAULT_DSISR(r9)
+
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ addi r3, r9, VCPU_TB_RMEXIT
+ mr r4, r9
+ bl kvmhv_accumulate_time
+#endif
#ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
- lwz r0, VCPU_XIVE_PUSHED(r9)
+ lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
beq 1f
li r7, TM_SPC_PULL_OS_CTX
li r6, TM_QW1_OS
mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
+ andi. r0, r0, MSR_DR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr0, r10, 0
@@ -1454,33 +1522,42 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Fixup some of the state for the next load */
li r10, 0
li r0, 0xff
- stw r10, VCPU_XIVE_PUSHED(r9)
+ stb r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
eieio
1:
#endif /* CONFIG_KVM_XICS */
- /* Save more register state */
- mfdar r6
- mfdsisr r7
- std r6, VCPU_DAR(r9)
- stw r7, VCPU_DSISR(r9)
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
- beq mc_cont
- std r6, VCPU_FAULT_DAR(r9)
- stw r7, VCPU_FAULT_DSISR(r9)
- /* See if it is a machine check */
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- beq machine_check_realmode
-mc_cont:
-#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
- addi r3, r9, VCPU_TB_RMEXIT
- mr r4, r9
- bl kvmhv_accumulate_time
-#endif
+ /* For hash guest, read the guest SLB and save it away */
+ ld r5, VCPU_KVM(r9)
+ lbz r0, KVM_RADIX(r5)
+ li r5, 0
+ cmpwi r0, 0
+ bne 3f /* for radix, save 0 entries */
+ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
+ mtctr r0
+ li r6,0
+ addi r7,r9,VCPU_SLB
+1: slbmfee r8,r6
+ andis. r0,r8,SLB_ESID_V@h
+ beq 2f
+ add r8,r8,r6 /* put index in */
+ slbmfev r3,r6
+ std r8,VCPU_SLB_E(r7)
+ std r3,VCPU_SLB_V(r7)
+ addi r7,r7,VCPU_SLB_SIZE
+ addi r5,r5,1
+2: addi r6,r6,1
+ bdnz 1b
+ /* Finally clear out the SLB */
+ li r0,0
+ slbmte r0,r0
+ slbia
+ ptesync
+3: stw r5,VCPU_SLB_MAX(r9)
+guest_bypass:
mr r3, r12
/* Increment exit count, poke other threads to exit */
bl kvmhv_commence_exit
@@ -1501,31 +1578,6 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
- /* Check if we are running hash or radix and store it in cr2 */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2,r0,0
-
- /* Read the guest SLB and save it away */
- li r5, 0
- bne cr2, 3f /* for radix, save 0 entries */
- lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
- mtctr r0
- li r6,0
- addi r7,r9,VCPU_SLB
-1: slbmfee r8,r6
- andis. r0,r8,SLB_ESID_V@h
- beq 2f
- add r8,r8,r6 /* put index in */
- slbmfev r3,r6
- std r8,VCPU_SLB_E(r7)
- std r3,VCPU_SLB_V(r7)
- addi r7,r7,VCPU_SLB_SIZE
- addi r5,r5,1
-2: addi r6,r6,1
- bdnz 1b
-3: stw r5,VCPU_SLB_MAX(r9)
-
/*
* Save the guest PURR/SPURR
*/
@@ -1803,7 +1855,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi cr2, r0, 0
- beq cr2, 3f
+ beq cr2, 4f
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -1839,15 +1891,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
- b 4f
+4:
#endif /* CONFIG_PPC_RADIX_MMU */
- /* Hash: clear out SLB */
-3: li r5,0
- slbmte r5,r5
- slbia
- ptesync
-4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -2745,7 +2791,32 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
- b guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+ /* Abort if we still have a pending escalation */
+ lbz r5, VCPU_XIVE_ESC_ON(r9)
+ cmpwi r5, 0
+ beq 1f
+ li r0, 0
+ stb r0, VCPU_CEDED(r9)
+1: /* Enable XIVE escalation */
+ li r5, XIVE_ESB_SET_PQ_00
+ mfmsr r0
+ andi. r0, r0, MSR_DR /* in real mode? */
+ beq 1f
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldx r0, r10, r5
+ b 2f
+1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ ldcix r0, r10, r5
+2: sync
+ li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+#endif /* CONFIG_KVM_XICS */
+3: b guest_exit_cont
/* Try to handle a machine check in real mode */
machine_check_realmode:
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 901e6fe00c39..c18e845019ec 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -96,7 +96,7 @@ kvm_start_entry:
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- GET_SHADOW_VCPU(r3)
+ mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
REST_GPR(4, r1)
@@ -165,9 +165,7 @@ after_sprg3_load:
stw r12, VCPU_TRAP(r3)
/* Transfer reg values from shadow vcpu back to vcpu struct */
- /* On 64-bit, interrupts are still off at this point */
- GET_SHADOW_VCPU(r4)
bl FUNC(kvmppc_copy_from_svcpu)
nop
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7deaeeb14b93..3ae752314b34 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -121,7 +121,7 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
if (svcpu->in_use) {
- kvmppc_copy_from_svcpu(vcpu, svcpu);
+ kvmppc_copy_from_svcpu(vcpu);
}
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
@@ -143,9 +143,10 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
}
/* Copy data needed by real-mode code from vcpu to shadow vcpu */
-void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu)
+void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
svcpu->gpr[0] = vcpu->arch.gpr[0];
svcpu->gpr[1] = vcpu->arch.gpr[1];
svcpu->gpr[2] = vcpu->arch.gpr[2];
@@ -177,17 +178,14 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.entry_ic = mfspr(SPRN_IC);
svcpu->in_use = true;
+
+ svcpu_put(svcpu);
}
/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
-void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu)
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
{
- /*
- * vcpu_put would just call us again because in_use hasn't
- * been updated yet.
- */
- preempt_disable();
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
/*
* Maybe we were already preempted and synced the svcpu from
@@ -233,7 +231,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
svcpu->in_use = false;
out:
- preempt_enable();
+ svcpu_put(svcpu);
}
static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6882bc94eba8..f0f5cd4d2fe7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,12 +84,22 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
{
struct kvm_vcpu *vcpu = data;
- /* We use the existing H_PROD mechanism to wake up the target */
- vcpu->arch.prodded = 1;
+ vcpu->arch.irq_pending = 1;
smp_mb();
if (vcpu->arch.ceded)
kvmppc_fast_vcpu_kick(vcpu);
+ /* Since we have the no-EOI flag, the interrupt is effectively
+ * disabled now. Clearing xive_esc_on means the entry code won't
+ * bother masking it again on the next guest entry.
+ *
+ * This also allows the entry code to know that if a PQ combination
+ * of 10 is observed while xive_esc_on is true, it means the queue
+ * contains an unprocessed escalation interrupt. We don't make use of
+ * that knowledge today but might (see comment in book3s_hv_rmhandlers.S).
+ */
+ vcpu->arch.xive_esc_on = false;
+
return IRQ_HANDLED;
}
@@ -112,19 +122,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
return -EIO;
}
- /*
- * Future improvement: start with them disabled
- * and handle DD2 and later scheme of merged escalation
- * interrupts
- */
- name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
- vcpu->kvm->arch.lpid, xc->server_num, prio);
+ if (xc->xive->single_escalation)
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num);
+ else
+ name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+ vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
+
+ pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
IRQF_NO_THREAD, name, vcpu);
if (rc) {
@@ -133,6 +145,25 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
goto error;
}
xc->esc_virq_names[prio] = name;
+
+ /* In single escalation mode, we grab the ESB MMIO of the
+ * interrupt and mask it. Also populate the VCPU v/raddr
+ * of the ESB page for use by asm entry/exit code. Finally
+ * set the XIVE_IRQ_NO_EOI flag which will prevent the
+ * core code from performing an EOI on the escalation
+ * interrupt, thus leaving it effectively masked after
+ * it fires once.
+ */
+ if (xc->xive->single_escalation) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+ vcpu->arch.xive_esc_raddr = xd->eoi_page;
+ vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+ xd->flags |= XIVE_IRQ_NO_EOI;
+ }
+
return 0;
error:
irq_dispose_mapping(xc->esc_virq[prio]);
@@ -191,12 +222,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
pr_devel("Provisioning prio... %d\n", prio);
- /* Provision each VCPU and enable escalations */
+ /* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
- if (rc == 0)
+ if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio);
if (rc)
return rc;
@@ -1082,6 +1113,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
+ pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
@@ -1092,18 +1124,33 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
goto bail;
/*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+ if (r) {
+ pr_err("Failed to enable VP in OPAL, err %d\n", r);
+ goto bail;
+ }
+
+ /*
* Initialize queues. Initially we set them all for no queueing
* and we enable escalation for queue 0 only which we'll use for
* our mfrr change notifications. If the VCPU is hot-plugged, we
- * do handle provisioning however.
+ * do handle provisioning however based on the existing "map"
+ * of enabled queues.
*/
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
struct xive_q *q = &xc->queues[i];
+ /* Single escalation, no queue 7 */
+ if (i == 7 && xive->single_escalation)
+ break;
+
/* Is queue already enabled ? Provision it */
if (xive->qmap & (1 << i)) {
r = xive_provision_queue(vcpu, i);
- if (r == 0)
+ if (r == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, i);
if (r)
goto bail;
@@ -1123,11 +1170,6 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
- /* Enable the VP */
- r = xive_native_enable_vp(xc->vp_id);
- if (r)
- goto bail;
-
/* Route the IPI */
r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
if (!r)
@@ -1474,6 +1516,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
val, server, guest_prio);
+
/*
* If the source doesn't already have an IPI, allocate
* one and get the corresponding data
@@ -1762,6 +1805,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
if (xive->vp_base == XIVE_INVALID_VP)
ret = -ENOMEM;
+ xive->single_escalation = xive_native_has_single_escalation();
+
if (ret) {
kfree(xive);
return ret;
@@ -1795,6 +1840,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int i;
if (!xc)
continue;
@@ -1804,6 +1850,33 @@ static int xive_debug_show(struct seq_file *m, void *private)
xc->server_num, xc->cppr, xc->hw_cppr,
xc->mfrr, xc->pending,
xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+ u32 i0, i1, idx;
+
+ if (!q->qpage && !xc->esc_virq[i])
+ continue;
+
+ seq_printf(m, " [q%d]: ", i);
+
+ if (q->qpage) {
+ idx = q->idx;
+ i0 = be32_to_cpup(q->qpage + idx);
+ idx = (idx + 1) & q->msk;
+ i1 = be32_to_cpup(q->qpage + idx);
+ seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1);
+ }
+ if (xc->esc_virq[i]) {
+ struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+ u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+ seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+ (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+ (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+ xc->esc_virq[i], pq, xd->eoi_page);
+ seq_printf(m, "\n");
+ }
+ }
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 6ba63f8e8a61..a08ae6fd4c51 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -120,6 +120,8 @@ struct kvmppc_xive {
u32 q_order;
u32 q_page_order;
+ /* Flags */
+ u8 single_escalation;
};
#define KVMPPC_XIVE_Q_COUNT 8
@@ -201,25 +203,20 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp
* is as follow.
*
* Guest request for 0...6 are honored. Guest request for anything
- * higher results in a priority of 7 being applied.
- *
- * However, when XIRR is returned via H_XIRR, 7 is translated to 0xb
- * in order to match AIX expectations
+ * higher results in a priority of 6 being applied.
*
* Similar mapping is done for CPPR values
*/
static inline u8 xive_prio_from_guest(u8 prio)
{
- if (prio == 0xff || prio < 8)
+ if (prio == 0xff || prio < 6)
return prio;
- return 7;
+ return 6;
}
static inline u8 xive_prio_to_guest(u8 prio)
{
- if (prio == 0xff || prio < 7)
- return prio;
- return 0xb;
+ return prio;
}
static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
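Worked examples of the new clamping (priority 7 becomes host-reserved, consistent with single-escalation mode dropping queue 7 elsewhere in this patch):

	xive_prio_from_guest(0x03) == 0x03
	xive_prio_from_guest(0x07) == 0x06
	xive_prio_from_guest(0xff) == 0xff	/* masked */
	xive_prio_to_guest(0x06)   == 0x06	/* now an identity map */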
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 83b485810aea..6038e2e7aee0 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1431,6 +1431,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
regs->pc = vcpu->arch.pc;
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
@@ -1452,6 +1454,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -1459,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
+ vcpu_load(vcpu);
+
vcpu->arch.pc = regs->pc;
kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
@@ -1480,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -1607,30 +1613,42 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- int ret;
+ int ret = -EINVAL;
+ vcpu_load(vcpu);
if (vcpu->arch.pvr != sregs->pvr)
- return -EINVAL;
+ goto out;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
@@ -1773,7 +1791,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
int r;
+ vcpu_load(vcpu);
r = kvmppc_core_vcpu_translate(vcpu, tr);
+ vcpu_put(vcpu);
return r;
}
@@ -1996,12 +2016,15 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
struct debug_reg *dbg_reg;
int n, b = 0, w = 0;
+ int ret = 0;
+
+ vcpu_load(vcpu);
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
- return 0;
+ goto out;
}
kvm_guest_protect_msr(vcpu, MSR_DE, true);
@@ -2033,8 +2056,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
#endif
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- return 0;
+ goto out;
+ ret = -EINVAL;
for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
uint64_t addr = dbg->arch.bp[n].addr;
uint32_t type = dbg->arch.bp[n].type;
@@ -2045,21 +2069,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (type & ~(KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE |
KVMPPC_DEBUG_BREAKPOINT))
- return -EINVAL;
+ goto out;
if (type & KVMPPC_DEBUG_BREAKPOINT) {
/* Setting H/W breakpoint */
if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
- return -EINVAL;
+ goto out;
} else {
/* Setting H/W watchpoint */
if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
type, w++))
- return -EINVAL;
+ goto out;
}
}
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index af833531af31..a382e15135e6 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -58,6 +58,18 @@ static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) {
+ kvmppc_core_queue_vec_unavail(vcpu);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_ALTIVEC */
+
/*
* XXX to do:
* lfiwax, lfiwzx
@@ -98,6 +110,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
vcpu->arch.mmio_sp64_extend = 0;
vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_vmx_copy_nums = 0;
switch (get_op(inst)) {
case 31:
@@ -459,6 +472,29 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs, 4, 1);
break;
#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+ case OP_31_XOP_LVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ KVM_MMIO_REG_VMX|rt, 1);
+ break;
+
+ case OP_31_XOP_STVX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.vaddr_accessed &= ~0xFULL;
+ vcpu->arch.paddr_accessed &= ~0xFULL;
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ rs, 1);
+ break;
+#endif /* CONFIG_ALTIVEC */
+
default:
emulated = EMULATE_FAIL;
break;
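The ~0xFULL masking reflects that lvx/stvx ignore the low four bits of the effective address (the access is always 16-byte aligned), so the quadword is emulated as two 8-byte MMIO transfers:

	/* e.g. EA 0x1238 -> aligned EA 0x1230, mmio_vmx_copy_nums = 2 */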
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0a7c88786ec0..403e642c78f5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -638,8 +638,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_SPAPR_RESIZE_HPT:
- /* Disable this on POWER9 until code handles new HPTE format */
- r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
+ r = !!hv_enabled;
break;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -763,7 +762,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = ~(u64)0;
+ vcpu->arch.dec_expires = get_tb();
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
@@ -930,6 +929,34 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+ u32 hi, lo;
+ u32 di;
+
+#ifdef __BIG_ENDIAN
+ hi = gpr >> 32;
+ lo = gpr & 0xffffffff;
+#else
+ lo = gpr >> 32;
+ hi = gpr & 0xffffffff;
+#endif
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ VCPU_VSX_VR(vcpu, index).u[di * 2] = hi;
+ VCPU_VSX_VR(vcpu, index).u[di * 2 + 1] = lo;
+}
+#endif /* CONFIG_ALTIVEC */
+
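A worked example of the doubleword indexing above (sketch): an emulated lvx sets mmio_vmx_copy_nums = 2 before the first MMIO round trip.

	/* 1st completed 8-byte load: nums == 2, di = 2 - 2 = 0 -> VR.u[0..1]
	 * 2nd completed 8-byte load: nums == 1, di = 2 - 1 = 1 -> VR.u[2..3]
	 * mmio_host_swabbed flips di so the halves land in guest order. */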
#ifdef CONFIG_PPC_FPU
static inline u64 sp_to_dp(u32 fprs)
{
@@ -1033,6 +1060,11 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_vsr_dword_dump(vcpu, gpr);
break;
#endif
+#ifdef CONFIG_ALTIVEC
+ case KVM_MMIO_REG_VMX:
+ kvmppc_set_vmx_dword(vcpu, gpr);
+ break;
+#endif
default:
BUG();
}
@@ -1106,11 +1138,9 @@ int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
enum emulation_result emulated = EMULATE_DONE;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
@@ -1252,11 +1282,9 @@ int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.io_gpr = rs;
- /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
- if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
- (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
return EMULATE_FAIL;
- }
while (vcpu->arch.mmio_vsx_copy_nums) {
if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
@@ -1312,6 +1340,111 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
}
#endif /* CONFIG_VSX */
+#ifdef CONFIG_ALTIVEC
+/* handle quadword load access in two halves */
+int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, int is_default_endian)
+{
+ enum emulation_result emulated;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
+ is_default_endian, 0);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static inline int kvmppc_get_vmx_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+ vector128 vrs = VCPU_VSX_VR(vcpu, rs);
+ u32 di;
+ u64 w0, w1;
+
+ di = 2 - vcpu->arch.mmio_vmx_copy_nums; /* doubleword index */
+ if (di > 1)
+ return -1;
+
+ if (vcpu->arch.mmio_host_swabbed)
+ di = 1 - di;
+
+ w0 = vrs.u[di * 2];
+ w1 = vrs.u[di * 2 + 1];
+
+#ifdef __BIG_ENDIAN
+ *val = (w0 << 32) | w1;
+#else
+ *val = (w1 << 32) | w0;
+#endif
+ return 0;
+}
+
+/* handle quadword store in two halves */
+int kvmppc_handle_store128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rs, int is_default_endian)
+{
+ u64 val = 0;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ vcpu->arch.io_gpr = rs;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ if (kvmppc_get_vmx_data(vcpu, rs, &val) == -1)
+ return EMULATE_FAIL;
+
+ emulated = kvmppc_handle_store(run, vcpu, val, 8,
+ is_default_endian);
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ enum emulation_result emulated = EMULATE_FAIL;
+ int r;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_load128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ } else {
+ emulated = kvmppc_handle_store128_by2x64(run, vcpu,
+ vcpu->arch.io_gpr, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_ALTIVEC */
+
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = 0;
@@ -1413,6 +1546,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
+ vcpu_load(vcpu);
+
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
if (!vcpu->mmio_is_write)
@@ -1427,7 +1562,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
if (r == RESUME_HOST) {
vcpu->mmio_needed = 1;
- return r;
+ goto out;
+ }
+ }
+#endif
+#ifdef CONFIG_ALTIVEC
+ if (vcpu->arch.mmio_vmx_copy_nums > 0)
+ vcpu->arch.mmio_vmx_copy_nums--;
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu, run);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ goto out;
}
}
#endif
@@ -1461,6 +1608,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
+out:
+ vcpu_put(vcpu);
return r;
}
@@ -1608,23 +1757,31 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
- switch (ioctl) {
- case KVM_INTERRUPT: {
+ if (ioctl == KVM_INTERRUPT) {
struct kvm_interrupt irq;
- r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
- goto out;
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- goto out;
+ return -EFAULT;
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ vcpu_load(vcpu);
+ switch (ioctl) {
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
@@ -1664,6 +1821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
+ vcpu_put(vcpu);
return r;
}
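What the split buys, sketched from the generic KVM side: with HAVE_KVM_VCPU_ASYNC_IOCTL selected, kvm_vcpu_ioctl() first tries the async entry point without taking vcpu->mutex, so KVM_INTERRUPT can be delivered while the vcpu is running.

	/* Generic dispatch, roughly (simplified sketch):
	 * r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
	 * if (r != -ENOIOCTLCMD)
	 *	return r;
	 * ... take vcpu->mutex, then kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	 */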
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index e44d2b2ea97e..1c03c978eb18 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -143,8 +143,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
int i;
u64 min, max, sum, sum_quad;
- seq_printf(m, "%s", "type count min max sum sum_squared\n");
-
+ seq_puts(m, "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 5a69b51d08a3..d573d7d07f25 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
* need to add in 0x1 if it's a read-only user page
*/
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
return -1;
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 2253bbc6a599..e601d95c3b20 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
vpn = hpt_vpn(ea, vsid, ssize);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
/*
 * None of the sub-4K pages is hashed
*/
@@ -214,7 +214,7 @@ repeat:
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d07c7e17db6..cf290d415dcd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
__pmd_index_size = H_PMD_INDEX_SIZE;
__pud_index_size = H_PUD_INDEX_SIZE;
__pgd_index_size = H_PGD_INDEX_SIZE;
+ __pud_cache_index = H_PUD_CACHE_INDEX;
__pmd_cache_index = H_PMD_CACHE_INDEX;
__pte_table_size = H_PTE_TABLE_SIZE;
__pmd_table_size = H_PMD_TABLE_SIZE;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 12511f5a015f..b320f5097a06 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
unsigned long vpn;
unsigned long old_pte, new_pte;
unsigned long rflags, pa, sz;
- long slot;
+ long slot, offset;
BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
rflags = htab_convert_pte_flags(new_pte);
- rpte = __real_pte(__pte(old_pte), ptep);
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
sz = ((1UL) << shift);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
return -1;
}
- new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
}
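The new last argument gives __real_pte()/pte_set_hidx() the distance, in pte_t slots, to the second-half area where the high slot (hidx) bits live; it depends on which table level backs the page (mapping inferred from these hunks):

	/*
	 * regular 4K/64K PTEs   -> PTRS_PER_PTE
	 * 16G hugepages (PUD)   -> PTRS_PER_PUD
	 * other hugepages (PMD) -> PTRS_PER_PMD
	 */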
/*
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index eb8c6c8c4851..2b656e67f2ea 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
* same size as either the pgd or pmd index except with THP enabled
* on book3s 64
*/
- if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
- pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+ if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+ pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index f6eb7e8f4c93..fdb424a29f03 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmemmap_list = vmem_back;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
- struct vmem_altmap *altmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- /* altmap lookups only work at section boundaries */
- altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
- p = __vmemmap_alloc_block_buf(page_size, node, altmap);
+ if (altmap)
+ p = altmap_alloc_block_buf(page_size, altmap);
+ else
+ p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
return -ENOMEM;
@@ -256,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys;
}
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size);
@@ -267,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
for (; start < end; start += page_size) {
unsigned long nr_pages, addr;
- struct vmem_altmap *altmap;
struct page *section_base;
struct page *page;
@@ -287,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order;
- altmap = to_vmem_altmap((unsigned long) section_base);
if (altmap) {
vmem_altmap_free(altmap, nr_pages);
} else if (PageReserved(page)) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1281c6eb3a85..fe8c61149fb8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -143,15 +144,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
return -EFAULT;
}
- return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct vmem_altmap *altmap;
struct page *page;
int ret;
@@ -160,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size)
* when querying the zone.
*/
page = pfn_to_page(start_pfn);
- altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
page += vmem_altmap_offset(altmap);
- ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
+ ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
if (ret)
return ret;
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index d60a62bf4fc7..0ab297c4cfad 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/cpu.h>
+#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
*
* On the read side the barrier is in pte_xchg(), which orders
* the store to the PTE vs the load of mm_cpumask.
+ *
+ * This full barrier is needed by membarrier when switching
+ * between processes after store to rq->curr, before user-space
+ * memory accesses.
*/
smp_mb();
@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (new_on_cpu)
radix_kvm_prefetch_workaround(next);
+ else
+ membarrier_arch_switch_mm(prev, next, tsk);
/*
* The actual HW switching method differs between the various
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 314d19ab9385..edd8d0bc9364 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
static void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 573a9a2ee455..2e10a964e290 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -17,9 +17,11 @@
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+ /*
+ * The init_mm context is given the first available (non-zero) PID,
+ * which is the "guard PID" and contains no page table. PIDR should
+ * never be set to zero because that duplicates the kernel address
+ * space at the 0x0... offset (quadrant 0)!
+ *
+ * An arbitrary PID that may later be allocated by the PID allocator
+ * for userspace processes must not be used either, because that
+ * would cause stale user mappings for that PID on CPUs outside of
+ * the TLB invalidation scheme (because it won't be in mm_cpumask).
+ *
+ * So permanently carve out one PID for the purpose of a guard PID.
+ */
+ init_mm.context.id = mmu_base_pid;
+ mmu_base_pid++;
}
static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
__pmd_index_size = RADIX_PMD_INDEX_SIZE;
__pud_index_size = RADIX_PUD_INDEX_SIZE;
__pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pud_cache_index = RADIX_PUD_INDEX_SIZE;
__pmd_cache_index = RADIX_PMD_INDEX_SIZE;
__pte_table_size = RADIX_PTE_TABLE_SIZE;
__pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
radix_init_iamr();
radix_init_pgtable();
-
+ /* Switch to the guard PID before turning on MMU */
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
}
radix_init_iamr();
+ radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
tlbiel_all();
}
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
pud_clear(pud);
}
+struct change_mapping_params {
+ pte_t *pte;
+ unsigned long start;
+ unsigned long end;
+ unsigned long aligned_start;
+ unsigned long aligned_end;
+};
+
+static int stop_machine_change_mapping(void *data)
+{
+ struct change_mapping_params *params =
+ (struct change_mapping_params *)data;
+
+ if (!data)
+ return -1;
+
+ spin_unlock(&init_mm.page_table_lock);
+ pte_clear(&init_mm, params->aligned_start, params->pte);
+ create_physical_mapping(params->aligned_start, params->start);
+ create_physical_mapping(params->end, params->aligned_end);
+ spin_lock(&init_mm.page_table_lock);
+ return 0;
+}
+
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
unsigned long end)
{
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
}
}
+/*
+ * Helper to clear the PTE and potentially split the mapping
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+ unsigned long size, pte_t *pte)
+{
+ unsigned long mask = ~(size - 1);
+ unsigned long aligned_start = addr & mask;
+ unsigned long aligned_end = addr + size;
+ struct change_mapping_params params;
+ bool split_region = false;
+
+ if ((end - addr) < size) {
+ /*
+ * We're going to clear the PTE, but have not yet
+ * flushed the mapping, so it is time to remap and
+ * flush. If the effects are visible outside the
+ * processor, or if we are running in code close to
+ * the mapping we cleared, we are in trouble.
+ */
+ if (overlaps_kernel_text(aligned_start, addr) ||
+ overlaps_kernel_text(end, aligned_end)) {
+ /*
+ * Hack, just return, don't pte_clear
+ */
+ WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+ "text, not splitting\n", addr, end);
+ return;
+ }
+ split_region = true;
+ }
+
+ if (split_region) {
+ params.pte = pte;
+ params.start = addr;
+ params.end = end;
+ params.aligned_start = addr & ~(size - 1);
+ params.aligned_end = min_t(unsigned long, aligned_end,
+ (unsigned long)__va(memblock_end_of_DRAM()));
+ stop_machine(stop_machine_change_mapping, &params, NULL);
+ return;
+ }
+
+ pte_clear(&init_mm, addr, pte);
+}
+
static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
unsigned long end)
{
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
continue;
if (pmd_huge(*pmd)) {
- if (!IS_ALIGNED(addr, PMD_SIZE) ||
- !IS_ALIGNED(next, PMD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pmd);
+ split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
continue;
}
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
continue;
if (pud_huge(*pud)) {
- if (!IS_ALIGNED(addr, PUD_SIZE) ||
- !IS_ALIGNED(next, PUD_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pud);
+ split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
continue;
}
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
continue;
if (pgd_huge(*pgd)) {
- if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
- !IS_ALIGNED(next, PGDIR_SIZE)) {
- WARN_ONCE(1, "%s: unaligned range\n", __func__);
- continue;
- }
-
- pte_clear(&init_mm, addr, (pte_t *)pgd);
+ split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
continue;
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c9a623c2d8a2..28c980eb4422 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
} else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 881ebd53ffc2..9b23f12e863c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
- int i;
+ int i, offset;
i = batch->index;
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+ if (unlikely(psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
* support 64k pages, this might be different from the
* hardware page size encoded in the slice table. */
addr &= PAGE_MASK;
+ offset = PTRS_PER_PTE;
}
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
- rpte = __real_pte(__pte(pte), ptep);
+ rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 1a9a756b0b2f..857580a78bbd 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -101,9 +101,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_INTR;
@@ -111,9 +111,9 @@ static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
CLASS2_ENABLE_MAILBOX_INTR;
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
ctx->csa.priv1.int_stat_class2_RW &=
~CLASS2_MAILBOX_THRESHOLD_INTR;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c1be486da899..469bdd0b748f 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -774,7 +774,7 @@ static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
spu_release(ctx);
return mask;
@@ -910,7 +910,7 @@ static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
* that poll should not sleep. Will be fixed later.
*/
mutex_lock(&ctx->state_mutex);
- mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM);
+ mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
spu_release(ctx);
return mask;
@@ -1710,9 +1710,9 @@ static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait)
mask = 0;
if (free_elements & 0xffff)
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
if (tagstatus & ctx->tagwait)
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
free_elements, tagstatus, ctx->tagwait);
@@ -2469,7 +2469,7 @@ static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
return rc;
if (spufs_switch_log_used(ctx) > 0)
- mask |= POLLIN;
+ mask |= EPOLLIN;
spu_release(ctx);
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
index fff58198b5b6..ae9d24d31eed 100644
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -70,17 +70,17 @@ static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
but first mark any pending interrupts as done so
we don't get woken up unnecessarily */
- if (events & (POLLIN | POLLRDNORM)) {
+ if (events & (EPOLLIN | EPOLLRDNORM)) {
if (stat & 0xff0000)
- ret |= POLLIN | POLLRDNORM;
+ ret |= EPOLLIN | EPOLLRDNORM;
else {
spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
}
}
- if (events & (POLLOUT | POLLWRNORM)) {
+ if (events & (EPOLLOUT | EPOLLWRNORM)) {
if (stat & 0x00ff00)
- ret = POLLOUT | POLLWRNORM;
+ ret = EPOLLOUT | EPOLLWRNORM;
else {
spu_int_stat_clear(spu, 2,
CLASS2_MAILBOX_THRESHOLD_INTR);
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index 81799d70a1ee..cfddc87f81bf 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -134,7 +134,7 @@ int maple_set_rtc_time(struct rtc_time *tm)
static struct resource rtc_iores = {
.name = "rtc",
- .flags = IORESOURCE_BUSY,
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
unsigned long __init maple_get_boot_time(void)
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 466b84234683..3f82cb24eb2b 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -829,7 +829,7 @@ core99_ata100_enable(struct device_node *node, long value)
if (value) {
if (pci_device_from_OF_node(node, &pbus, &pid) == 0)
- pdev = pci_get_bus_and_slot(pbus, pid);
+ pdev = pci_get_domain_bus_and_slot(0, pbus, pid);
if (pdev == NULL)
return 0;
rc = pci_enable_device(pdev);
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index c18de0a9b1bd..4070bb4e9da4 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -153,7 +153,7 @@ static __poll_t opal_prd_poll(struct file *file,
poll_wait(file, &opal_prd_msg_wait, wait);
if (!opal_msg_queue_empty())
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 2b3eb01ab110..b7c53a51c31b 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
rc = PTR_ERR(txwin->paste_kaddr);
goto free_window;
}
+ } else {
+ /*
+ * A user mapping must ensure that context switch issues
+ * CP_ABORT for this thread.
+ */
+ rc = set_thread_uses_vas();
+ if (rc)
+ goto free_window;
}
- /*
- * Now that we have a send window, ensure context switch issues
- * CP_ABORT for this thread.
- */
- rc = -EINVAL;
- if (set_thread_uses_vas() < 0)
- goto free_window;
-
set_vinst_win(vinst, txwin);
return txwin;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index dceb51454d8d..652d3e96b812 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -36,6 +36,7 @@
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
#include "pseries.h"
#include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
BUG_ON(cpu_online(cpu));
set_cpu_present(cpu, false);
set_hard_smp_processor_id(cpu, -1);
+ update_numa_cpu_lookup_table(cpu, -1);
break;
}
if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
cpu_maps_update_done();
}
-extern int find_and_online_cpu_nid(int cpu);
-
static int dlpar_online_cpu(struct device_node *dn)
{
int rc = 0;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 81d8614e7379..5e1ef9150182 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -49,6 +49,28 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
/*
+ * Enable hotplug interrupts late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+ struct device_node *np;
+
+ /* Hotplug Events */
+ np = of_find_node_by_path("/event-sources/hot-plug-events");
+ if (np != NULL) {
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
+ "RAS_HOTPLUG");
+ of_node_put(np);
+ }
+
+ return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
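Moving the hotplug IRQ setup into its own late initcall works because initcall levels run strictly in order; a reference sketch:

	/*
	 * core -> postcore -> arch -> subsys -> fs -> device -> late
	 *
	 * machine_late_initcall() therefore runs only after subsys-stage
	 * users such as hugepages have finished initializing.
	 */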
+
+/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
*/
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
of_node_put(np);
}
- /* Hotplug Events */
- np = of_find_node_by_path("/event-sources/hot-plug-events");
- if (np != NULL) {
- if (dlpar_workqueue_init() == 0)
- request_event_sources_irqs(np, ras_hotplug_interrupt,
- "RAS_HOTPLUG");
- of_node_put(np);
- }
-
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index bafb014e1a7e..cb9a8b71fd0f 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -145,21 +145,21 @@ static struct resource pic1_iores = {
.name = "8259 (master)",
.start = 0x20,
.end = 0x21,
- .flags = IORESOURCE_BUSY,
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
static struct resource pic2_iores = {
.name = "8259 (slave)",
.start = 0xa0,
.end = 0xa1,
- .flags = IORESOURCE_BUSY,
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
static struct resource pic_edgectrl_iores = {
.name = "8259 edge control",
.start = 0x4d0,
.end = 0x4d1,
- .flags = IORESOURCE_BUSY,
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
static int i8259_host_match(struct irq_domain *h, struct device_node *node,
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index 50c411b1761e..1afcdb428e51 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -37,7 +37,7 @@ static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj,
if (count < MV64X60_VAL_LEN_MAX)
return -EINVAL;
- phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+ phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
if (!phb)
return -ENODEV;
pci_read_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, &v);
@@ -61,7 +61,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
if (sscanf(buf, "%i", &v) != 1)
return -EINVAL;
- phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+ phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
if (!phb)
return -ENODEV;
pci_write_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, v);
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ebc244b08d67..d22aeb0b69e1 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -42,6 +42,7 @@ static u32 xive_provision_chip_count;
static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;
+static bool xive_has_single_esc;
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
@@ -571,6 +572,10 @@ bool __init xive_native_init(void)
break;
}
+ /* Do we support single escalation? */
+ if (of_get_property(np, "single-escalation-support", NULL) != NULL)
+ xive_has_single_esc = true;
+
/* Configure Thread Management areas for KVM */
for_each_possible_cpu(cpu)
kvmppc_set_xive_tima(cpu, r.start, tima);
@@ -667,12 +672,15 @@ void xive_native_free_vp_block(u32 vp_base)
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
-int xive_native_enable_vp(u32 vp_id)
+int xive_native_enable_vp(u32 vp_id, bool single_escalation)
{
s64 rc;
+ u64 flags = OPAL_XIVE_VP_ENABLED;
+ if (single_escalation)
+ flags |= OPAL_XIVE_VP_SINGLE_ESCALATION;
for (;;) {
- rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
+ rc = opal_xive_set_vp_info(vp_id, flags, 0);
if (rc != OPAL_BUSY)
break;
msleep(1);
@@ -710,3 +718,9 @@ int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
return 0;
}
EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
+
+bool xive_native_has_single_escalation(void)
+{
+ return xive_has_single_esc;
+}
+EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
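A sketch of how a consumer (e.g. KVM) would combine the two new exports; the caller context and error handling are hypothetical:

	int rc;
	bool esc = xive_native_has_single_escalation();

	rc = xive_native_enable_vp(vp_id, esc);
	if (rc)
		pr_err("Failed to enable VP %x\n", vp_id);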
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 865e14f50c14..b6722c246d9c 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -22,6 +22,7 @@ config RISCV
select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_GENERIC_DMA_COHERENT
@@ -43,6 +44,10 @@ config MMU
config ARCH_PHYS_ADDR_T_64BIT
def_bool y
+config ZONE_DMA32
+ bool
+ default y
+
config ARCH_DMA_ADDR_T_64BIT
def_bool y
@@ -55,6 +60,9 @@ config PAGE_OFFSET
config STACKTRACE_SUPPORT
def_bool y
+config TRACE_IRQFLAGS_SUPPORT
+ def_bool y
+
config RWSEM_GENERIC_SPINLOCK
def_bool y
@@ -107,6 +115,8 @@ config ARCH_RV64I
bool "RV64I"
select CPU_SUPPORTS_64BIT_KERNEL
select 64BIT
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
endchoice
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 681ac0d09314..4286a5f83876 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += errno.h
generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
-generic-y += ftrace.h
generic-y += futex.h
generic-y += hardirq.h
generic-y += hash.h
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 3c7a2c97e377..421fa3585798 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,15 +40,15 @@
#define SR_SD _AC(0x8000000000000000, UL) /* FS/XS dirty */
#endif
-/* SPTBR flags */
+/* SATP flags */
#if __riscv_xlen == 32
-#define SPTBR_PPN _AC(0x003FFFFF, UL)
-#define SPTBR_MODE_32 _AC(0x80000000, UL)
-#define SPTBR_MODE SPTBR_MODE_32
+#define SATP_PPN _AC(0x003FFFFF, UL)
+#define SATP_MODE_32 _AC(0x80000000, UL)
+#define SATP_MODE SATP_MODE_32
#else
-#define SPTBR_PPN _AC(0x00000FFFFFFFFFFF, UL)
-#define SPTBR_MODE_39 _AC(0x8000000000000000, UL)
-#define SPTBR_MODE SPTBR_MODE_39
+#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39 _AC(0x8000000000000000, UL)
+#define SATP_MODE SATP_MODE_39
#endif
/* Interrupt Enable and Interrupt Pending flags */
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
new file mode 100644
index 000000000000..66d4175eb13e
--- /dev/null
+++ b/arch/riscv/include/asm/ftrace.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+/*
+ * The function graph frame pointer test is not possible if
+ * CONFIG_FRAME_POINTER is not enabled. See arch/riscv/kernel/mcount.S
+ * for details.
+ */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+#endif
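Defining HAVE_FUNCTION_GRAPH_FP_TEST lets the core graph tracer sanity-check its shadow return stack; a simplified sketch of the check this enables (the real code lives in the generic ftrace_pop_return_trace() path):

	/* on return, the frame pointer recorded at entry must match */
	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
		ftrace_graph_stop();
		WARN(1, "Bad frame pointer: expected %lx, received %lx\n",
		     current->ret_stack[index].fp, frame_pointer);
	}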
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index 97424834dce2..336d60ec5698 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -39,16 +39,6 @@ static inline void destroy_context(struct mm_struct *mm)
{
}
-static inline pgd_t *current_pgdir(void)
-{
- return pfn_to_virt(csr_read(sptbr) & SPTBR_PPN);
-}
-
-static inline void set_pgdir(pgd_t *pgd)
-{
- csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
-}
-
/*
* When necessary, performs a deferred icache flush for the given MM context,
* on the local CPU. RISC-V has no direct mechanism for instruction cache
@@ -93,7 +83,12 @@ static inline void switch_mm(struct mm_struct *prev,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
- set_pgdir(next->pgd);
+ /*
+ * Use the old sptbr name instead of the current satp
+ * name to support binutils 2.29, which doesn't know about
+ * the privileged ISA 1.10 yet.
+ */
+ csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
local_flush_tlb_all();
flush_icache_deferred(next);
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 7b9c24ebdf52..7b209aec355d 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -36,7 +36,14 @@ static inline void local_flush_tlb_page(unsigned long addr)
#define flush_tlb_all() local_flush_tlb_all()
#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-#define flush_tlb_range(vma, start, end) local_flush_tlb_all()
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ local_flush_tlb_all();
+}
+
+#define flush_tlb_mm(mm) flush_tlb_all()
#else /* CONFIG_SMP */
@@ -45,16 +52,13 @@ static inline void local_flush_tlb_page(unsigned long addr)
#define flush_tlb_all() sbi_remote_sfence_vma(0, 0, -1)
#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
#define flush_tlb_range(vma, start, end) \
- sbi_remote_sfence_vma(0, start, (end) - (start))
+ sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits, \
+ start, (end) - (start))
+#define flush_tlb_mm(mm) \
+ sbi_remote_sfence_vma(mm_cpumask(mm)->bits, 0, -1)
#endif /* CONFIG_SMP */
-/* Flush the TLB entries of the specified mm context */
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
- flush_tlb_all();
-}
-
/* Flush a range of kernel pages */
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
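With this change the SMP variants fence only the harts recorded in mm_cpumask() instead of every hart in the system; for example, a range flush now expands to:

	/* only harts that have run this mm receive the SFENCE.VMA */
	sbi_remote_sfence_vma(mm_cpumask((vma)->vm_mm)->bits,
			      start, (end) - (start));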
diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index 2f704a5c4196..080fb28061de 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -11,7 +11,6 @@
* GNU General Public License for more details.
*/
-#define __ARCH_HAVE_MMU
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index ab8baf7bd142..196f62ffc428 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -2,6 +2,11 @@
# Makefile for the RISC-V Linux kernel
#
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_setup.o = -pg
+endif
+
extra-y += head.o
extra-y += vmlinux.lds
@@ -29,5 +34,7 @@ CFLAGS_setup.o := -mcmodel=medany
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
clean:
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 7404ec222406..87fc045be51f 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -78,10 +78,13 @@ _save_context:
REG_S x31, PT_T6(sp)
/*
- * Disable FPU to detect illegal usage of
- * floating point in kernel space
+ * Disable user-mode memory access as it should only be set in the
+ * actual user copy routines.
+ *
+ * Disable the FPU to detect illegal usage of floating point in kernel
+ * space.
*/
- li t0, SR_FS
+ li t0, SR_SUM | SR_FS
REG_L s0, TASK_TI_USER_SP(tp)
csrrc s1, sstatus, t0
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
new file mode 100644
index 000000000000..d0de68d144cb
--- /dev/null
+++ b/arch/riscv/kernel/ftrace.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ * Copyright (C) 2017 Andes Technology Corporation
+ */
+
+#include <linux/ftrace.h>
+
+/*
+ * Most of this file is copied from arm64.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+ struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * We don't suffer access faults, so no extra fault-recovery assembly
+ * is needed here.
+ */
+ old = *parent;
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ if (!ftrace_graph_entry(&trace))
+ return;
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer, NULL);
+ if (err == -EBUSY)
+ return;
+ *parent = return_hooker;
+}
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 78f670d70133..226eeb190f90 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -74,15 +74,15 @@ relocate:
sub a1, a1, a0
add ra, ra, a1
- /* Point stvec to virtual address of intruction after sptbr write */
+ /* Point stvec to virtual address of instruction after satp write */
la a0, 1f
add a0, a0, a1
csrw stvec, a0
- /* Compute sptbr for kernel page tables, but don't load it yet */
+ /* Compute satp for kernel page tables, but don't load it yet */
la a2, swapper_pg_dir
srl a2, a2, PAGE_SHIFT
- li a1, SPTBR_MODE
+ li a1, SATP_MODE
or a2, a2, a1
/*
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
new file mode 100644
index 000000000000..c46a778627be
--- /dev/null
+++ b/arch/riscv/kernel/mcount.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm-generic/export.h>
+#include <asm/ftrace.h>
+
+ .text
+
+ .macro SAVE_ABI_STATE
+ addi sp, sp, -16
+ sd s0, 0(sp)
+ sd ra, 8(sp)
+ addi s0, sp, 16
+ .endm
+
+ /*
+ * The call to ftrace_return_to_handler would overwrite the return
+ * register if a0 was not saved.
+ */
+ .macro SAVE_RET_ABI_STATE
+ addi sp, sp, -32
+ sd s0, 16(sp)
+ sd ra, 24(sp)
+ sd a0, 8(sp)
+ addi s0, sp, 32
+ .endm
+
+ .macro STORE_ABI_STATE
+ ld ra, 8(sp)
+ ld s0, 0(sp)
+ addi sp, sp, 16
+ .endm
+
+ .macro STORE_RET_ABI_STATE
+ ld ra, 24(sp)
+ ld s0, 16(sp)
+ ld a0, 8(sp)
+ addi sp, sp, 32
+ .endm
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(return_to_handler)
+/*
+ * When implementing the frame pointer test, the ideal way is to compare
+ * s0 (frame pointer, if enabled) on entry with sp (stack pointer) on return.
+ * However, the psABI of variable-length-argument functions does not allow this.
+ *
+ * So alternatively we check the *old* frame pointer position, that is, the
+ * value stored in -16(s0) on entry, and the s0 on return.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv t6, s0
+#endif
+ SAVE_RET_ABI_STATE
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a0, t6
+#endif
+ la t0, ftrace_return_to_handler
+ jalr t0
+ mv a1, a0
+ STORE_RET_ABI_STATE
+ jalr a1
+ENDPROC(return_to_handler)
+EXPORT_SYMBOL(return_to_handler)
+#endif
+
+ENTRY(_mcount)
+ la t4, ftrace_stub
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ la t0, ftrace_graph_return
+ ld t1, 0(t0)
+ bne t1, t4, do_ftrace_graph_caller
+
+ la t3, ftrace_graph_entry
+ ld t2, 0(t3)
+ la t6, ftrace_graph_entry_stub
+ bne t2, t6, do_ftrace_graph_caller
+#endif
+ la t3, ftrace_trace_function
+ ld t5, 0(t3)
+ bne t5, t4, do_trace
+ ret
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * A pseudo representation for the function graph tracer:
+ * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller)
+ */
+do_ftrace_graph_caller:
+ addi a0, s0, -8
+ mv a1, ra
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ ld a2, -16(s0)
+#endif
+ SAVE_ABI_STATE
+ la t0, prepare_ftrace_return
+ jalr t0
+ STORE_ABI_STATE
+ ret
+#endif
+
+/*
+ * A pseudo representation for the function tracer:
+ * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller)
+ */
+do_trace:
+ ld a1, -8(s0)
+ mv a0, ra
+
+ SAVE_ABI_STATE
+ jalr t5
+ STORE_ABI_STATE
+ ret
+ENDPROC(_mcount)
+EXPORT_SYMBOL(_mcount)
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index cb7b0c63014e..09f7064e898c 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -49,10 +49,6 @@ struct screen_info screen_info = {
};
#endif
-#ifdef CONFIG_CMDLINE_BOOL
-static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
-#endif /* CONFIG_CMDLINE_BOOL */
-
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
@@ -153,25 +149,6 @@ void __init sbi_save(unsigned int hartid, void *dtb)
early_init_dt_scan(__va(dtb));
}
-/*
- * Allow the user to manually add a memory region (in case DTS is broken);
- * "mem_end=nn[KkMmGg]"
- */
-static int __init mem_end_override(char *p)
-{
- resource_size_t base, end;
-
- if (!p)
- return -EINVAL;
- base = (uintptr_t) __pa(PAGE_OFFSET);
- end = memparse(p, &p) & PMD_MASK;
- if (end == 0)
- return -EINVAL;
- memblock_add(base, end - base);
- return 0;
-}
-early_param("mem_end", mem_end_override);
-
static void __init setup_bootmem(void)
{
struct memblock_region *reg;
@@ -204,22 +181,19 @@ static void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
+
+ for_each_memblock(memory, reg) {
+ unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
+ unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
+
+ memblock_set_node(PFN_PHYS(start_pfn),
+ PFN_PHYS(end_pfn - start_pfn),
+ &memblock.memory, 0);
+ }
}
void __init setup_arch(char **cmdline_p)
{
-#ifdef CONFIG_CMDLINE_BOOL
-#ifdef CONFIG_CMDLINE_OVERRIDE
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
-#else
- if (builtin_cmdline[0] != '\0') {
- /* Append bootloader command line to built-in */
- strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
- strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
- strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
- }
-#endif /* CONFIG_CMDLINE_OVERRIDE */
-#endif /* CONFIG_CMDLINE_BOOL */
*cmdline_p = boot_command_line;
parse_early_param();
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index e8a178df8144..582cb153eb24 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -74,7 +74,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
down_write(&mm->mmap_sem);
vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
- if (unlikely(IS_ERR_VALUE(vdso_base))) {
+ if (IS_ERR_VALUE(vdso_base)) {
ret = vdso_base;
goto end;
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index ceebfc29305b..148c98ca9b45 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -238,6 +238,10 @@ vmalloc_fault:
* Do _not_ use "tsk->active_mm->pgd" here.
* We might be inside an interrupt in the middle
* of a task switch.
+ *
+ * Note: use the old sptbr name instead of the current satp
+ * name to support binutils 2.29, which doesn't know about
+ * the privileged ISA 1.10 yet.
*/
index = pgd_index(addr);
pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 9f4bee5e51fd..c77df8142be2 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -17,6 +17,7 @@
#include <linux/initrd.h>
#include <linux/memblock.h>
#include <linux/swap.h>
+#include <linux/sizes.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
@@ -25,11 +26,12 @@
static void __init zone_sizes_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
+ unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
- memset(zones_size, 0, sizeof(zones_size));
- zones_size[ZONE_NORMAL] = max_mapnr;
- free_area_init_node(0, zones_size, pfn_base, NULL);
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn));
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+
+ free_area_init_nodes(max_zone_pfns);
}
void setup_zero_page(void)
@@ -39,8 +41,6 @@ void setup_zero_page(void)
void __init paging_init(void)
{
- init_mm.pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr));
-
setup_zero_page();
local_flush_tlb_all();
zone_sizes_init();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0105ce28e246..eaee7087886f 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -112,7 +112,6 @@ config S390
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
- select ARCH_WANTS_PROT_NUMA_PROT_NONE
select ARCH_WANTS_UBSAN_NO_NULL
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
@@ -540,6 +539,51 @@ config ARCH_RANDOM
If unsure, say Y.
+config KERNEL_NOBP
+ def_bool n
+ prompt "Enable modified branch prediction for the kernel by default"
+ help
+ If this option is selected, the kernel will switch to a modified
+ branch prediction mode if the firmware interface is available.
+ The modified branch prediction mode improves behaviour with
+ regard to speculative execution.
+
+ With the option enabled, the kernel parameter "nobp=0" or "nospec"
+ can be used to run the kernel in the normal branch prediction mode.
+
+ With the option disabled, the modified branch prediction mode can be
+ enabled with the "nobp=1" kernel parameter.
+
+ If unsure, say N.
+
+config EXPOLINE
+ def_bool n
+ prompt "Avoid speculative indirect branches in the kernel"
+ help
+ Compile the kernel with the expoline compiler options to guard
+ against kernel-to-user data leaks by avoiding speculative indirect
+ branches.
+ Requires a compiler with -mindirect-branch=thunk support for full
+ protection. The kernel may run slower.
+
+ If unsure, say N.
+
+choice
+ prompt "Expoline default"
+ depends on EXPOLINE
+ default EXPOLINE_FULL
+
+config EXPOLINE_OFF
+ bool "spectre_v2=off"
+
+config EXPOLINE_MEDIUM
+ bool "spectre_v2=auto"
+
+config EXPOLINE_FULL
+ bool "spectre_v2=on"
+
+endchoice
+
endmenu
menu "Memory setup"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index fd691c4ff89e..2ced3239cb84 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -78,6 +78,16 @@ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
endif
+ifdef CONFIG_EXPOLINE
+ ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y)
+ CC_FLAGS_EXPOLINE := -mindirect-branch=thunk
+ CC_FLAGS_EXPOLINE += -mfunction-return=thunk
+ CC_FLAGS_EXPOLINE += -mindirect-branch-table
+ export CC_FLAGS_EXPOLINE
+ cflags-y += $(CC_FLAGS_EXPOLINE)
+ endif
+endif
+
ifdef CONFIG_FUNCTION_TRACER
# make use of hotpatch feature if the compiler supports it
cc_hotpatch := -mhotpatch=0,3
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 10432607a573..f9eddbca79d2 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -49,6 +49,30 @@ do { \
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
+/**
+ * array_index_mask_nospec - generate a mask for array_idx() that is
+ * ~0UL when the bounds check succeeds and 0 otherwise
+ * @index: array element index
+ * @size: number of elements in array
+ */
+#define array_index_mask_nospec array_index_mask_nospec
+static inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
+{
+ unsigned long mask;
+
+ if (__builtin_constant_p(size) && size > 0) {
+ asm(" clgr %2,%1\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size-1), "d" (index) :"cc");
+ return mask;
+ }
+ asm(" clgr %1,%2\n"
+ " slbgr %0,%0\n"
+ :"=d" (mask) : "d" (size), "d" (index) :"cc");
+ return ~mask;
+}
+
#include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */
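The mask is meant to be consumed under a bounds check, as the generic array_index_nospec() helper does; a usage sketch:

	if (idx < size) {
		unsigned long mask = array_index_mask_nospec(idx, size);

		idx &= mask;	/* forced to 0 if idx is speculatively OOB */
		val = array[idx];
	}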
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 31e400c1a1f3..86e5b2fdee3c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -261,6 +261,11 @@ static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
+static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+ return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
{
return __set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
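The _inv helpers use MSB-first bit numbering, matching the s390 convention of counting bits from the left; a worked example on 64-bit:

	/* nr = 0 becomes 0 ^ 63 = 63, i.e. the MSB of the first word */
	test_and_clear_bit_inv(0, bitmap);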
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index a478eb61aaf7..fb56fa3283a2 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -20,7 +20,9 @@ struct css_general_char {
u32 aif_tdd : 1; /* bit 56 */
u32 : 1;
u32 qebsm : 1; /* bit 58 */
- u32 : 8;
+ u32 : 2;
+ u32 aiv : 1; /* bit 61 */
+ u32 : 5;
u32 aif_osa : 1; /* bit 67 */
u32 : 12;
u32 eadm_rf : 1; /* bit 80 */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index eb5323161f11..bb63b2afdf6f 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
#include <linux/device.h>
-#include <linux/blkdev.h>
+#include <linux/blk_types.h>
struct arqb {
u64 data;
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index fbe0c4be3cd8..99c8ce30b3cd 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -15,6 +15,24 @@
#define MAX_FACILITY_BIT (sizeof(((struct lowcore *)0)->stfle_fac_list) * 8)
+static inline void __set_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] |= 0x80 >> (nr & 7);
+}
+
+static inline void __clear_facility(unsigned long nr, void *facilities)
+{
+ unsigned char *ptr = (unsigned char *) facilities;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return;
+ ptr[nr >> 3] &= ~(0x80 >> (nr & 7));
+}
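The facility list is a big-endian bit string: bit 0 is the MSB of byte 0. Worked example for facility 82, the branch prediction control used later in this series:

	/* 82 >> 3 = byte 10; mask 0x80 >> (82 & 7) = 0x80 >> 2 = 0x20 */
	__set_facility(82, S390_lowcore.alt_stfle_fac_list);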
+
static inline int __test_facility(unsigned long nr, void *facilities)
{
unsigned char *ptr;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c1b0a9ac1dc8..afb0f08b8021 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -2,7 +2,7 @@
/*
* definition for kernel virtual machines on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -183,6 +183,7 @@ struct kvm_s390_sie_block {
#define ECA_IB 0x40000000
#define ECA_SIGPI 0x10000000
#define ECA_MVPGI 0x01000000
+#define ECA_AIV 0x00200000
#define ECA_VX 0x00020000
#define ECA_PROTEXCI 0x00002000
#define ECA_SII 0x00000001
@@ -228,7 +229,9 @@ struct kvm_s390_sie_block {
__u8 epdx; /* 0x0069 */
__u8 reserved6a[2]; /* 0x006a */
__u32 todpr; /* 0x006c */
- __u8 reserved70[16]; /* 0x0070 */
+#define GISA_FORMAT1 0x00000001
+ __u32 gd; /* 0x0070 */
+ __u8 reserved74[12]; /* 0x0074 */
__u64 mso; /* 0x0080 */
__u64 msl; /* 0x0088 */
psw_t gpsw; /* 0x0090 */
@@ -317,18 +320,30 @@ struct kvm_vcpu_stat {
u64 deliver_program_int;
u64 deliver_io_int;
u64 exit_wait_state;
+ u64 instruction_epsw;
+ u64 instruction_gs;
+ u64 instruction_io_other;
+ u64 instruction_lpsw;
+ u64 instruction_lpswe;
u64 instruction_pfmf;
+ u64 instruction_ptff;
+ u64 instruction_sck;
+ u64 instruction_sckpf;
u64 instruction_stidp;
u64 instruction_spx;
u64 instruction_stpx;
u64 instruction_stap;
- u64 instruction_storage_key;
+ u64 instruction_iske;
+ u64 instruction_ri;
+ u64 instruction_rrbe;
+ u64 instruction_sske;
u64 instruction_ipte_interlock;
- u64 instruction_stsch;
- u64 instruction_chsc;
u64 instruction_stsi;
u64 instruction_stfl;
+ u64 instruction_tb;
+ u64 instruction_tpi;
u64 instruction_tprot;
+ u64 instruction_tsch;
u64 instruction_sie;
u64 instruction_essa;
u64 instruction_sthyi;
@@ -354,6 +369,7 @@ struct kvm_vcpu_stat {
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
+ u64 diagnose_other;
};
#define PGM_OPERATION 0x01
@@ -410,35 +426,35 @@ struct kvm_vcpu_stat {
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
-/* irq types in order of priority */
+/* irq types in ascending order of priority */
enum irq_types {
- IRQ_PEND_MCHK_EX = 0,
- IRQ_PEND_SVC,
- IRQ_PEND_PROG,
- IRQ_PEND_MCHK_REP,
- IRQ_PEND_EXT_IRQ_KEY,
- IRQ_PEND_EXT_MALFUNC,
- IRQ_PEND_EXT_EMERGENCY,
- IRQ_PEND_EXT_EXTERNAL,
- IRQ_PEND_EXT_CLOCK_COMP,
- IRQ_PEND_EXT_CPU_TIMER,
- IRQ_PEND_EXT_TIMING,
- IRQ_PEND_EXT_SERVICE,
- IRQ_PEND_EXT_HOST,
- IRQ_PEND_PFAULT_INIT,
- IRQ_PEND_PFAULT_DONE,
- IRQ_PEND_VIRTIO,
- IRQ_PEND_IO_ISC_0,
- IRQ_PEND_IO_ISC_1,
- IRQ_PEND_IO_ISC_2,
- IRQ_PEND_IO_ISC_3,
- IRQ_PEND_IO_ISC_4,
- IRQ_PEND_IO_ISC_5,
- IRQ_PEND_IO_ISC_6,
- IRQ_PEND_IO_ISC_7,
- IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_SET_PREFIX = 0,
IRQ_PEND_RESTART,
- IRQ_PEND_SET_PREFIX,
+ IRQ_PEND_SIGP_STOP,
+ IRQ_PEND_IO_ISC_7,
+ IRQ_PEND_IO_ISC_6,
+ IRQ_PEND_IO_ISC_5,
+ IRQ_PEND_IO_ISC_4,
+ IRQ_PEND_IO_ISC_3,
+ IRQ_PEND_IO_ISC_2,
+ IRQ_PEND_IO_ISC_1,
+ IRQ_PEND_IO_ISC_0,
+ IRQ_PEND_VIRTIO,
+ IRQ_PEND_PFAULT_DONE,
+ IRQ_PEND_PFAULT_INIT,
+ IRQ_PEND_EXT_HOST,
+ IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_TIMING,
+ IRQ_PEND_EXT_CPU_TIMER,
+ IRQ_PEND_EXT_CLOCK_COMP,
+ IRQ_PEND_EXT_EXTERNAL,
+ IRQ_PEND_EXT_EMERGENCY,
+ IRQ_PEND_EXT_MALFUNC,
+ IRQ_PEND_EXT_IRQ_KEY,
+ IRQ_PEND_MCHK_REP,
+ IRQ_PEND_PROG,
+ IRQ_PEND_SVC,
+ IRQ_PEND_MCHK_EX,
IRQ_PEND_COUNT
};
@@ -516,9 +532,6 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
- struct kvm_s390_float_interrupt *float_int;
- struct swait_queue_head *wq;
- atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
unsigned long pending_irqs;
@@ -707,14 +720,50 @@ struct kvm_s390_crypto_cb {
struct kvm_s390_apcb1 apcb1; /* 0x0080 */
};
+struct kvm_s390_gisa {
+ union {
+ struct { /* common to all formats */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01[2];
+ u8 iam;
+ };
+ struct { /* format 0 */
+ u32 next_alert;
+ u8 ipm;
+ u8 reserved01;
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 iam;
+ u8 reserved02[4];
+ u32 airq_count;
+ } g0;
+ struct { /* format 1 */
+ u32 next_alert;
+ u8 ipm;
+ u8 simm;
+ u8 nimm;
+ u8 iam;
+ u8 aism[8];
+ u8 : 6;
+ u8 g : 1;
+ u8 c : 1;
+ u8 reserved03[11];
+ u32 airq_count;
+ } g1;
+ };
+};
+
/*
- * sie_page2 has to be allocated as DMA because fac_list and crycb need
- * 31bit addresses in the sie control block.
+ * sie_page2 has to be allocated as DMA because fac_list, crycb and
+ * gisa need 31bit addresses in the sie control block.
*/
struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
- u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+ struct kvm_s390_gisa gisa; /* 0x0900 */
+ u8 reserved920[0x1000 - 0x920]; /* 0x0920 */
};
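Placing the GISA in sie_page2 keeps its address below 2 GB so it fits the 31-bit gd field of each SIE block. A sketch of the wiring (variable names assumed, format-1 gated on the new has_gisaf bit):

	scb->gd = (u32)(u64)kvm->arch.gisa;
	if (scb->gd && sclp.has_gisaf)
		scb->gd |= GISA_FORMAT1;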
struct kvm_s390_vsie {
@@ -761,6 +810,7 @@ struct kvm_arch{
struct kvm_s390_migration_state *migration_state;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+ struct kvm_s390_gisa *gisa;
};
#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index ec6592e8ba36..5bc888841eaf 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -136,7 +136,11 @@ struct lowcore {
__u64 vdso_per_cpu_data; /* 0x03b8 */
__u64 machine_flags; /* 0x03c0 */
__u64 gmap; /* 0x03c8 */
- __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */
+ __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */
+
+ /* br %r1 trampoline */
+ __u16 br_r1_trampoline; /* 0x0400 */
+ __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */
/*
* 0xe00 contains the address of the IPL Parameter Information
@@ -151,7 +155,8 @@ struct lowcore {
__u8 pad_0x0e20[0x0f00-0x0e20]; /* 0x0e20 */
/* Extended facility list */
- __u64 stfle_fac_list[32]; /* 0x0f00 */
+ __u64 stfle_fac_list[16]; /* 0x0f00 */
+ __u64 alt_stfle_fac_list[16]; /* 0x0f80 */
__u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
/* Pointer to the machine check extended save area */
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
new file mode 100644
index 000000000000..7df48e5cf36f
--- /dev/null
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_EXPOLINE_H
+#define _ASM_S390_EXPOLINE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+extern int nospec_call_disable;
+extern int nospec_return_disable;
+
+void nospec_init_branches(void);
+void nospec_call_revert(s32 *start, s32 *end);
+void nospec_return_revert(s32 *start, s32 *end);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bfbfad482289..7f2953c15c37 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -91,6 +91,7 @@ void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
extern void execve_tail(void);
+extern void __bpon(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -377,6 +378,9 @@ extern void memcpy_absolute(void *, void *, size_t);
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
} while (0)
+extern int s390_isolate_bp(void);
+extern int s390_isolate_bp_guest(void);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
index 6b1540337ed6..0e1605538cd4 100644
--- a/arch/s390/include/asm/runtime_instr.h
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -2,75 +2,10 @@
#ifndef _RUNTIME_INSTR_H
#define _RUNTIME_INSTR_H
-#define S390_RUNTIME_INSTR_START 0x1
-#define S390_RUNTIME_INSTR_STOP 0x2
-
-struct runtime_instr_cb {
- __u64 rca;
- __u64 roa;
- __u64 rla;
-
- __u32 v : 1;
- __u32 s : 1;
- __u32 k : 1;
- __u32 h : 1;
- __u32 a : 1;
- __u32 reserved1 : 3;
- __u32 ps : 1;
- __u32 qs : 1;
- __u32 pc : 1;
- __u32 qc : 1;
- __u32 reserved2 : 1;
- __u32 g : 1;
- __u32 u : 1;
- __u32 l : 1;
- __u32 key : 4;
- __u32 reserved3 : 8;
- __u32 t : 1;
- __u32 rgs : 3;
-
- __u32 m : 4;
- __u32 n : 1;
- __u32 mae : 1;
- __u32 reserved4 : 2;
- __u32 c : 1;
- __u32 r : 1;
- __u32 b : 1;
- __u32 j : 1;
- __u32 e : 1;
- __u32 x : 1;
- __u32 reserved5 : 2;
- __u32 bpxn : 1;
- __u32 bpxt : 1;
- __u32 bpti : 1;
- __u32 bpni : 1;
- __u32 reserved6 : 2;
-
- __u32 d : 1;
- __u32 f : 1;
- __u32 ic : 4;
- __u32 dc : 4;
-
- __u64 reserved7;
- __u64 sf;
- __u64 rsic;
- __u64 reserved8;
-} __packed __aligned(8);
+#include <uapi/asm/runtime_instr.h>
extern struct runtime_instr_cb runtime_instr_empty_cb;
-static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
-{
- asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
- : : "Q" (*cb));
-}
-
-static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
-{
- asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
- : "=Q" (*cb) : : "cc");
-}
-
static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
{
if (cb_prev)
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index d3c1a8a2e3ad..3cae9168f63c 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -77,6 +77,7 @@ struct sclp_info {
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
unsigned char has_kss : 1;
+ unsigned char has_gisaf : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 25057c118d56..fe7b3f8f0791 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -21,7 +21,8 @@ struct sysinfo_1_1_1 {
unsigned char :8;
unsigned char ccr;
unsigned char cai;
- char reserved_0[28];
+ char reserved_0[20];
+ unsigned long lic;
char manufacturer[16];
char type[4];
char reserved_1[12];
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 25d6ec3aaddd..83ba57533ce6 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -58,6 +58,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
#define TIF_PATCH_PENDING 5 /* pending live patching update */
#define TIF_PGSTE 6 /* New mm's will use 4K page tables */
+#define TIF_ISOLATE_BP 8 /* Run process with isolated BP */
+#define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -78,6 +80,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define _TIF_UPROBE _BITUL(TIF_UPROBE)
#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#define _TIF_PATCH_PENDING _BITUL(TIF_PATCH_PENDING)
+#define _TIF_ISOLATE_BP _BITUL(TIF_ISOLATE_BP)
+#define _TIF_ISOLATE_BP_GUEST _BITUL(TIF_ISOLATE_BP_GUEST)
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h
new file mode 100644
index 000000000000..45c9ec984e6b
--- /dev/null
+++ b/arch/s390/include/uapi/asm/runtime_instr.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _S390_UAPI_RUNTIME_INSTR_H
+#define _S390_UAPI_RUNTIME_INSTR_H
+
+#include <linux/types.h>
+
+#define S390_RUNTIME_INSTR_START 0x1
+#define S390_RUNTIME_INSTR_STOP 0x2
+
+struct runtime_instr_cb {
+ __u64 rca;
+ __u64 roa;
+ __u64 rla;
+
+ __u32 v : 1;
+ __u32 s : 1;
+ __u32 k : 1;
+ __u32 h : 1;
+ __u32 a : 1;
+ __u32 reserved1 : 3;
+ __u32 ps : 1;
+ __u32 qs : 1;
+ __u32 pc : 1;
+ __u32 qc : 1;
+ __u32 reserved2 : 1;
+ __u32 g : 1;
+ __u32 u : 1;
+ __u32 l : 1;
+ __u32 key : 4;
+ __u32 reserved3 : 8;
+ __u32 t : 1;
+ __u32 rgs : 3;
+
+ __u32 m : 4;
+ __u32 n : 1;
+ __u32 mae : 1;
+ __u32 reserved4 : 2;
+ __u32 c : 1;
+ __u32 r : 1;
+ __u32 b : 1;
+ __u32 j : 1;
+ __u32 e : 1;
+ __u32 x : 1;
+ __u32 reserved5 : 2;
+ __u32 bpxn : 1;
+ __u32 bpxt : 1;
+ __u32 bpti : 1;
+ __u32 bpni : 1;
+ __u32 reserved6 : 2;
+
+ __u32 d : 1;
+ __u32 f : 1;
+ __u32 ic : 4;
+ __u32 dc : 4;
+
+ __u64 reserved7;
+ __u64 sf;
+ __u64 rsic;
+ __u64 reserved8;
+} __packed __aligned(8);
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
+ : : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
+ : "=Q" (*cb) : : "cc");
+}
+
+#endif /* _S390_UAPI_RUNTIME_INSTR_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 909bce65cb2b..7f27e3da9709 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -29,6 +29,7 @@ UBSAN_SANITIZE_early.o := n
#
ifneq ($(CC_FLAGS_MARCH),-march=z900)
CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
+CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE)
CFLAGS_als.o += -march=z900
AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
AFLAGS_head.o += -march=z900
@@ -63,6 +64,9 @@ obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
extra-y += head.o head64.o vmlinux.lds
+obj-$(CONFIG_EXPOLINE) += nospec-branch.o
+CFLAGS_REMOVE_expoline.o += $(CC_FLAGS_EXPOLINE)
+
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 574e77622c04..22476135f738 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -15,6 +15,29 @@ static int __init disable_alternative_instructions(char *str)
early_param("noaltinstr", disable_alternative_instructions);
+static int __init nobp_setup_early(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ if (enabled && test_facility(82))
+ __set_facility(82, S390_lowcore.alt_stfle_fac_list);
+ else
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+ return 0;
+}
+early_param("nospec", nospec_setup_early);
+
struct brcl_insn {
u16 opc;
s32 disp;
@@ -75,7 +98,8 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start,
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- if (!test_facility(a->facility))
+ if (!__test_facility(a->facility,
+ S390_lowcore.alt_stfle_fac_list))
continue;
if (unlikely(a->instrlen % 2 || a->replacementlen % 2)) {
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 497a92047591..ac707a9f729e 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -193,6 +193,11 @@ static noinline __init void setup_facility_list(void)
{
stfle(S390_lowcore.stfle_fac_list,
ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+ memcpy(S390_lowcore.alt_stfle_fac_list,
+ S390_lowcore.stfle_fac_list,
+ sizeof(S390_lowcore.alt_stfle_fac_list));
+ if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
+ __clear_facility(82, S390_lowcore.alt_stfle_fac_list);
}
static __init void detect_diag9c(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6cd444d25545..13a133a6015c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -107,6 +107,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 3f
1: UPDATE_VTIME %r14,%r15,\timer
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
2: lg %r15,__LC_ASYNC_STACK # load async stack
3: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
@@ -159,6 +160,130 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
tm off+\addr, \mask
.endm
+ .macro BPOFF
+ .pushsection .altinstr_replacement, "ax"
+660: .long 0xb2e8c000
+ .popsection
+661: .long 0x47000000
+ .pushsection .altinstructions, "a"
+ .long 661b - .
+ .long 660b - .
+ .word 82
+ .byte 4
+ .byte 4
+ .popsection
+ .endm
+
+ .macro BPON
+ .pushsection .altinstr_replacement, "ax"
+662: .long 0xb2e8d000
+ .popsection
+663: .long 0x47000000
+ .pushsection .altinstructions, "a"
+ .long 663b - .
+ .long 662b - .
+ .word 82
+ .byte 4
+ .byte 4
+ .popsection
+ .endm
+
+ .macro BPENTER tif_ptr,tif_mask
+ .pushsection .altinstr_replacement, "ax"
+662: .word 0xc004, 0x0000, 0x0000 # 6 byte nop
+ .word 0xc004, 0x0000, 0x0000 # 6 byte nop
+ .popsection
+664: TSTMSK \tif_ptr,\tif_mask
+ jz . + 8
+ .long 0xb2e8d000
+ .pushsection .altinstructions, "a"
+ .long 664b - .
+ .long 662b - .
+ .word 82
+ .byte 12
+ .byte 12
+ .popsection
+ .endm
+
+ .macro BPEXIT tif_ptr,tif_mask
+ TSTMSK \tif_ptr,\tif_mask
+ .pushsection .altinstr_replacement, "ax"
+662: jnz . + 8
+ .long 0xb2e8d000
+ .popsection
+664: jz . + 8
+ .long 0xb2e8c000
+ .pushsection .altinstructions, "a"
+ .long 664b - .
+ .long 662b - .
+ .word 82
+ .byte 8
+ .byte 8
+ .popsection
+ .endm
+
+#ifdef CONFIG_EXPOLINE
+
+ .macro GEN_BR_THUNK name,reg,tmp
+ .section .text.\name,"axG",@progbits,\name,comdat
+ .globl \name
+ .hidden \name
+ .type \name,@function
+\name:
+ .cfi_startproc
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,0f
+#else
+ larl \tmp,0f
+ ex 0,0(\tmp)
+#endif
+ j .
+0: br \reg
+ .cfi_endproc
+ .endm
+
+ GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
+ GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
+ GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
+
+ .macro BASR_R14_R9
+0: brasl %r14,__s390x_indirect_jump_r1use_r9
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+ .macro BR_R1USE_R14
+0: jg __s390x_indirect_jump_r1use_r14
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+ .macro BR_R11USE_R14
+0: jg __s390x_indirect_jump_r11use_r14
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 0b-.
+ .popsection
+ .endm
+
+#else /* CONFIG_EXPOLINE */
+
+ .macro BASR_R14_R9
+ basr %r14,%r9
+ .endm
+
+ .macro BR_R1USE_R14
+ br %r14
+ .endm
+
+ .macro BR_R11USE_R14
+ br %r14
+ .endm
+
+#endif /* CONFIG_EXPOLINE */
+
+
.section .kprobes.text, "ax"
.Ldummy:
/*
@@ -171,6 +296,11 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
*/
nop 0
+ENTRY(__bpon)
+ .globl __bpon
+ BPON
+ BR_R1USE_R14
+
/*
* Scheduler resume function, called by switch_to
* gpr2 = (task_struct *) prev
@@ -193,9 +323,9 @@ ENTRY(__switch_to)
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
- bzr %r14
+ jz 0f
.insn s,0xb2800000,__LC_LPP # set program parameter
- br %r14
+0: BR_R1USE_R14
.L__critical_start:
@@ -207,9 +337,11 @@ ENTRY(__switch_to)
*/
ENTRY(sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ lg %r12,__LC_CURRENT
stg %r2,__SF_EMPTY(%r15) # save control block pointer
stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
+ mvc __SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags
TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ?
jno .Lsie_load_guest_gprs
brasl %r14,load_fpu_regs # load guest fp/vx regs
@@ -226,8 +358,12 @@ ENTRY(sie64a)
jnz .Lsie_skip
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lsie_skip # exit if fp/vx regs changed
+ BPEXIT __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
+.Lsie_exit:
+ BPOFF
+ BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
@@ -248,9 +384,15 @@ ENTRY(sie64a)
sie_exit:
lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ xgr %r0,%r0 # clear guest registers to
+ xgr %r1,%r1 # prevent speculative use
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_EMPTY+16(%r15) # return exit reason code
- br %r14
+ BR_R1USE_R14
.Lsie_fault:
lghi %r14,-EFAULT
stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
@@ -273,6 +415,7 @@ ENTRY(system_call)
stpt __LC_SYNC_ENTER_TIMER
.Lsysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ BPOFF
lg %r12,__LC_CURRENT
lghi %r13,__TASK_thread
lghi %r14,_PIF_SYSCALL
@@ -281,7 +424,10 @@ ENTRY(system_call)
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
.Lsysc_vtime:
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled register to prevent speculative use
+ xgr %r0,%r0
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
@@ -305,7 +451,7 @@ ENTRY(system_call)
lgf %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
- basr %r14,%r9 # call sys_xxxx
+ BASR_R14_R9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_return:
@@ -317,6 +463,7 @@ ENTRY(system_call)
jnz .Lsysc_work # check for work
TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lsysc_work
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lsysc_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
@@ -489,7 +636,7 @@ ENTRY(system_call)
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
- basr %r14,%r9 # call sys_xxx
+ BASR_R14_R9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_tracenogo:
TSTMSK __TI_flags(%r12),_TIF_TRACE
@@ -513,7 +660,7 @@ ENTRY(ret_from_fork)
lmg %r9,%r10,__PT_R9(%r11) # load gprs
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
- basr %r14,%r9
+ BASR_R14_R9
j .Lsysc_tracenogo
/*
@@ -522,6 +669,7 @@ ENTRY(kernel_thread_starter)
ENTRY(pgm_check_handler)
stpt __LC_SYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
lg %r12,__LC_CURRENT
@@ -550,6 +698,7 @@ ENTRY(pgm_check_handler)
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
aghi %r14,__TASK_thread # pointer to thread_struct
@@ -561,6 +710,15 @@ ENTRY(pgm_check_handler)
4: lgr %r13,%r11
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
@@ -582,9 +740,9 @@ ENTRY(pgm_check_handler)
nill %r10,0x007f
sll %r10,2
je .Lpgm_return
- lgf %r1,0(%r10,%r1) # load address of handler routine
+ lgf %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
- basr %r14,%r1 # branch to interrupt-handler
+ BASR_R14_R9 # branch to interrupt-handler
.Lpgm_return:
LOCKDEP_SYS_EXIT
tm __PT_PSW+1(%r11),0x01 # returning to user ?
@@ -620,12 +778,23 @@ ENTRY(pgm_check_handler)
ENTRY(io_int_handler)
STCK __LC_INT_CLOCK
stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
@@ -660,9 +829,13 @@ ENTRY(io_int_handler)
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lio_exit_kernel
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lio_exit_timer:
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+.Lio_exit_kernel:
lmg %r11,%r15,__PT_R11(%r11)
lpswe __LC_RETURN_PSW
.Lio_done:
@@ -833,12 +1006,23 @@ ENTRY(io_int_handler)
ENTRY(ext_int_handler)
STCK __LC_INT_CLOCK
stpt __LC_ASYNC_ENTER_TIMER
+ BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
stmg %r8,%r9,__PT_PSW(%r11)
lghi %r1,__LC_EXT_PARAMS2
@@ -871,11 +1055,12 @@ ENTRY(psw_idle)
.Lpsw_idle_stcctm:
#endif
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
+ BPON
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
- br %r14
+ BR_R1USE_R14
.Lpsw_idle_end:
/*
@@ -889,7 +1074,7 @@ ENTRY(save_fpu_regs)
lg %r2,__LC_CURRENT
aghi %r2,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bor %r14
+ jo .Lsave_fpu_regs_exit
stfpc __THREAD_FPU_fpc(%r2)
lg %r3,__THREAD_FPU_regs(%r2)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
@@ -916,7 +1101,8 @@ ENTRY(save_fpu_regs)
std 15,120(%r3)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
- br %r14
+.Lsave_fpu_regs_exit:
+ BR_R1USE_R14
.Lsave_fpu_regs_end:
EXPORT_SYMBOL(save_fpu_regs)
@@ -934,7 +1120,7 @@ load_fpu_regs:
lg %r4,__LC_CURRENT
aghi %r4,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bnor %r14
+ jno .Lload_fpu_regs_exit
lfpc __THREAD_FPU_fpc(%r4)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
@@ -961,7 +1147,8 @@ load_fpu_regs:
ld 15,120(%r4)
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
- br %r14
+.Lload_fpu_regs_exit:
+ BR_R1USE_R14
.Lload_fpu_regs_end:
.L__critical_end:
@@ -971,6 +1158,7 @@ load_fpu_regs:
*/
ENTRY(mcck_int_handler)
STCK __LC_MCCK_CLOCK
+ BPOFF
la %r1,4095 # validate r1
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer
sckc __LC_CLOCK_COMPARATOR # validate comparator
@@ -1046,6 +1234,16 @@ ENTRY(mcck_int_handler)
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
+ # clear user controlled registers to prevent speculative use
+ xgr %r0,%r0
+ xgr %r1,%r1
+ xgr %r2,%r2
+ xgr %r3,%r3
+ xgr %r4,%r4
+ xgr %r5,%r5
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
@@ -1071,6 +1269,7 @@ ENTRY(mcck_int_handler)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
+ BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
@@ -1166,7 +1365,7 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
-0: br %r14
+0: BR_R11USE_R14
.align 8
.Lcleanup_table:
@@ -1197,11 +1396,12 @@ cleanup_critical:
clg %r9,BASED(.Lsie_crit_mcck_length)
jh 1f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1: lg %r9,__SF_EMPTY(%r15) # get control block pointer
+1: BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ lg %r9,__SF_EMPTY(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
- br %r14
+ BR_R11USE_R14
#endif
.Lcleanup_system_call:
@@ -1254,7 +1454,7 @@ cleanup_critical:
stg %r15,56(%r11) # r15 stack pointer
# set new psw address and exit
larl %r9,.Lsysc_do_svc
- br %r14
+ BR_R11USE_R14
.Lcleanup_system_call_insn:
.quad system_call
.quad .Lsysc_stmg
@@ -1266,7 +1466,7 @@ cleanup_critical:
.Lcleanup_sysc_tif:
larl %r9,.Lsysc_tif
- br %r14
+ BR_R11USE_R14
.Lcleanup_sysc_restore:
# check if stpt has been executed
@@ -1283,14 +1483,14 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
+ BR_R11USE_R14
.Lcleanup_sysc_restore_insn:
.quad .Lsysc_exit_timer
.quad .Lsysc_done - 4
.Lcleanup_io_tif:
larl %r9,.Lio_tif
- br %r14
+ BR_R11USE_R14
.Lcleanup_io_restore:
# check if stpt has been executed
@@ -1304,7 +1504,7 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- br %r14
+ BR_R11USE_R14
.Lcleanup_io_restore_insn:
.quad .Lio_exit_timer
.quad .Lio_done - 4
@@ -1357,17 +1557,17 @@ cleanup_critical:
# prepare return psw
nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
- br %r14
+ BR_R11USE_R14
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
larl %r9,save_fpu_regs
- br %r14
+ BR_R11USE_R14
.Lcleanup_load_fpu_regs:
larl %r9,load_fpu_regs
- br %r14
+ BR_R11USE_R14
/*
* Integer constants
@@ -1387,7 +1587,6 @@ cleanup_critical:
.Lsie_crit_mcck_length:
.quad .Lsie_skip - .Lsie_entry
#endif
-
.section .rodata, "a"
#define SYSCALL(esame,emu) .long esame
.globl sys_call_table
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index da5cc3b469aa..34477c1aee6d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -543,6 +543,7 @@ static struct kset *ipl_kset;
static void __ipl_run(void *unused)
{
+ __bpon();
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 943d13e90c98..60f60afa645c 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -281,7 +281,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
* is a BUG. The code path resides in the .kprobes.text
* section and is executed with interrupts disabled.
*/
- printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+ pr_err("Invalid kprobe detected.\n");
dump_kprobe(p);
BUG();
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index b7abfad4fd7d..1fc6d1ff92d3 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,8 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
+#include <asm/facility.h>
#if 0
#define DEBUGP printk
@@ -156,7 +158,11 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
me->arch.got_offset = me->core_layout.size;
me->core_layout.size += me->arch.got_size;
me->arch.plt_offset = me->core_layout.size;
- me->core_layout.size += me->arch.plt_size;
+ if (me->arch.plt_size) {
+ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable)
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ me->core_layout.size += me->arch.plt_size;
+ }
return 0;
}
@@ -310,9 +316,21 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
unsigned int *ip;
ip = me->core_layout.base + me->arch.plt_offset +
info->plt_offset;
- ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
- ip[1] = 0x100a0004;
- ip[2] = 0x07f10000;
+ ip[0] = 0x0d10e310; /* basr 1,0 */
+ ip[1] = 0x100a0004; /* lg 1,10(1) */
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_call_disable) {
+ unsigned int *ij;
+ ij = me->core_layout.base +
+ me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ ip[2] = 0xa7f40000 + /* j __jump_r1 */
+ (unsigned int)(u16)
+ (((unsigned long) ij - 8 -
+ (unsigned long) ip) / 2);
+ } else {
+ ip[2] = 0x07f10000; /* br %r1 */
+ }
ip[3] = (unsigned int) (val >> 32);
ip[4] = (unsigned int) val;
info->plt_initialized = 1;
@@ -418,16 +436,42 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s;
- char *secstrings;
+ char *secstrings, *secname;
+ void *aseg;
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ !nospec_call_disable && me->arch.plt_size) {
+ unsigned int *ij;
+
+ ij = me->core_layout.base + me->arch.plt_offset +
+ me->arch.plt_size - PLT_ENTRY_SIZE;
+ if (test_facility(35)) {
+ ij[0] = 0xc6000000; /* exrl %r0,.+10 */
+ ij[1] = 0x0005a7f4; /* j . */
+ ij[2] = 0x000007f1; /* br %r1 */
+ } else {
+ ij[0] = 0x44000000 | (unsigned int)
+ offsetof(struct lowcore, br_r1_trampoline);
+ ij[1] = 0xa7f40000; /* j . */
+ }
+ }
secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
- if (!strcmp(".altinstructions", secstrings + s->sh_name)) {
- /* patch .altinstructions */
- void *aseg = (void *)s->sh_addr;
+ aseg = (void *) s->sh_addr;
+ secname = secstrings + s->sh_name;
+ if (!strcmp(".altinstructions", secname))
+ /* patch .altinstructions */
apply_alternatives(aseg, aseg + s->sh_size);
- }
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strcmp(".nospec_call_table", secname)))
+ nospec_call_revert(aseg, aseg + s->sh_size);
+
+ if (IS_ENABLED(CONFIG_EXPOLINE) &&
+ (!strcmp(".nospec_return_table", secname)))
+ nospec_return_revert(aseg, aseg + s->sh_size);
}
jump_label_apply_nops(me);
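Note: the branch patched into ip[2] above is a 4-byte "brc 15" (j) whose 16-bit immediate counts halfwords relative to the jump instruction itself; since ip[2] sits 8 bytes into the PLT entry, the displacement works out to (ij - 8 - ip) / 2. A sketch of the encoding (helper name is illustrative only):

	static unsigned int encode_j(unsigned long jump_insn, unsigned long target)
	{
		/* 0xa7f4 is brc 15 (unconditional j); the 16-bit immediate
		 * counts halfwords relative to the jump instruction itself */
		return 0xa7f40000 | (unsigned int)(unsigned short)((target - jump_insn) / 2);
	}
	/* ip[2] = encode_j((unsigned long)ip + 8, (unsigned long)ij); */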
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
new file mode 100644
index 000000000000..69d7fcf48158
--- /dev/null
+++ b/arch/s390/kernel/nospec-branch.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <asm/nospec-branch.h>
+
+int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
+int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);
+
+static int __init nospectre_v2_setup_early(char *str)
+{
+ nospec_call_disable = 1;
+ nospec_return_disable = 1;
+ return 0;
+}
+early_param("nospectre_v2", nospectre_v2_setup_early);
+
+static int __init spectre_v2_setup_early(char *str)
+{
+ if (str && !strncmp(str, "on", 2)) {
+ nospec_call_disable = 0;
+ nospec_return_disable = 0;
+ }
+ if (str && !strncmp(str, "off", 3)) {
+ nospec_call_disable = 1;
+ nospec_return_disable = 1;
+ }
+ if (str && !strncmp(str, "auto", 4)) {
+ nospec_call_disable = 0;
+ nospec_return_disable = 1;
+ }
+ return 0;
+}
+early_param("spectre_v2", spectre_v2_setup_early);
+
+static void __init_or_module __nospec_revert(s32 *start, s32 *end)
+{
+ enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type;
+ u8 *instr, *thunk, *br;
+ u8 insnbuf[6];
+ s32 *epo;
+
+ /* Second part of the instruction replace is always a nop */
+ memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
+ for (epo = start; epo < end; epo++) {
+ instr = (u8 *) epo + *epo;
+ if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
+ type = BRCL_EXPOLINE; /* brcl instruction */
+ else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05)
+ type = BRASL_EXPOLINE; /* brasl instruction */
+ else
+ continue;
+ thunk = instr + (*(int *)(instr + 2)) * 2;
+ if (thunk[0] == 0xc6 && thunk[1] == 0x00)
+ /* exrl %r0,<target-br> */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else if (thunk[0] == 0xc0 && (thunk[1] & 0x0f) == 0x00 &&
+ thunk[6] == 0x44 && thunk[7] == 0x00 &&
+ (thunk[8] & 0x0f) == 0x00 && thunk[9] == 0x00 &&
+ (thunk[1] & 0xf0) == (thunk[8] & 0xf0))
+ /* larl %rx,<target br> + ex %r0,0(%rx) */
+ br = thunk + (*(int *)(thunk + 2)) * 2;
+ else
+ continue;
+ if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+ continue;
+ switch (type) {
+ case BRCL_EXPOLINE:
+ /* brcl to thunk, replace with br + nop */
+ insnbuf[0] = br[0];
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ case BRASL_EXPOLINE:
+ /* brasl to thunk, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ break;
+ }
+
+ s390_kernel_write(instr, insnbuf, 6);
+ }
+}
+
+void __init_or_module nospec_call_revert(s32 *start, s32 *end)
+{
+ if (nospec_call_disable)
+ __nospec_revert(start, end);
+}
+
+void __init_or_module nospec_return_revert(s32 *start, s32 *end)
+{
+ if (nospec_return_disable)
+ __nospec_revert(start, end);
+}
+
+extern s32 __nospec_call_start[], __nospec_call_end[];
+extern s32 __nospec_return_start[], __nospec_return_end[];
+void __init nospec_init_branches(void)
+{
+ nospec_call_revert(__nospec_call_start, __nospec_call_end);
+ nospec_return_revert(__nospec_return_start, __nospec_return_end);
+}
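Note: two pieces of pointer arithmetic recur in __nospec_revert(): the table entries are self-relative 32-bit offsets (emitted as ".long 0b-." by the entry.S macros), and RIL-format branches (brcl/brasl) carry a signed halfword offset in bytes 2..5, relative to the instruction address. As a standalone sketch (helper names are local stand-ins):

	static u8 *entry_to_site(s32 *epo)
	{
		return (u8 *)epo + *epo;		/* self-relative table entry */
	}

	static u8 *ril_target(u8 *instr)
	{
		return instr + (*(s32 *)(instr + 2)) * 2;	/* halfword offset */
	}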
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 94f90cefbffc..c5bc3f209652 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -226,7 +226,7 @@ CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1);
CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2);
-CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e9);
+CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8);
CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4);
CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5362fd868d0d..6fe2e1875058 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -197,3 +197,21 @@ const struct seq_operations cpuinfo_op = {
.stop = c_stop,
.show = show_cpuinfo,
};
+
+int s390_isolate_bp(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp);
+
+int s390_isolate_bp_guest(void)
+{
+ if (!test_facility(82))
+ return -EOPNOTSUPP;
+ set_thread_flag(TIF_ISOLATE_BP_GUEST);
+ return 0;
+}
+EXPORT_SYMBOL(s390_isolate_bp_guest);
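Note: a hypothetical in-kernel caller would use these as an opt-in, falling back gracefully on machines without facility 82; a sketch (function name is illustrative only):

	static int opt_in_bp_isolation(void)
	{
		int rc = s390_isolate_bp();	/* sets TIF_ISOLATE_BP on current */

		if (rc == -EOPNOTSUPP)
			pr_info("facility 82 absent, no BP isolation\n");
		return rc;
	}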
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index 09f5bf0d5c0c..125c7f6e8715 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -18,6 +18,8 @@
#include <asm/cpu_mf.h>
#include <asm/irq.h>
+#include "entry.h"
+
/* empty control block to disable RI by loading it */
struct runtime_instr_cb runtime_instr_empty_cb;
@@ -59,7 +61,13 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
cb->v = 1;
}
-SYSCALL_DEFINE1(s390_runtime_instr, int, command)
+/*
+ * The signum argument is unused. In older kernels it was used to
+ * specify a real-time signal. For backwards compatibility user space
+ * should pass a valid real-time signal number (the signum argument
+ * was checked in older kernels).
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
{
struct runtime_instr_cb *cb;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 793da97f9a6e..a6a91f01a17a 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -68,6 +68,7 @@
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
+#include <asm/nospec-branch.h>
#include "entry.h"
/*
@@ -340,7 +341,9 @@ static void __init setup_lowcore(void)
lc->preempt_count = S390_lowcore.preempt_count;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
nmi_alloc_boot_cpu(lc);
vdso_alloc_boot_cpu(lc);
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
@@ -377,6 +380,7 @@ static void __init setup_lowcore(void)
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
#endif
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
@@ -952,6 +956,8 @@ void __init setup_arch(char **cmdline_p)
set_preferred_console();
apply_alternative_instructions();
+ if (IS_ENABLED(CONFIG_EXPOLINE))
+ nospec_init_branches();
/* Setup zfcpdump support */
setup_zfcpdump();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a919b2f0141d..a4a9fe1934e9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -214,6 +214,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
+ lc->br_r1_trampoline = 0x07f1; /* br %r1 */
if (nmi_alloc_per_cpu(lc))
goto out;
if (vdso_alloc_per_cpu(lc))
@@ -266,7 +267,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
__ctl_store(lc->cregs_save_area, 0, 15);
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
- MAX_FACILITY_BIT/8);
+ sizeof(lc->stfle_fac_list));
+ memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
+ sizeof(lc->alt_stfle_fac_list));
arch_spin_lock_setup(cpu);
}
@@ -317,6 +320,7 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
mem_assign_absolute(lc->restart_fn, (unsigned long) func);
mem_assign_absolute(lc->restart_data, (unsigned long) data);
mem_assign_absolute(lc->restart_source, source_cpu);
+ __bpon();
asm volatile(
"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
" brc 2,0b # busy, try again\n"
@@ -901,6 +905,7 @@ void __cpu_die(unsigned int cpu)
void __noreturn cpu_die(void)
{
idle_task_exit();
+ __bpon();
pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
for (;;) ;
}
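Note: the 0x07f1 trampoline written into the lowcore is the two-byte RR encoding of "bcr 15,%r1", an unconditional br %r1; the non-EXRL expoline thunk in module.c executes it via "ex". A sketch of the encoding:

	static unsigned short encode_bcr(unsigned int mask, unsigned int reg)
	{
		return 0x0700 | (mask << 4) | reg;	/* opcode 07, mask, register */
	}
	/* encode_bcr(15, 1) == 0x07f1, i.e. br %r1 */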
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index a441cba8d165..fc7e04c2195b 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -89,6 +89,8 @@ static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
seq_printf(m, "Type: %-4.4s\n", info->type);
+ if (info->lic)
+ seq_printf(m, "LIC Identifier: %016lx\n", info->lic);
/*
* Sigh: the model field has been renamed with System z9
* to model_capacity and a new model field has been added
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 608cf2987d19..08d12cfaf091 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -123,6 +123,20 @@ SECTIONS
*(.altinstr_replacement)
}
+ /*
+ * Table with the patch locations to undo expolines
+ */
+ .nospec_call_table : {
+ __nospec_call_start = . ;
+ *(.s390_indirect*)
+ __nospec_call_end = . ;
+ }
+ .nospec_return_table : {
+ __nospec_return_start = . ;
+ *(.s390_return*)
+ __nospec_return_end = . ;
+ }
+
/* early.c uses stsi, which requires page aligned data. */
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 9a4594e0a1ff..a3dbd459cce9 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 89aa114a2cba..45634b3d2e0a 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -257,6 +257,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
case 0x500:
return __diag_virtio_hypercall(vcpu);
default:
+ vcpu->stat.diagnose_other++;
return -EOPNOTSUPP;
}
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 024ad8bcc516..aabf46f5f883 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -36,7 +36,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id)
{
int c, scn;
- if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND))
+ if (!kvm_s390_test_cpuflags(vcpu, CPUSTAT_ECALL_PEND))
return 0;
BUG_ON(!kvm_s390_use_sca_entries());
@@ -101,18 +101,17 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
/* another external call is pending */
return -EBUSY;
}
- atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
return 0;
}
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc, expect;
if (!kvm_s390_use_sca_entries())
return;
- atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
@@ -190,8 +189,8 @@ static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
static inline int is_ioirq(unsigned long irq_type)
{
- return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
- (irq_type <= IRQ_PEND_IO_ISC_7));
+ return ((irq_type >= IRQ_PEND_IO_ISC_7) &&
+ (irq_type <= IRQ_PEND_IO_ISC_0));
}
static uint64_t isc_to_isc_bits(int isc)
@@ -199,25 +198,59 @@ static uint64_t isc_to_isc_bits(int isc)
return (0x80 >> isc) << 24;
}
+static inline u32 isc_to_int_word(u8 isc)
+{
+ return ((u32)isc << 27) | 0x80000000;
+}
+
static inline u8 int_word_to_isc(u32 int_word)
{
return (int_word & 0x38000000) >> 27;
}
+/*
+ * To use atomic bitmap functions, we have to provide a bitmap address
+ * that is u64 aligned. However, the ipm might be u32 aligned.
+ * Therefore, we logically start the bitmap at the very beginning of the
+ * struct and fixup the bit number.
+ */
+#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
+
+static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa)
+{
+ return READ_ONCE(gisa->ipm);
+}
+
+static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
+static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
+{
+ return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
+}
+
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ vcpu->arch.local_int.pending_irqs |
+ kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7;
}
static inline int isc_to_irq_type(unsigned long isc)
{
- return IRQ_PEND_IO_ISC_0 + isc;
+ return IRQ_PEND_IO_ISC_0 - isc;
}
static inline int irq_type_to_isc(unsigned long irq_type)
{
- return irq_type - IRQ_PEND_IO_ISC_0;
+ return IRQ_PEND_IO_ISC_0 - irq_type;
}
static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
@@ -278,20 +311,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
static void __set_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
+ set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
- clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
+ clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IO_INT | CPUSTAT_EXT_INT |
+ CPUSTAT_STOP_INT);
vcpu->arch.sie_block->lctl = 0x0000;
vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
@@ -302,17 +335,12 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
}
}
-static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
-{
- atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
-}
-
static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
{
if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK))
return;
else if (psw_ioint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR6;
}
@@ -322,7 +350,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK))
return;
if (psw_extint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
else
vcpu->arch.sie_block->lctl |= LCTL_CR0;
}
@@ -340,7 +368,7 @@ static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
{
if (kvm_s390_is_stop_irq_pending(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
}
/* Set interception request for non-deliverable interrupts */
@@ -897,18 +925,38 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
+static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io)
+{
+ int rc;
+
+ rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, io->io_int_word, (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
unsigned long irq_type)
{
struct list_head *isc_list;
struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_io_info io;
+ u32 isc;
int rc = 0;
fi = &vcpu->kvm->arch.float_int;
spin_lock(&fi->lock);
- isc_list = &fi->lists[irq_type_to_isc(irq_type)];
+ isc = irq_type_to_isc(irq_type);
+ isc_list = &fi->lists[isc];
inti = list_first_entry_or_null(isc_list,
struct kvm_s390_interrupt_info,
list);
@@ -936,24 +984,31 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
spin_unlock(&fi->lock);
if (inti) {
- rc = put_guest_lc(vcpu, inti->io.subchannel_id,
- (u16 *)__LC_SUBCHANNEL_ID);
- rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
- (u16 *)__LC_SUBCHANNEL_NR);
- rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
- (u32 *)__LC_IO_INT_PARM);
- rc |= put_guest_lc(vcpu, inti->io.io_int_word,
- (u32 *)__LC_IO_INT_WORD);
- rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
- &vcpu->arch.sie_block->gpsw,
- sizeof(psw_t));
+ rc = __do_deliver_io(vcpu, &(inti->io));
kfree(inti);
+ goto out;
}
- return rc ? -EFAULT : 0;
+ if (vcpu->kvm->arch.gisa &&
+ kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) {
+ /*
+ * If an adapter interrupt was not delivered while in SIE
+ * context, KVM handles the delivery here.
+ */
+ VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
+ memset(&io, 0, sizeof(io));
+ io.io_int_word = isc_to_int_word(isc);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_IO(1, 0, 0, 0),
+ ((__u32)io.subchannel_id << 16) |
+ io.subchannel_nr,
+ ((__u64)io.io_int_parm << 32) |
+ io.io_int_word);
+ rc = __do_deliver_io(vcpu, &io);
+ }
+out:
+ return rc;
}
typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
@@ -1155,8 +1210,8 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
while ((irqs = deliverable_irqs(vcpu)) && !rc) {
- /* bits are in the order of interrupt priority */
- irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
+ /* bits are in the reverse order of interrupt priority */
+ irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT);
if (is_ioirq(irq_type)) {
rc = __deliver_io(vcpu, irq_type);
} else {
@@ -1228,7 +1283,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
li->irq.ext = irq->u.ext;
set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1253,7 +1308,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
return -EBUSY;
*extcall = irq->u.extcall;
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1297,7 +1352,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs))
return -EBUSY;
stop->flags = irq->u.stop.flags;
- __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
return 0;
}
@@ -1329,7 +1384,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1373,7 +1428,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1386,7 +1441,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
0, 0);
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT);
return 0;
}
@@ -1416,20 +1471,86 @@ static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
return NULL;
}
+static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm,
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int isc;
+
+ for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+ if (isc_mask & isc_to_isc_bits(isc))
+ inti = get_io_int(kvm, isc, schid);
+ }
+ return inti;
+}
+
+static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid)
+{
+ unsigned long active_mask;
+ int isc;
+
+ if (schid)
+ goto out;
+ if (!kvm->arch.gisa)
+ goto out;
+
+ active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32;
+ while (active_mask) {
+ isc = __fls(active_mask) ^ (BITS_PER_LONG - 1);
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc))
+ return isc;
+ clear_bit_inv(isc, &active_mask);
+ }
+out:
+ return -EINVAL;
+}
+
/*
* Dequeue and return an I/O interrupt matching any of the interruption
* subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ * Take into account the interrupts pending in the interrupt list and in GISA.
+ *
+ * Note that for a guest that does not enable I/O interrupts
+ * but relies on TPI, a flood of classic interrupts may starve
+ * out adapter interrupts on the same isc. Linux does not do
+ * that, and it is possible to work around the issue by configuring
+ * different iscs for classic and adapter interrupts in the guest,
+ * but we may want to revisit this in the future.
*/
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 isc_mask, u32 schid)
{
- struct kvm_s390_interrupt_info *inti = NULL;
+ struct kvm_s390_interrupt_info *inti, *tmp_inti;
int isc;
- for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
- if (isc_mask & isc_to_isc_bits(isc))
- inti = get_io_int(kvm, isc, schid);
+ inti = get_top_io_int(kvm, isc_mask, schid);
+
+ isc = get_top_gisa_isc(kvm, isc_mask, schid);
+ if (isc < 0)
+ /* no AI in GISA */
+ goto out;
+
+ if (!inti)
+ /* AI in GISA but no classical IO int */
+ goto gisa_out;
+
+ /* both types of interrupts present */
+ if (int_word_to_isc(inti->io.io_int_word) <= isc) {
+ /* classical IO int with higher priority */
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ goto out;
}
+gisa_out:
+ tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (tmp_inti) {
+ tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ tmp_inti->io.io_int_word = isc_to_int_word(isc);
+ if (inti)
+ kvm_s390_reinject_io_int(kvm, inti);
+ inti = tmp_inti;
+ } else
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+out:
return inti;
}
@@ -1517,6 +1638,15 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
struct list_head *list;
int isc;
+ isc = int_word_to_isc(inti->io.io_int_word);
+
+ if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
+ kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc);
+ kfree(inti);
+ return 0;
+ }
+
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
@@ -1532,7 +1662,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
inti->io.subchannel_id >> 8,
inti->io.subchannel_id >> 1 & 0x3,
inti->io.subchannel_nr);
- isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
set_bit(isc_to_irq_type(isc), &fi->pending_irqs);
@@ -1546,7 +1675,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
- struct kvm_s390_local_interrupt *li;
struct kvm_vcpu *dst_vcpu;
int sigcpu, online_vcpus, nr_tries = 0;
@@ -1568,20 +1696,17 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
/* make the VCPU drop out of the SIE, or wake it up if sleeping */
- li = &dst_vcpu->arch.local_int;
- spin_lock(&li->lock);
switch (type) {
case KVM_S390_MCHK:
- atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- atomic_or(CPUSTAT_IO_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
- atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
+ kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
break;
}
- spin_unlock(&li->lock);
kvm_s390_vcpu_wakeup(dst_vcpu);
}
@@ -1820,6 +1945,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
for (i = 0; i < FIRQ_MAX_COUNT; i++)
fi->counters[i] = 0;
spin_unlock(&fi->lock);
+ kvm_s390_gisa_clear(kvm);
};
static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
@@ -1847,6 +1973,22 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
max_irqs = len / sizeof(struct kvm_s390_irq);
+ if (kvm->arch.gisa &&
+ kvm_s390_gisa_get_ipm(kvm->arch.gisa)) {
+ for (i = 0; i <= MAX_ISC; i++) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out_nolock;
+ }
+ if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) {
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
+ irq->u.io.io_int_word = isc_to_int_word(i);
+ n++;
+ }
+ }
+ }
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++) {
@@ -1885,6 +2027,7 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
out:
spin_unlock(&fi->lock);
+out_nolock:
if (!ret && n > 0) {
if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
ret = -EFAULT;
@@ -2245,7 +2388,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
struct kvm_s390_interrupt s390int = {
.type = KVM_S390_INT_IO(1, 0, 0, 0),
.parm = 0,
- .parm64 = (adapter->isc << 27) | 0x80000000,
+ .parm64 = isc_to_int_word(adapter->isc),
};
int ret = 0;
@@ -2687,3 +2830,28 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
return n;
}
+
+void kvm_s390_gisa_clear(struct kvm *kvm)
+{
+ if (kvm->arch.gisa) {
+ memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa));
+ kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa);
+ }
+}
+
+void kvm_s390_gisa_init(struct kvm *kvm)
+{
+ if (css_general_characteristics.aiv) {
+ kvm->arch.gisa = &kvm->arch.sie_page2->gisa;
+ VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa);
+ kvm_s390_gisa_clear(kvm);
+ }
+}
+
+void kvm_s390_gisa_destroy(struct kvm *kvm)
+{
+ if (!kvm->arch.gisa)
+ return;
+ kvm->arch.gisa = NULL;
+}
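Note: the interruption-word helpers introduced above place the ISC in bits 2..4 (MSB-0 numbering) and set bit 0 to mark an adapter interrupt. A self-contained round-trip check mirroring isc_to_int_word()/int_word_to_isc() (names here are local stand-ins, buildable as plain userspace C):

	#include <assert.h>
	#include <stdint.h>

	static uint32_t to_int_word(uint8_t isc) { return ((uint32_t)isc << 27) | 0x80000000u; }
	static uint8_t  to_isc(uint32_t word)    { return (word & 0x38000000u) >> 27; }

	int main(void)
	{
		for (uint8_t isc = 0; isc <= 7; isc++)	/* MAX_ISC is 7 */
			assert(to_isc(to_int_word(isc)) == isc);
		return 0;
	}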
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1371dff2b90d..ba4c7092335a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2017
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -87,19 +87,31 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_epsw", VCPU_STAT(instruction_epsw) },
+ { "instruction_gs", VCPU_STAT(instruction_gs) },
+ { "instruction_io_other", VCPU_STAT(instruction_io_other) },
+ { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
+ { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
+ { "instruction_ptff", VCPU_STAT(instruction_ptff) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_sck", VCPU_STAT(instruction_sck) },
+ { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_iske", VCPU_STAT(instruction_iske) },
+ { "instruction_ri", VCPU_STAT(instruction_ri) },
+ { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
+ { "instruction_sske", VCPU_STAT(instruction_sske) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_stsch", VCPU_STAT(instruction_stsch) },
- { "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_tb", VCPU_STAT(instruction_tb) },
+ { "instruction_tpi", VCPU_STAT(instruction_tpi) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_tsch", VCPU_STAT(instruction_tsch) },
{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
@@ -118,12 +130,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "diagnose_10", VCPU_STAT(diagnose_10) },
- { "diagnose_44", VCPU_STAT(diagnose_44) },
- { "diagnose_9c", VCPU_STAT(diagnose_9c) },
- { "diagnose_258", VCPU_STAT(diagnose_258) },
- { "diagnose_308", VCPU_STAT(diagnose_308) },
- { "diagnose_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_10", VCPU_STAT(diagnose_10) },
+ { "instruction_diag_44", VCPU_STAT(diagnose_44) },
+ { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
+ { "instruction_diag_258", VCPU_STAT(diagnose_258) },
+ { "instruction_diag_308", VCPU_STAT(diagnose_308) },
+ { "instruction_diag_500", VCPU_STAT(diagnose_500) },
+ { "instruction_diag_other", VCPU_STAT(diagnose_other) },
{ NULL }
};
@@ -576,7 +589,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
case KVM_CAP_S390_GS:
r = -EINVAL;
mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus)) {
+ if (kvm->created_vcpus) {
r = -EBUSY;
} else if (test_facility(133)) {
set_kvm_facility(kvm->arch.model.fac_mask, 133);
@@ -1088,7 +1101,6 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_feat data;
- int ret = -EBUSY;
if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
return -EFAULT;
@@ -1098,13 +1110,18 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm,
return -EINVAL;
mutex_lock(&kvm->lock);
- if (!atomic_read(&kvm->online_vcpus)) {
- bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
- KVM_S390_VM_CPU_FEAT_NR_BITS);
- ret = 0;
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
}
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
mutex_unlock(&kvm->lock);
- return ret;
+ VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
+ return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
@@ -1206,6 +1223,10 @@ static int kvm_s390_get_processor_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1219,6 +1240,10 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
+ data.feat[0],
+ data.feat[1],
+ data.feat[2]);
return 0;
}
@@ -1911,6 +1936,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
+ BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
kvm->arch.sie_page2 =
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.sie_page2)
@@ -1981,6 +2007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2043,6 +2070,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_vcpus(kvm);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
+ kvm_s390_gisa_destroy(kvm);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2314,7 +2342,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
gmap_enable(vcpu->arch.enabled_gmap);
- atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
vcpu->cpu = cpu;
@@ -2325,7 +2353,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->cpu = -1;
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
- atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
vcpu->arch.enabled_gmap = gmap_get_enabled();
gmap_disable(vcpu->arch.enabled_gmap);
@@ -2422,9 +2450,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
CPUSTAT_STOPPED);
if (test_kvm_facility(vcpu->kvm, 78))
- atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
else if (test_kvm_facility(vcpu->kvm, 8))
- atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
kvm_s390_vcpu_setup_model(vcpu);
@@ -2456,12 +2484,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF;
+ if (vcpu->arch.sie_block->gd) {
+ vcpu->arch.sie_block->eca |= ECA_AIV;
+ VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
+ vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
+ }
vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
| SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
if (sclp.has_kss)
- atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
else
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
@@ -2508,9 +2541,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- vcpu->arch.local_int.wq = &vcpu->wq;
- vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
+ if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
+ vcpu->arch.sie_block->gd |= GISA_FORMAT1;
seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
@@ -2567,7 +2600,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
* return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
- atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
@@ -2720,47 +2753,70 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ vcpu_load(vcpu);
+
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- if (test_fp_ctl(fpu->fpc))
- return -EINVAL;
+ int ret = 0;
+
+ vcpu_load(vcpu);
+
+ if (test_fp_ctl(fpu->fpc)) {
+ ret = -EINVAL;
+ goto out;
+ }
vcpu->run->s.regs.fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
(freg_t *) fpu->fprs);
else
memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- return 0;
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ vcpu_load(vcpu);
+
/* make sure we have the latest values */
save_fpu_regs();
if (MACHINE_HAS_VX)
@@ -2769,6 +2825,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
else
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
fpu->fpc = vcpu->run->s.regs.fpc;
+
+ vcpu_put(vcpu);
return 0;
}
@@ -2800,41 +2858,56 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- if (dbg->control & ~VALID_GUESTDBG_FLAGS)
- return -EINVAL;
- if (!sclp.has_gpere)
- return -EINVAL;
+ if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
+ rc = -EINVAL;
+ goto out;
+ }
+ if (!sclp.has_gpere) {
+ rc = -EINVAL;
+ goto out;
+ }
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
- atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
} else {
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
vcpu->arch.guestdbg.last_bp = 0;
}
if (rc) {
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
- atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
}
+out:
+ vcpu_put(vcpu);
return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
/* CHECK_STOP and LOAD are not supported yet */
- return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
- KVM_MP_STATE_OPERATING;
+ ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
@@ -2842,6 +2915,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int rc = 0;
+ vcpu_load(vcpu);
+
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
@@ -2859,12 +2934,13 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
rc = -ENXIO;
}
+ vcpu_put(vcpu);
return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
@@ -2900,8 +2976,7 @@ retry:
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
- atomic_or(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -2909,8 +2984,7 @@ retry:
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
- atomic_andnot(CPUSTAT_IBS,
- &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
}
goto retry;
}
@@ -3390,9 +3464,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (kvm_run->immediate_exit)
return -EINTR;
+ vcpu_load(vcpu);
+
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);
- return 0;
+ rc = 0;
+ goto out;
}
kvm_sigset_activate(vcpu);
@@ -3402,7 +3479,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
sync_regs(vcpu, kvm_run);
@@ -3432,6 +3510,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_deactivate(vcpu);
vcpu->stat.exit_userspace++;
+out:
+ vcpu_put(vcpu);
return rc;
}
@@ -3560,7 +3640,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
- atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
@@ -3586,7 +3666,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
- atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -3693,36 +3773,45 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- int idx;
- long r;
switch (ioctl) {
case KVM_S390_IRQ: {
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
- break;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
struct kvm_s390_irq s390irq;
- r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
- break;
+ return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
- r = kvm_s390_inject_vcpu(vcpu, &s390irq);
- break;
+ return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ }
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int idx;
+ long r;
+
+ vcpu_load(vcpu);
+
+ switch (ioctl) {
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
@@ -3847,6 +3936,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
default:
r = -ENOTTY;
}
+
+ vcpu_put(vcpu);
return r;
}
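The split above moves interrupt injection into kvm_arch_vcpu_async_ioctl(), which can be issued against a running VCPU, while every remaining vcpu ioctl is bracketed by vcpu_load()/vcpu_put(). A minimal sketch of the dispatch order this assumes in the generic layer (the wrapper name is illustrative, not part of this patch):

	/* Sketch: async orders are tried first, everything else falls
	 * through to the locked path.
	 */
	static long vcpu_ioctl_dispatch(struct file *filp, unsigned int ioctl,
					unsigned long arg)
	{
		long r;

		r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
		if (r != -ENOIOCTLCMD)
			return r;

		/* kvm_arch_vcpu_ioctl() does vcpu_load()/vcpu_put() itself. */
		return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
	}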
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 5e46ba429bcb..bd31b37b0e6f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -47,14 +47,29 @@ do { \
d_args); \
} while (0)
+static inline void kvm_s390_set_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_or(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline void kvm_s390_clear_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ atomic_andnot(flags, &vcpu->arch.sie_block->cpuflags);
+}
+
+static inline bool kvm_s390_test_cpuflags(struct kvm_vcpu *vcpu, u32 flags)
+{
+ return (atomic_read(&vcpu->arch.sie_block->cpuflags) & flags) == flags;
+}
+
static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+ return kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);
}
static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
{
- return test_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+ return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask);
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -367,6 +382,9 @@ int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
__u8 __user *buf, int len);
+void kvm_s390_gisa_init(struct kvm *kvm);
+void kvm_s390_gisa_clear(struct kvm *kvm);
+void kvm_s390_gisa_destroy(struct kvm *kvm);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
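One subtlety of the new helpers: kvm_s390_test_cpuflags() compares against the full mask, so it returns true only if every requested bit is set. A short illustration of the semantics:

	/* kvm_s390_test_cpuflags() requires all bits of the mask to be set. */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED);			/* true */
	kvm_s390_test_cpuflags(vcpu, CPUSTAT_STOPPED | CPUSTAT_IBS);	/* false */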
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0714bfa56da0..c4c4e157c036 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2013
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -34,6 +34,8 @@
static int handle_ri(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ri++;
+
if (test_kvm_facility(vcpu->kvm, 64)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
vcpu->arch.sie_block->ecb3 |= ECB3_RI;
@@ -53,6 +55,8 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
static int handle_gs(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_gs++;
+
if (test_kvm_facility(vcpu->kvm, 133)) {
VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
preempt_disable();
@@ -85,6 +89,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
u8 ar;
u64 op2, val;
+ vcpu->stat.instruction_sck++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -203,14 +209,14 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
trace_kvm_s390_skey_related_inst(vcpu);
if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
- !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS))
+ !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
return rc;
rc = s390_enable_skey();
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
if (!rc) {
- if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)
- atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags);
+ if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
else
sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
ICTL_RRBE);
@@ -222,7 +228,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
{
int rc;
- vcpu->stat.instruction_storage_key++;
rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
@@ -242,6 +247,8 @@ static int handle_iske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_iske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -274,6 +281,8 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_rrbe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -312,6 +321,8 @@ static int handle_sske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ vcpu->stat.instruction_sske++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -392,6 +403,8 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
gpa_t addr;
int reg2;
+ vcpu->stat.instruction_tb++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -424,6 +437,8 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
u64 addr;
u8 ar;
+ vcpu->stat.instruction_tpi++;
+
addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -484,6 +499,8 @@ static int handle_tsch(struct kvm_vcpu *vcpu)
struct kvm_s390_interrupt_info *inti = NULL;
const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+ vcpu->stat.instruction_tsch++;
+
/* a valid schid has at least one bit set */
if (vcpu->run->s.regs.gprs[1])
inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
@@ -527,6 +544,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->ipa == 0xb235)
return handle_tsch(vcpu);
/* Handle in userspace. */
+ vcpu->stat.instruction_io_other++;
return -EOPNOTSUPP;
} else {
/*
@@ -592,6 +610,8 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpsw++;
+
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -619,6 +639,8 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
int rc;
u8 ar;
+ vcpu->stat.instruction_lpswe++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -828,6 +850,8 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
{
int reg1, reg2;
+ vcpu->stat.instruction_epsw++;
+
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
@@ -1332,6 +1356,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
{
u32 value;
+ vcpu->stat.instruction_sckpf++;
+
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
@@ -1347,6 +1373,8 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
static int handle_ptff(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.instruction_ptff++;
+
/* we don't emulate any control instructions yet */
kvm_s390_set_psw_cc(vcpu, 3);
return 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c1f5cde2c878..683036c1c92a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -20,22 +20,18 @@
static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
- int cpuflags;
+ const bool stopped = kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED);
int rc;
int ext_call_pending;
- li = &dst_vcpu->arch.local_int;
-
- cpuflags = atomic_read(li->cpuflags);
ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
- if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
+ if (!stopped && !ext_call_pending)
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
if (ext_call_pending)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
- if (cpuflags & CPUSTAT_STOPPED)
+ if (stopped)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
@@ -208,11 +204,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu,
u32 addr, u64 *reg)
{
- int flags;
int rc;
- flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
- if (!(flags & CPUSTAT_STOPPED)) {
+ if (!kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
return SIGP_CC_STATUS_STORED;
@@ -231,7 +225,6 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
static int __sigp_sense_running(struct kvm_vcpu *vcpu,
struct kvm_vcpu *dst_vcpu, u64 *reg)
{
- struct kvm_s390_local_interrupt *li;
int rc;
if (!test_kvm_facility(vcpu->kvm, 9)) {
@@ -240,8 +233,7 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
return SIGP_CC_STATUS_STORED;
}
- li = &dst_vcpu->arch.local_int;
- if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ if (kvm_s390_test_cpuflags(dst_vcpu, CPUSTAT_RUNNING)) {
/* running */
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
} else {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 751348348477..ec772700ff96 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -28,13 +28,23 @@ struct vsie_page {
* the same offset as that in struct sie_page!
*/
struct mcck_volatile_info mcck_info; /* 0x0200 */
- /* the pinned originial scb */
+ /*
+ * The pinned original scb. Be aware that other VCPUs can modify
+ * it while we read from it. Values that are used for conditions or
+ * are reused conditionally should be accessed via READ_ONCE.
+ */
struct kvm_s390_sie_block *scb_o; /* 0x0218 */
/* the shadow gmap in use by the vsie_page */
struct gmap *gmap; /* 0x0220 */
/* address of the last reported fault to guest2 */
unsigned long fault_addr; /* 0x0228 */
- __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */
+ /* calculated guest addresses of satellite control blocks */
+ gpa_t sca_gpa; /* 0x0230 */
+ gpa_t itdba_gpa; /* 0x0238 */
+ gpa_t gvrd_gpa; /* 0x0240 */
+ gpa_t riccbd_gpa; /* 0x0248 */
+ gpa_t sdnx_gpa; /* 0x0250 */
+ __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */
struct kvm_s390_crypto_cb crycb; /* 0x0700 */
__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
};
@@ -140,12 +150,13 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
- u32 crycb_addr = scb_o->crycbd & 0x7ffffff8U;
+ const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
+ const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
unsigned long *b1, *b2;
u8 ecb3_flags;
scb_s->crycbd = 0;
- if (!(scb_o->crycbd & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
+ if (!(crycbd_o & vcpu->arch.sie_block->crycbd & CRYCB_FORMAT1))
return 0;
/* format-1 is supported with message-security-assist extension 3 */
if (!test_kvm_facility(vcpu->kvm, 76))
@@ -183,12 +194,15 @@ static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_ibc = scb_o->ibc;
+ const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;
scb_s->ibc = 0;
/* ibc installed in g2 and requested for g3 */
- if (vcpu->kvm->arch.model.ibc && (scb_o->ibc & 0x0fffU)) {
- scb_s->ibc = scb_o->ibc & 0x0fffU;
+ if (vcpu->kvm->arch.model.ibc && new_ibc) {
+ scb_s->ibc = new_ibc;
/* take care of the minimum ibc level of the machine */
if (scb_s->ibc < min_ibc)
scb_s->ibc = min_ibc;
@@ -259,6 +273,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ /* READ_ONCE does not work on bitfields - use a temporary variable */
+ const uint32_t __new_prefix = scb_o->prefix;
+ const uint32_t new_prefix = READ_ONCE(__new_prefix);
+ const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
bool had_tx = scb_s->ecb & ECB_TE;
unsigned long new_mso = 0;
int rc;
@@ -306,14 +324,14 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->icpua = scb_o->icpua;
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
- new_mso = scb_o->mso & 0xfffffffffff00000UL;
+ new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
/* if the hva of the prefix changes, we have to remap the prefix */
- if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
+ if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
prefix_unmapped(vsie_page);
/* SIE will do mso/msl validity and exception checks for us */
scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
scb_s->mso = new_mso;
- scb_s->prefix = scb_o->prefix;
+ scb_s->prefix = new_prefix;
/* We definitely have to flush the tlb if this scb never ran */
if (scb_s->ihcpu != 0xffffU)
@@ -325,11 +343,11 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
/* transactional execution */
- if (test_kvm_facility(vcpu->kvm, 73)) {
+ if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
/* remap the prefix if tx is toggled on */
- if ((scb_o->ecb & ECB_TE) && !had_tx)
+ if (!had_tx)
prefix_unmapped(vsie_page);
- scb_s->ecb |= scb_o->ecb & ECB_TE;
+ scb_s->ecb |= ECB_TE;
}
/* branch prediction */
if (test_kvm_facility(vcpu->kvm, 82))
@@ -473,46 +491,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
- struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
hpa_t hpa;
- gpa_t gpa;
hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
if (hpa) {
- gpa = scb_o->scaol & ~0xfUL;
- if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+ vsie_page->sca_gpa = 0;
scb_s->scaol = 0;
scb_s->scaoh = 0;
}
hpa = scb_s->itdba;
if (hpa) {
- gpa = scb_o->itdba & ~0xffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+ vsie_page->itdba_gpa = 0;
scb_s->itdba = 0;
}
hpa = scb_s->gvrd;
if (hpa) {
- gpa = scb_o->gvrd & ~0x1ffUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+ vsie_page->gvrd_gpa = 0;
scb_s->gvrd = 0;
}
hpa = scb_s->riccbd;
if (hpa) {
- gpa = scb_o->riccbd & ~0x3fUL;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+ vsie_page->riccbd_gpa = 0;
scb_s->riccbd = 0;
}
hpa = scb_s->sdnxo;
if (hpa) {
- gpa = scb_o->sdnxo;
- unpin_guest_page(vcpu->kvm, gpa, hpa);
+ unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+ vsie_page->sdnx_gpa = 0;
scb_s->sdnxo = 0;
}
}
@@ -539,9 +553,9 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
gpa_t gpa;
int rc = 0;
- gpa = scb_o->scaol & ~0xfUL;
+ gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
- gpa |= (u64) scb_o->scaoh << 32;
+ gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
if (gpa) {
if (!(gpa & ~0x1fffUL))
rc = set_validity_icpt(scb_s, 0x0038U);
@@ -557,11 +571,12 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
if (rc)
goto unpin;
+ vsie_page->sca_gpa = gpa;
scb_s->scaoh = (u32)((u64)hpa >> 32);
scb_s->scaol = (u32)(u64)hpa;
}
- gpa = scb_o->itdba & ~0xffUL;
+ gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
if (gpa && (scb_s->ecb & ECB_TE)) {
if (!(gpa & ~0x1fffU)) {
rc = set_validity_icpt(scb_s, 0x0080U);
@@ -573,10 +588,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
}
+ vsie_page->itdba_gpa = gpa;
scb_s->itdba = hpa;
}
- gpa = scb_o->gvrd & ~0x1ffUL;
+ gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x1310U);
@@ -591,10 +607,11 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x1310U);
goto unpin;
}
+ vsie_page->gvrd_gpa = gpa;
scb_s->gvrd = hpa;
}
- gpa = scb_o->riccbd & ~0x3fUL;
+ gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
if (gpa && (scb_s->ecb3 & ECB3_RI)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0043U);
@@ -607,13 +624,14 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
goto unpin;
}
/* Validity 0x0044 will be checked by SIE */
+ vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
unsigned long sdnxc;
- gpa = scb_o->sdnxo & ~0xfUL;
- sdnxc = scb_o->sdnxo & 0xfUL;
+ gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
+ sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
if (!gpa || !(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
@@ -634,6 +652,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
rc = set_validity_icpt(scb_s, 0x10b0U);
goto unpin;
}
+ vsie_page->sdnx_gpa = gpa;
scb_s->sdnxo = hpa | sdnxc;
}
return 0;
@@ -778,7 +797,7 @@ static void retry_vsie_icpt(struct vsie_page *vsie_page)
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- __u32 fac = vsie_page->scb_o->fac & 0x7ffffff8U;
+ __u32 fac = READ_ONCE(vsie_page->scb_o->fac) & 0x7ffffff8U;
if (fac && test_kvm_facility(vcpu->kvm, 7)) {
retry_vsie_icpt(vsie_page);
@@ -904,7 +923,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
* External calls have to lead to a kick of the vcpu and
* therefore the vsie -> Simulate Wait state.
*/
- atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
/*
* We have to adjust the g3 epoch by the g2 epoch. The epoch will
* automatically be adjusted on tod clock changes via kvm_sync_clock.
@@ -926,7 +945,7 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
*/
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
- atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}
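Two patterns recur in the vsie changes: fields of the guest-writable scb_o are read exactly once via READ_ONCE() and the value is reused, and because READ_ONCE() cannot be applied to a bitfield, the containing word is first copied into a plain temporary. A condensed sketch of that workaround, as used for ibc and prefix above:

	/* READ_ONCE() needs a scalar lvalue, so copy the bitfield into a
	 * temporary first; every later use goes through the one masked read.
	 */
	const uint32_t __new_ibc = scb_o->ibc;
	const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;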
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 05d459b638f5..2c55a2b9d6c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -815,27 +815,17 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
* @ptl: pointer to the spinlock pointer
*
* Returns a pointer to the locked pte for a guest address, or NULL
- *
- * Note: Can also be called for shadow gmaps.
*/
static pte_t *gmap_pte_op_walk(struct gmap *gmap, unsigned long gaddr,
spinlock_t **ptl)
{
unsigned long *table;
- if (gmap_is_shadow(gmap))
- spin_lock(&gmap->guest_table_lock);
+ BUG_ON(gmap_is_shadow(gmap));
/* Walk the gmap page table, lock and get pte pointer */
table = gmap_table_walk(gmap, gaddr, 1); /* get segment pointer */
- if (!table || *table & _SEGMENT_ENTRY_INVALID) {
- if (gmap_is_shadow(gmap))
- spin_unlock(&gmap->guest_table_lock);
+ if (!table || *table & _SEGMENT_ENTRY_INVALID)
return NULL;
- }
- if (gmap_is_shadow(gmap)) {
- *ptl = &gmap->guest_table_lock;
- return pte_offset_map((pmd_t *) table, gaddr);
- }
return pte_alloc_map_lock(gmap->mm, (pmd_t *) table, gaddr, ptl);
}
@@ -889,8 +879,6 @@ static void gmap_pte_op_end(spinlock_t *ptl)
* -EFAULT if gaddr is invalid (or mapping for shadows is missing).
*
* Called with sg->mm->mmap_sem in read.
- *
- * Note: Can also be called for shadow gmaps.
*/
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
@@ -900,6 +888,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
pte_t *ptep;
int rc;
+ BUG_ON(gmap_is_shadow(gmap));
while (len) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -960,7 +949,8 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
* @val: pointer to the unsigned long value to return
*
* Returns 0 if the value was read, -ENOMEM if out of memory and -EFAULT
- * if reading using the virtual address failed.
+ * if reading using the virtual address failed, and -EINVAL if called
+ * on a gmap shadow.
*
* Called with gmap->mm->mmap_sem in read.
*/
@@ -971,6 +961,9 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
pte_t *ptep, pte;
int rc;
+ if (gmap_is_shadow(gmap))
+ return -EINVAL;
+
while (1) {
rc = -EAGAIN;
ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
@@ -1028,18 +1021,17 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
}
/**
- * gmap_protect_rmap - modify access rights to memory and create an rmap
+ * gmap_protect_rmap - restrict access rights to memory (RO) and create an rmap
* @sg: pointer to the shadow guest address space structure
* @raddr: rmap address in the shadow gmap
* @paddr: address in the parent guest address space
* @len: length of the memory area to protect
- * @prot: indicates access rights: none, read-only or read-write
*
* Returns 0 if successfully protected and the rmap was created, -ENOMEM
* if out of memory and -EFAULT if paddr is invalid.
*/
static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
- unsigned long paddr, unsigned long len, int prot)
+ unsigned long paddr, unsigned long len)
{
struct gmap *parent;
struct gmap_rmap *rmap;
@@ -1067,7 +1059,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
ptep = gmap_pte_op_walk(parent, paddr, &ptl);
if (ptep) {
spin_lock(&sg->guest_table_lock);
- rc = ptep_force_prot(parent->mm, paddr, ptep, prot,
+ rc = ptep_force_prot(parent->mm, paddr, ptep, PROT_READ,
PGSTE_VSIE_BIT);
if (!rc)
gmap_insert_rmap(sg, vmaddr, rmap);
@@ -1077,7 +1069,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
radix_tree_preload_end();
if (rc) {
kfree(rmap);
- rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+ rc = gmap_pte_op_fixup(parent, paddr, vmaddr, PROT_READ);
if (rc)
return rc;
continue;
@@ -1616,7 +1608,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
origin = r2t & _REGION_ENTRY_ORIGIN;
offset = ((r2t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r2t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 4);
@@ -1699,7 +1691,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
origin = r3t & _REGION_ENTRY_ORIGIN;
offset = ((r3t & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((r3t & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 3);
@@ -1783,7 +1775,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
origin = sgt & _REGION_ENTRY_ORIGIN;
offset = ((sgt & _REGION_ENTRY_OFFSET) >> 6) * PAGE_SIZE;
len = ((sgt & _REGION_ENTRY_LENGTH) + 1) * PAGE_SIZE - offset;
- rc = gmap_protect_rmap(sg, raddr, origin + offset, len, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin + offset, len);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 2);
@@ -1902,7 +1894,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* Make pgt read-only in parent gmap page table (not the pgste) */
raddr = (saddr & _SEGMENT_MASK) | _SHADOW_RMAP_SEGMENT;
origin = pgt & _SEGMENT_ENTRY_ORIGIN & PAGE_MASK;
- rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE, PROT_READ);
+ rc = gmap_protect_rmap(sg, raddr, origin, PAGE_SIZE);
spin_lock(&sg->guest_table_lock);
if (!rc) {
table = gmap_table_walk(sg, saddr, 1);
@@ -2005,7 +1997,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_page);
* Called with sg->parent->shadow_lock.
*/
static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
- unsigned long gaddr, pte_t *pte)
+ unsigned long gaddr)
{
struct gmap_rmap *rmap, *rnext, *head;
unsigned long start, end, bits, raddr;
@@ -2090,7 +2082,7 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
spin_lock(&gmap->shadow_lock);
list_for_each_entry_safe(sg, next,
&gmap->children, list)
- gmap_shadow_notify(sg, vmaddr, gaddr, pte);
+ gmap_shadow_notify(sg, vmaddr, gaddr);
spin_unlock(&gmap->shadow_lock);
}
if (bits & PGSTE_IN_BIT)
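With gmap_pte_op_walk() and gmap_protect_range() now rejecting shadow gmaps outright, gmap_protect_rmap() is the only protection path left for shadows, and it hard-codes PROT_READ. Every shadow-table call site therefore collapses to the same shape:

	/* Each shadow table level is made read-only in the parent gmap. */
	rc = gmap_protect_rmap(sg, raddr, origin + offset, len);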
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 671535e64aba..3fa3e5323612 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init);
#endif /* CONFIG_CMA */
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
@@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, size_pages, want_memblock);
+ rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
if (rc)
vmem_remove_mapping(start, size);
return rc;
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
/*
* There is no hardware or firmware interface which could trigger a
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 3316d463fc29..db55561c5981 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
/*
* Add a backed mem_map array to the virtual mem_map array.
*/
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
unsigned long pgt_prot, sgt_prot;
unsigned long address = start;
@@ -296,7 +297,8 @@ out:
return ret;
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c
index f3a0649ab521..627416bbd0b1 100644
--- a/arch/score/kernel/setup.c
+++ b/arch/score/kernel/setup.c
@@ -124,9 +124,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
- seq_printf(m, "processor\t\t: %ld\n", n);
- seq_printf(m, "\n");
-
+ seq_printf(m, "processor\t\t: %ld\n\n", n);
return 0;
}
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf726af5f1a5..ce0bbaa7e404 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
/* We only have ZONE_NORMAL, so this is easy.. */
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
return ret;
}
-EXPORT_SYMBOL_GPL(arch_add_memory);
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 addr)
@@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret;
zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (unlikely(ret))
pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
ret);
diff --git a/arch/sparc/include/uapi/asm/poll.h b/arch/sparc/include/uapi/asm/poll.h
index 2a81e79aa3ea..72356c999125 100644
--- a/arch/sparc/include/uapi/asm/poll.h
+++ b/arch/sparc/include/uapi/asm/poll.h
@@ -2,31 +2,11 @@
#ifndef __SPARC_POLL_H
#define __SPARC_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)256
-#define POLLMSG (__force __poll_t)512
-#define POLLREMOVE (__force __poll_t)1024
-#define POLLRDHUP (__force __poll_t)2048
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2, bit 13 -> bit 11 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6) |
- ((v & 0x2000) >> 2);
-
-
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6) | ((v & 0x800) << 2));
-}
-#endif
+#define POLLWRBAND 256
+#define POLLMSG 512
+#define POLLREMOVE 1024
+#define POLLRDHUP 2048
#include <asm-generic/poll.h>
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 55ba62957e64..995f9490334d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
- int node)
+ int node, struct vmem_altmap *altmap)
{
unsigned long pte_base;
@@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
return 0;
}
-void vmemmap_free(unsigned long start, unsigned long end)
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index c4d162a94be9..d5f9a2d1da1b 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -130,6 +130,7 @@ void mconsole_proc(struct mc_request *req)
struct file *file;
int first_chunk = 1;
char *ptr = req->request.data;
+ loff_t pos = 0;
ptr += strlen("proc");
ptr = skip_spaces(ptr);
@@ -148,7 +149,7 @@ void mconsole_proc(struct mc_request *req)
}
do {
- len = kernel_read(file, buf, PAGE_SIZE - 1, &file->f_pos);
+ len = kernel_read(file, buf, PAGE_SIZE - 1, &pos);
if (len < 0) {
mconsole_reply(req, "Read of file failed", 1, 0);
goto out_free;
diff --git a/arch/unicore32/include/asm/bitops.h b/arch/unicore32/include/asm/bitops.h
index 401f597bc38c..c0cbdbe17168 100644
--- a/arch/unicore32/include/asm/bitops.h
+++ b/arch/unicore32/include/asm/bitops.h
@@ -44,4 +44,6 @@ static inline int fls(int x)
#define find_first_bit find_first_bit
#define find_first_zero_bit find_first_zero_bit
+#include <asm-generic/bitops/find.h>
+
#endif /* __UNICORE_BITOPS_H__ */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0771ceabb4b..63bf349b2b24 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,7 @@ config X86
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_PHYS_TO_DMA
+ select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
@@ -62,6 +63,7 @@ config X86
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_ZONE_DEVICE if X86_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -322,7 +324,7 @@ config X86_64_SMP
config X86_32_LAZY_GS
def_bool y
- depends on X86_32 && !CC_STACKPROTECTOR
+ depends on X86_32 && CC_STACKPROTECTOR_NONE
config ARCH_SUPPORTS_UPROBES
def_bool y
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
index 36870b26067a..d08805032f01 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
{
unsigned int j;
- state->lens[0] = 0;
- state->lens[1] = 1;
- state->lens[2] = 2;
- state->lens[3] = 3;
+ /* initially all lanes are unused */
+ state->lens[0] = 0xFFFFFFFF00000000;
+ state->lens[1] = 0xFFFFFFFF00000001;
+ state->lens[2] = 0xFFFFFFFF00000002;
+ state->lens[3] = 0xFFFFFFFF00000003;
+
state->unused_lanes = 0xFF03020100;
for (j = 0; j < 4; j++)
state->ldata[j].job_in_lane = NULL;
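The lens[] entries pack the job length into the high 32 bits and the lane index into the low 32 bits, so a 64-bit compare orders lanes by length first. Tagging idle lanes with 0xFFFFFFFF in the high half makes them compare as maximal, presumably so they can no longer win the minimum-length scan the way a zero-length entry could. A sketch of the assumed scan:

	/* Sketch: pick the shortest in-use lane; idle lanes (high half
	 * 0xFFFFFFFF) compare as maximal and are skipped automatically.
	 */
	static unsigned int shortest_lane(const uint64_t lens[4])
	{
		unsigned int i, min_idx = 0;

		for (i = 1; i < 4; i++)
			if (lens[i] < lens[min_idx])
				min_idx = i;
		return (unsigned int)lens[min_idx];	/* low half = lane */
	}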
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2a35b1e0fb90..16c2c022540d 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -566,6 +566,11 @@ restore_all:
.Lrestore_nocheck:
RESTORE_REGS 4 # skip orig_eax/error_code
.Lirq_return:
+ /*
+ * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
+ * when returning from an IPI handler and when returning from the
+ * scheduler to user-space.
+ */
INTERRUPT_RETURN
.section .fixup, "ax"
@@ -895,6 +900,9 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
hyperv_vector_handler)
+BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_intr)
+
#endif /* CONFIG_HYPERV */
ENTRY(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c752abe89d80..30c8c5344c4a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -691,6 +691,10 @@ GLOBAL(restore_regs_and_return_to_kernel)
POP_EXTRA_REGS
POP_C_REGS
addq $8, %rsp /* skip regs->orig_ax */
+ /*
+ * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
+ * when returning from an IPI handler.
+ */
INTERRUPT_RETURN
ENTRY(native_iret)
@@ -1132,6 +1136,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
#if IS_ENABLED(CONFIG_HYPERV)
apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
+
+apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
+ hyperv_reenlightenment_vector hyperv_reenlightenment_intr
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e4ea143ed96..78f91ec1056e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -93,7 +93,8 @@ struct amd_nb {
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
- PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
+ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
+ PERF_SAMPLE_PERIOD)
#define PEBS_REGS \
(PERF_REG_X86_AX | \
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a0a206556919..2edc49e7409b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -18,6 +18,8 @@
*/
#include <linux/types.h>
+#include <asm/apic.h>
+#include <asm/desc.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
@@ -37,6 +39,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return tsc_pg;
}
+EXPORT_SYMBOL_GPL(hv_get_tsc_page);
static u64 read_hv_clock_tsc(struct clocksource *arg)
{
@@ -101,6 +104,115 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
}
+static void (*hv_reenlightenment_cb)(void);
+
+static void hv_reenlightenment_notify(struct work_struct *dummy)
+{
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ /* Don't issue the callback if TSC accesses are not emulated */
+ if (hv_reenlightenment_cb && emu_status.inprogress)
+ hv_reenlightenment_cb();
+}
+static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify);
+
+void hyperv_stop_tsc_emulation(void)
+{
+ u64 freq;
+ struct hv_tsc_emulation_status emu_status;
+
+ rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+ emu_status.inprogress = 0;
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status);
+
+ rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq);
+ tsc_khz = div64_u64(freq, 1000);
+}
+EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation);
+
+static inline bool hv_reenlightenment_available(void)
+{
+ /*
+ * Check for required features and privileges to make TSC frequency
+ * change notifications work.
+ */
+ return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
+ ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE &&
+ ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT;
+}
+
+__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs)
+{
+ entering_ack_irq();
+
+ inc_irq_stat(irq_hv_reenlightenment_count);
+
+ schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
+
+ exiting_irq();
+}
+
+void set_hv_tscchange_cb(void (*cb)(void))
+{
+ struct hv_reenlightenment_control re_ctrl = {
+ .vector = HYPERV_REENLIGHTENMENT_VECTOR,
+ .enabled = 1,
+ .target_vp = hv_vp_index[smp_processor_id()]
+ };
+ struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1};
+
+ if (!hv_reenlightenment_available()) {
+ pr_warn("Hyper-V: reenlightenment support is unavailable\n");
+ return;
+ }
+
+ hv_reenlightenment_cb = cb;
+
+ /* Make sure callback is registered before we write to MSRs */
+ wmb();
+
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl));
+}
+EXPORT_SYMBOL_GPL(set_hv_tscchange_cb);
+
+void clear_hv_tscchange_cb(void)
+{
+ struct hv_reenlightenment_control re_ctrl;
+
+ if (!hv_reenlightenment_available())
+ return;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+ re_ctrl.enabled = 0;
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl);
+
+ hv_reenlightenment_cb = NULL;
+}
+EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb);
+
+static int hv_cpu_die(unsigned int cpu)
+{
+ struct hv_reenlightenment_control re_ctrl;
+ unsigned int new_cpu;
+
+ if (hv_reenlightenment_cb == NULL)
+ return 0;
+
+ rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ if (re_ctrl.target_vp == hv_vp_index[cpu]) {
+ /* Reassign to some other online CPU */
+ new_cpu = cpumask_any_but(cpu_online_mask, cpu);
+
+ re_ctrl.target_vp = hv_vp_index[new_cpu];
+ wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
+ }
+
+ return 0;
+}
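The pair set_hv_tscchange_cb()/clear_hv_tscchange_cb() keeps exactly one CPU targeted by reenlightenment interrupts, and hv_cpu_die() above migrates the target when that CPU goes offline. A consumer (a clocksource user such as KVM is assumed here; the my_* names are illustrative) would wire it up roughly as:

	/* Sketch of an assumed consumer of the reenlightenment callback. */
	static void my_tsc_changed(void)
	{
		/* Recompute frequency-derived state, then let the TSC run free. */
		hyperv_stop_tsc_emulation();
	}

	static void my_init(void)
	{
		set_hv_tscchange_cb(my_tsc_changed);
	}

	static void my_exit(void)
	{
		clear_hv_tscchange_cb();
	}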
+
/*
* This function is to be invoked early in the boot sequence after the
* hypervisor has been detected.
@@ -110,12 +222,19 @@ static int hv_cpu_init(unsigned int cpu)
*/
void hyperv_init(void)
{
- u64 guest_id;
+ u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
+ /* Absolutely required MSRs */
+ required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE |
+ HV_X64_MSR_VP_INDEX_AVAILABLE;
+
+ if ((ms_hyperv.features & required_msrs) != required_msrs)
+ return;
+
/* Allocate percpu VP index */
hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
GFP_KERNEL);
@@ -123,7 +242,7 @@ void hyperv_init(void)
return;
if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
- hv_cpu_init, NULL) < 0)
+ hv_cpu_init, hv_cpu_die) < 0)
goto free_vp_index;
/*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1d9199e1c2ad..0dfe4d3f74e2 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -210,6 +210,7 @@
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 51cc979dd364..7c341a74ec8c 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -38,6 +38,9 @@ typedef struct {
#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
unsigned int irq_hv_callback_count;
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ unsigned int irq_hv_reenlightenment_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 35a6bc4da8ad..cf090e584202 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -10,6 +10,10 @@
*
* Things ending in "2" are usually because we have no better
* name for them. There's no processor called "SILVERMONT2".
+ *
+ * When adding a new CPUID for a new microarchitecture, add a new
+ * group to keep the list logically sorted in chronological order.
+ * Within that group, keep the CPUIDs for the variants sorted by
+ * model number.
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
@@ -49,6 +53,8 @@
#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
+#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 528ed4be4393..9e7adcdbe031 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -38,6 +38,7 @@ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
u32 *out, u32 outlen);
int intel_pmc_s0ix_counter_read(u64 *data);
int intel_pmc_gcr_read(u32 offset, u32 *data);
+int intel_pmc_gcr_read64(u32 offset, u64 *data);
int intel_pmc_gcr_write(u32 offset, u32 data);
int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val);
@@ -70,6 +71,11 @@ static inline int intel_pmc_gcr_read(u32 offset, u32 *data)
return -EINVAL;
}
+static inline int intel_pmc_gcr_read64(u32 offset, u64 *data)
+{
+ return -EINVAL;
+}
+
static inline int intel_pmc_gcr_write(u32 offset, u32 data)
{
return -EINVAL;
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 67421f649cfa..e71c1120426b 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -103,7 +103,12 @@
#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
-#define LOCAL_TIMER_VECTOR 0xee
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#endif
+
+#define LOCAL_TIMER_VECTOR 0xed
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index b577dd0916aa..13e70da38bed 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -4,6 +4,7 @@
#include <linux/const.h>
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_SCALE_SHIFT 3
/*
* Compiler uses shadow offset assuming that addresses start
@@ -12,12 +13,15 @@
* 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT
*/
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
- ((-1UL << __VIRTUAL_MASK_SHIFT) >> 3))
+ ((-1UL << __VIRTUAL_MASK_SHIFT) >> \
+ KASAN_SHADOW_SCALE_SHIFT))
/*
- * 47 bits for kernel address -> (47 - 3) bits for shadow
- * 56 bits for kernel address -> (56 - 3) bits for shadow
+ * 47 bits for kernel address -> (47 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow
+ * 56 bits for kernel address -> (56 - KASAN_SHADOW_SCALE_SHIFT) bits for shadow
*/
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (__VIRTUAL_MASK_SHIFT - 3)))
+#define KASAN_SHADOW_END (KASAN_SHADOW_START + \
+ (1ULL << (__VIRTUAL_MASK_SHIFT - \
+ KASAN_SHADOW_SCALE_SHIFT)))
#ifndef __ASSEMBLY__
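With the scale factored out into KASAN_SHADOW_SCALE_SHIFT, one shadow byte still covers 2^3 = 8 bytes of kernel address space; the START/END definitions above are simply that translation applied to both ends of the kernel range. A sketch of the address-to-shadow mapping these constants feed (the helper name is illustrative):

	/* Sketch of the KASAN mem-to-shadow translation. */
	static inline unsigned long kasan_shadow_of(unsigned long addr)
	{
		return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
	}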
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 516798431328..dd6f57a54a26 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,7 +86,7 @@
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- | X86_CR4_SMAP | X86_CR4_PKE))
+ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -504,6 +504,7 @@ struct kvm_vcpu_arch {
int mp_state;
u64 ia32_misc_enable_msr;
u64 smbase;
+ u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
@@ -760,6 +761,15 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -847,6 +857,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+
+ struct kvm_sev_info sev_info;
};
struct kvm_vm_stat {
@@ -883,7 +895,6 @@ struct kvm_vcpu_stat {
u64 request_irq_exits;
u64 irq_exits;
u64 host_state_reload;
- u64 efer_reload;
u64 fpu_reload;
u64 insn_emulation;
u64 insn_emulation_fail;
@@ -965,7 +976,7 @@ struct kvm_x86_ops {
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*tlb_flush)(struct kvm_vcpu *vcpu);
+ void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1017,6 +1028,7 @@ struct kvm_x86_ops {
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
+ bool (*umip_emulated)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
@@ -1079,6 +1091,10 @@ struct kvm_x86_ops {
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+
+ int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+ int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index b52af150cbd8..25283f7eb299 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -160,6 +160,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val)
void hyperv_callback_vector(void);
+void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#define trace_hyperv_callback_vector hyperv_callback_vector
#endif
@@ -316,18 +317,27 @@ void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
bool hv_is_hyperv_initialized(void);
void hyperv_cleanup(void);
+
+void hyperv_reenlightenment_intr(struct pt_regs *regs);
+void set_hv_tscchange_cb(void (*cb)(void));
+void clear_hv_tscchange_cb(void);
+void hyperv_stop_tsc_emulation(void);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline void hyperv_cleanup(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
+static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
+static inline void clear_hv_tscchange_cb(void) {}
+static inline void hyperv_stop_tsc_emulation(void) {}
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
{
- u64 scale, offset, cur_tsc;
+ u64 scale, offset;
u32 sequence;
/*
@@ -358,7 +368,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
- cur_tsc = rdtsc_ordered();
+ *cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
@@ -368,7 +378,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
- return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+}
+
+static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 cur_tsc;
+
+ return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
}
#else
@@ -376,5 +393,12 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
+
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
+{
+ BUG();
+ return U64_MAX;
+}
#endif
#endif
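hv_read_tsc_page_tsc() keeps the sequence-lock style protocol on the hypervisor TSC page and additionally returns the raw TSC through *cur_tsc, so a caller can pair the computed time with the TSC value it was derived from. A condensed sketch of the full loop, assuming a zero sequence marks the page invalid:

	static inline u64 hv_read_tsc_page_tsc_sketch(
			const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
	{
		u64 scale, offset;
		u32 sequence;

		do {
			sequence = READ_ONCE(tsc_pg->tsc_sequence);
			if (!sequence)		/* page currently invalid */
				return U64_MAX;
			/* Read scale/offset only after sampling the sequence. */
			smp_rmb();
			scale = READ_ONCE(tsc_pg->tsc_scale);
			offset = READ_ONCE(tsc_pg->tsc_offset);
			*cur_tsc = rdtsc_ordered();
			/* Re-check the sequence after all other reads. */
			smp_rmb();
		} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);

		return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
	}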
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e520a1e6fc11..c9084dedfcfa 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -397,6 +397,8 @@
#define MSR_K7_PERFCTR3 0xc0010007
#define MSR_K7_CLK_CTL 0xc001001b
#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 8a3ee355b422..92015c65fa2a 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -22,4 +22,6 @@ int io_reserve_memtype(resource_size_t start, resource_size_t end,
void io_free_memtype(resource_size_t start, resource_size_t end);
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn);
+
#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pmc_core.h b/arch/x86/include/asm/pmc_core.h
deleted file mode 100644
index d4855f11136d..000000000000
--- a/arch/x86/include/asm/pmc_core.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Intel Core SoC Power Management Controller Header File
- *
- * Copyright (c) 2016, Intel Corporation.
- * All Rights Reserved.
- *
- * Authors: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
- * Vishwanath Somayaji <vishwanath.somayaji@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- */
-
-#ifndef _ASM_PMC_CORE_H
-#define _ASM_PMC_CORE_H
-
-/* API to read SLP_S0_RESIDENCY counter */
-int intel_pmc_slp_s0_counter_read(u32 *data);
-
-#endif /* _ASM_PMC_CORE_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 78dd9df88157..0487ac054870 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -146,6 +146,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
+#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
+#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
new file mode 100644
index 000000000000..c67caafd3381
--- /dev/null
+++ b/arch/x86/include/asm/sync_core.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SYNC_CORE_H
+#define _ASM_X86_SYNC_CORE_H
+
+#include <linux/preempt.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Ensure that a core serializing instruction is issued before returning
+ * to user-mode. x86 implements return to user-space through sysexit,
+ * sysretl, and sysretq, which are not core serializing.
+ */
+static inline void sync_core_before_usermode(void)
+{
+ /* With PTI, we unconditionally serialize before running user code. */
+ if (static_cpu_has(X86_FEATURE_PTI))
+ return;
+ /*
+ * Return from interrupt and NMI is done through iret, which is core
+ * serializing.
+ */
+ if (in_irq() || in_nmi())
+ return;
+ sync_core();
+}
+
+#endif /* _ASM_X86_SYNC_CORE_H */
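sync_core_before_usermode() is the hook that ARCH_HAS_MEMBARRIER_SYNC_CORE plugs into: with PTI the exit path already serializes, iret-based returns serialize as well, and only the sysexit/sysret paths need the explicit sync_core(). From user space the feature is reached through membarrier(2); a hedged usage sketch (error handling elided, constants from linux/membarrier.h):

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int membarrier_sync_core(void)
	{
		/* Registration must precede use. */
		if (syscall(__NR_membarrier,
			    MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
			return -1;
		/* When this returns, every thread of the process has executed
		 * a core-serializing instruction before running user code.
		 */
		return syscall(__NR_membarrier,
			       MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
	}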
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 1a5bfead93b4..197c2e6c7376 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -40,6 +40,9 @@
*/
#define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11)
+/* AccessReenlightenmentControls privilege */
+#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
@@ -234,6 +237,30 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+
+struct hv_reenlightenment_control {
+ u64 vector:8;
+ u64 reserved1:8;
+ u64 enabled:1;
+ u64 reserved2:15;
+ u64 target_vp:32;
+};
+
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+
+struct hv_tsc_emulation_control {
+ u64 enabled:1;
+ u64 reserved:63;
+};
+
+struct hv_tsc_emulation_status {
+ u64 inprogress:1;
+ u64 reserved:63;
+};
+
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 09cc06483bed..7a2ade4aa235 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -25,6 +25,7 @@
#define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_PV_TLB_FLUSH 9
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -51,6 +52,9 @@ struct kvm_steal_time {
__u32 pad[11];
};
+#define KVM_VCPU_PREEMPTED (1 << 0)
+#define KVM_VCPU_FLUSH_TLB (1 << 1)
+
#define KVM_CLOCK_PAIRING_WALLCLOCK 0
struct kvm_clock_pairing {
__s64 sec;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ec3a286163c3..2aa92094b59d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -36,6 +36,7 @@
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/efi-bgrt.h>
+#include <linux/serial_core.h>
#include <asm/e820/api.h>
#include <asm/irqdomain.h>
@@ -1625,6 +1626,8 @@ int __init acpi_boot_init(void)
if (!acpi_noirq)
x86_init.pci.init = pci_acpi_init;
+ /* Do not enable ACPI SPCR console by default */
+ acpi_parse_spcr(earlycon_acpi_spcr_enable, false);
return 0;
}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ab1865342002..dfcbe6924eaf 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1515,7 +1515,7 @@ static __poll_t do_poll(struct file *fp, poll_table *wait)
return 0;
poll_wait(fp, &apm_waitqueue, wait);
if (!queue_empty(as))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
@@ -2389,6 +2389,7 @@ static int __init apm_init(void)
if (HZ != 100)
idle_period = (idle_period * HZ) / 100;
if (idle_threshold < 100) {
+ cpuidle_poll_state_init(&apm_idle_driver);
if (!cpuidle_register_driver(&apm_idle_driver))
if (cpuidle_register_device(&apm_cpuidle_device))
cpuidle_unregister_driver(&apm_idle_driver);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ea831c858195..5bddbdcbc4a3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -556,6 +556,51 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
}
+static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ /*
+ * BIOS support is required for SME and SEV.
+ * For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+ * the SME physical address space reduction value.
+ * If BIOS has not enabled SME then don't advertise the
+ * SME feature (set in scattered.c).
+ * For SEV: If BIOS has not enabled SEV then don't advertise the
+ * SEV feature (set in scattered.c).
+ *
+ * In all cases, since support for SME and SEV requires long mode,
+	 * don't advertise these features under CONFIG_X86_32.
+ */
+ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
+ /* Check if memory encryption is enabled */
+ rdmsrl(MSR_K8_SYSCFG, msr);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ goto clear_all;
+
+ /*
+ * Always adjust physical address bits. Even though this
+ * will be a value above 32-bits this is still done for
+ * CONFIG_X86_32 so that accurate values are reported.
+ */
+ c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto clear_all;
+
+ rdmsrl(MSR_K7_HWCR, msr);
+ if (!(msr & MSR_K7_HWCR_SMMLOCK))
+ goto clear_sev;
+
+ return;
+
+clear_all:
+ clear_cpu_cap(c, X86_FEATURE_SME);
+clear_sev:
+ clear_cpu_cap(c, X86_FEATURE_SEV);
+ }
+}
+
static void early_init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
@@ -627,26 +672,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_E400);
- /*
- * BIOS support is required for SME. If BIOS has enabled SME then
- * adjust x86_phys_bits by the SME physical address space reduction
- * value. If BIOS has not enabled SME then don't advertise the
- * feature (set in scattered.c). Also, since the SME support requires
- * long mode, don't advertise the feature under CONFIG_X86_32.
- */
- if (cpu_has(c, X86_FEATURE_SME)) {
- u64 msr;
-
- /* Check if SME is enabled */
- rdmsrl(MSR_K8_SYSCFG, msr);
- if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
- c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
- if (IS_ENABLED(CONFIG_X86_32))
- clear_cpu_cap(c, X86_FEATURE_SME);
- } else {
- clear_cpu_cap(c, X86_FEATURE_SME);
- }
- }
+ early_detect_mem_encrypt(c);
}
static void init_amd_k8(struct cpuinfo_x86 *c)
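[Editor's note] The physical-address adjustment in early_detect_mem_encrypt() comes from CPUID 0x8000001f EBX[11:6]. A minimal sketch of that extraction, mirroring the expression in the hunk above (illustrative only):

	static u8 sme_phys_bits_reduction(void)
	{
		/* EBX[11:6] of leaf 0x8000001f: physical address bit reduction. */
		return (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
	}

For example, a part reporting a reduction of 5 on a 48-bit machine leaves 43 usable physical address bits once memory encryption is enabled.
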
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 213e8c2ca702..97685a0c3175 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -247,9 +247,9 @@ static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
poll_wait(file, &mce_chrdev_wait, wait);
if (READ_ONCE(mcelog.next))
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 85eb5fc180c8..9340f41ce8d3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void)
hyperv_setup_mmu_ops();
/* Setup the IDT for hypervisor callback */
alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+
+	/* Set up the IDT for reenlightenment notifications */
+ if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT)
+ alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
+ hyperv_reenlightenment_vector);
+
#endif
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 4075d2be5357..772c219b6889 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -30,6 +30,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
+ { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
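[Editor's note] The new table entry places X86_FEATURE_SEV at CPUID 0x8000001f, EAX bit 1 (SME sits at bit 0). A hedged stand-alone check equivalent to what the scattered-feature scan does for this bit:

	static bool cpu_advertises_sev(void)
	{
		/* Leaf 0x8000001f EAX: bit 0 = SME, bit 1 = SEV. */
		return cpuid_eax(0x8000001f) & BIT(1);
	}
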
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 68e1867cca80..45fb4d2565f8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -142,6 +142,15 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
+ seq_printf(p, "%*s: ", prec, "HRE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->irq_hv_reenlightenment_count);
+ seq_puts(p, " Hyper-V reenlightenment interrupts\n");
+ }
+#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b40ffbf156c1..4e37d1a851a6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -498,6 +498,34 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
+static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
+
+static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ const struct flush_tlb_info *info)
+{
+ u8 state;
+ int cpu;
+ struct kvm_steal_time *src;
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+
+ cpumask_copy(flushmask, cpumask);
+ /*
+	 * We have to flush only the online vCPUs, and queue a
+	 * flush-on-enter for any vCPU that is currently preempted.
+ */
+ for_each_cpu(cpu, flushmask) {
+ src = &per_cpu(steal_time, cpu);
+ state = READ_ONCE(src->preempted);
+ if ((state & KVM_VCPU_PREEMPTED)) {
+ if (try_cmpxchg(&src->preempted, &state,
+ state | KVM_VCPU_FLUSH_TLB))
+ __cpumask_clear_cpu(cpu, flushmask);
+ }
+ }
+
+ native_flush_tlb_others(flushmask, info);
+}
+
static void __init kvm_guest_init(void)
{
int i;
@@ -517,6 +545,9 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
+
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
@@ -598,6 +629,22 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
+static __init int kvm_setup_pv_tlb_flush(void)
+{
+ int cpu;
+
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ for_each_possible_cpu(cpu) {
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+ }
+ pr_info("KVM setup pv remote TLB flush\n");
+ }
+
+ return 0;
+}
+arch_initcall(kvm_setup_pv_tlb_flush);
+
#ifdef CONFIG_PARAVIRT_SPINLOCKS
/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
@@ -643,7 +690,7 @@ __visible bool __kvm_vcpu_is_preempted(long cpu)
{
struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
- return !!src->preempted;
+ return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
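[Editor's note] The host-side half of this protocol is not in this hunk; roughly, when the hypervisor next runs a vCPU it atomically clears the steal-time flags and honors a queued flush. A hedged sketch of that counterpart (names are illustrative, not the exact upstream helpers):

	static void consume_steal_time_flags(struct kvm_vcpu *vcpu,
					     struct kvm_steal_time *st)
	{
		/* Clear PREEMPTED and FLUSH_TLB together; act on the latter. */
		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
			kvm_vcpu_flush_tlb(vcpu);	/* assumed flush hook */
	}

This is why kvm_flush_tlb_others() above can safely drop preempted vCPUs from the IPI mask: the flush is guaranteed to happen before the vCPU runs again.
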
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3df51c287844..92fd433c50b9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -81,6 +81,14 @@ config KVM_AMD
To compile this as a module, choose M here: the module
will be called kvm-amd.
+config KVM_AMD_SEV
+ def_bool y
+ bool "AMD Secure Encrypted Virtualization (SEV) support"
+ depends on KVM_AMD && X86_64
+ depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP
+ ---help---
+ Provides support for launching Encrypted VMs on AMD processors.
+
config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 13f5d4217e4f..a0c5a69bc7c4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
{
switch (func) {
case 0:
- entry->eax = 1; /* only one leaf currently */
+ entry->eax = 7;
++*nent;
break;
case 1:
entry->ecx = F(MOVBE);
++*nent;
break;
+ case 7:
+ entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ if (index == 0)
+ entry->ecx = F(RDPID);
+ ++*nent;
default:
break;
}
@@ -325,6 +330,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
+ unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@@ -363,7 +369,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
+ 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+ F(TOPOEXT);
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
@@ -389,8 +396,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) |
- 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
@@ -476,6 +484,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
@@ -597,7 +606,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- (1 << KVM_FEATURE_PV_UNHALT);
+ (1 << KVM_FEATURE_PV_UNHALT) |
+ (1 << KVM_FEATURE_PV_TLB_FLUSH);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@ -607,7 +617,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = 0;
break;
case 0x80000000:
- entry->eax = min(entry->eax, 0x8000001a);
+ entry->eax = min(entry->eax, 0x8000001f);
break;
case 0x80000001:
entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
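[Editor's note] Guests discover the KVM paravirtual features advertised above through CPUID leaf 0x40000001. An illustrative guest-side probe (assuming the standard KVM CPUID base at 0x40000000, as kvm_arch_para_features() does):

	static bool guest_has_pv_tlb_flush(void)
	{
		unsigned int eax, ebx, ecx, edx;

		cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
		return eax & (1 << KVM_FEATURE_PV_TLB_FLUSH);
	}
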
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 290ecf711aec..d91eaeb01034 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3533,6 +3533,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_rdpid(struct x86_emulate_ctxt *ctxt)
+{
+ u64 tsc_aux = 0;
+
+ if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
+ return emulate_gp(ctxt, 0);
+ ctxt->dst.val = tsc_aux;
+ return X86EMUL_CONTINUE;
+}
+
static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
{
u64 tsc = 0;
@@ -3652,17 +3662,27 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
{
- if (ctxt->modrm_reg > VCPU_SREG_GS)
- return emulate_ud(ctxt);
+ if (segment > VCPU_SREG_GS &&
+ (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
- ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+ ctxt->dst.val = get_segment_selector(ctxt, segment);
if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
return X86EMUL_CONTINUE;
}
+static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->modrm_reg > VCPU_SREG_GS)
+ return emulate_ud(ctxt);
+
+ return em_store_sreg(ctxt, ctxt->modrm_reg);
+}
+
static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3678,6 +3698,11 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
}
+static int em_sldt(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_LDTR);
+}
+
static int em_lldt(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3687,6 +3712,11 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt)
return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
}
+static int em_str(struct x86_emulate_ctxt *ctxt)
+{
+ return em_store_sreg(ctxt, VCPU_SREG_TR);
+}
+
static int em_ltr(struct x86_emulate_ctxt *ctxt)
{
u16 sel = ctxt->src.val;
@@ -3739,6 +3769,10 @@ static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
{
struct desc_ptr desc_ptr;
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->mode == X86EMUL_MODE_PROT64)
ctxt->op_bytes = 8;
get(ctxt, &desc_ptr);
@@ -3798,6 +3832,10 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
static int em_smsw(struct x86_emulate_ctxt *ctxt)
{
+ if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
+ ctxt->ops->cpl(ctxt) > 0)
+ return emulate_gp(ctxt, 0);
+
if (ctxt->dst.type == OP_MEM)
ctxt->dst.bytes = 2;
ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
@@ -4383,8 +4421,8 @@ static const struct opcode group5[] = {
};
static const struct opcode group6[] = {
- DI(Prot | DstMem, sldt),
- DI(Prot | DstMem, str),
+ II(Prot | DstMem, em_sldt, sldt),
+ II(Prot | DstMem, em_str, str),
II(Prot | Priv | SrcMem16, em_lldt, lldt),
II(Prot | Priv | SrcMem16, em_ltr, ltr),
N, N, N, N,
@@ -4415,10 +4453,20 @@ static const struct opcode group8[] = {
F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};
+/*
+ * The "memory" destination is actually always a register, since we come
+ * from the register case of group9.
+ */
+static const struct gprefix pfx_0f_c7_7 = {
+ N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+};
+
static const struct group_dual group9 = { {
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
- N, N, N, N, N, N, N, N,
+ N, N, N, N, N, N, N,
+ GP(0, &pfx_0f_c7_7),
} };
static const struct opcode group11[] = {
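[Editor's note] From the guest's point of view, em_rdpid() above emulates the RDPID instruction, which loads IA32_TSC_AUX (on Linux this typically encodes the CPU and node number). For comparison, a minimal user-level sketch of the real instruction (requires assembler support for RDPID; illustrative only):

	static inline unsigned long rdpid(void)
	{
		unsigned long pid;

		/* RDPID reads IA32_TSC_AUX without a serializing CPUID. */
		asm volatile("rdpid %0" : "=r" (pid));
		return pid;
	}
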
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 5c24811e8b0b..f171051eecf3 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -79,7 +79,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (kvm_cpu_has_extint(v))
return 1;
- if (kvm_vcpu_apicv_active(v))
+ if (!is_guest_mode(v) && kvm_vcpu_apicv_active(v))
return 0;
return kvm_apic_has_interrupt(v) != -1; /* LAPIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e2c1fb8d35ce..924ac8ce9d50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -364,32 +364,41 @@ static u8 count_vectors(void *bitmap)
return count;
}
-int __kvm_apic_update_irr(u32 *pir, void *regs)
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
u32 i, vec;
- u32 pir_val, irr_val;
- int max_irr = -1;
+ u32 pir_val, irr_val, prev_irr_val;
+ int max_updated_irr;
+
+ max_updated_irr = -1;
+ *max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
pir_val = READ_ONCE(pir[i]);
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
if (pir_val) {
+ prev_irr_val = irr_val;
irr_val |= xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
+ if (prev_irr_val != irr_val) {
+ max_updated_irr =
+ __fls(irr_val ^ prev_irr_val) + vec;
+ }
}
if (irr_val)
- max_irr = __fls(irr_val) + vec;
+ *max_irr = __fls(irr_val) + vec;
}
- return max_irr;
+ return ((max_updated_irr != -1) &&
+ (max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- return __kvm_apic_update_irr(pir, apic->regs);
+ return __kvm_apic_update_irr(pir, apic->regs, max_irr);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
@@ -581,7 +590,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
int highest_irr;
- if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
+ if (apic->vcpu->arch.apicv_active)
highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
else
highest_irr = apic_find_highest_irr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4b9935a38347..56c36014f7b7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -75,8 +75,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
-int __kvm_apic_update_irr(u32 *pir, void *regs);
-int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
+bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr);
+bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr);
void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
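[Editor's note] The changed return type encodes whether the vector reported in *max_irr was newly delivered by this PIR sync (true only when the maximum updated vector equals the overall maximum). A hedged sketch of how a caller can use that; the VMX sync_pir_to_irr path in this series does something similar, but the names here are illustrative:

	static int example_sync_pir_to_irr(struct kvm_vcpu *vcpu, u32 *pir)
	{
		int max_irr;

		/* Only a freshly raised maximum needs re-evaluation. */
		if (kvm_apic_update_irr(vcpu, pir, &max_irr))
			kvm_make_request(KVM_REQ_EVENT, vcpu);

		return max_irr;
	}
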
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2b8eb4da4d08..8eca1d04aeb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -42,6 +42,7 @@
#include <linux/kern_levels.h>
#include <asm/page.h>
+#include <asm/pat.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/vmx.h>
@@ -381,7 +382,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
-void kvm_mmu_clear_all_pte_masks(void)
+static void kvm_mmu_clear_all_pte_masks(void)
{
shadow_user_mask = 0;
shadow_accessed_mask = 0;
@@ -2708,7 +2709,18 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
+ /*
+ * Some reserved pages, such as those from NVDIMM
+ * DAX devices, are not for MMIO, and can be mapped
+			* with a cached memory type for better performance.
+			* The check above, however, would misidentify those
+			* pages as MMIO and cause KVM to map them with the UC
+			* memory type, hurting performance. Therefore, also
+			* check the host memory type and treat only UC/UC-/WC
+			* pages as MMIO.
+ */
+ (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
return true;
}
@@ -4951,6 +4963,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
emulate:
+ /*
+	 * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+	 * This can happen if a guest gets a page fault on a data access but the HW
+	 * table walker is not able to read the instruction page (e.g. the
+	 * instruction page is not present in memory). In those cases we simply
+	 * restart the guest.
+ */
+ if (unlikely(insn && !insn_len))
+ return 1;
+
er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
switch (er) {
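[Editor's note] The pat_pfn_immune_to_uc_mtrr() helper referenced above lives in arch/x86/mm/pat.c; conceptually it reports whether the pfn's PAT memtype is one that MTRRs cannot override. A paraphrased sketch of that check (not the verbatim implementation):

	static bool pat_pfn_immune_to_uc_mtrr_sketch(kvm_pfn_t pfn)
	{
		enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));

		/* UC, UC- and WC stay that way regardless of MTRR settings. */
		return cm == _PAGE_CACHE_MODE_UC ||
		       cm == _PAGE_CACHE_MODE_UC_MINUS ||
		       cm == _PAGE_CACHE_MODE_WC;
	}
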
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index d22ddbdf5e6e..1272861e77b9 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -19,7 +19,7 @@
#include <linux/ratelimit.h>
-char const *audit_point_name[] = {
+static char const *audit_point_name[] = {
"pre page fault",
"post page fault",
"pre pte write",
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4e3c79530526..b3e488a74828 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -37,6 +37,10 @@
#include <linux/amd-iommu.h>
#include <linux/hashtable.h>
#include <linux/frame.h>
+#include <linux/psp-sev.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -214,6 +218,9 @@ struct vcpu_svm {
*/
struct list_head ir_list;
spinlock_t ir_list_lock;
+
+	/* host CPU on which this vCPU last ran */
+ unsigned int last_cpu;
};
/*
@@ -289,8 +296,12 @@ module_param(vls, int, 0444);
static int vgif = true;
module_param(vgif, int, 0444);
+/* enable/disable SEV support */
+static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev, int, 0444);
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@ -324,6 +335,38 @@ enum {
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+static unsigned int max_sev_asid;
+static unsigned int min_sev_asid;
+static unsigned long *sev_asid_bitmap;
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+struct enc_region {
+ struct list_head list;
+ unsigned long npages;
+ struct page **pages;
+ unsigned long uaddr;
+ unsigned long size;
+};
+
+static inline bool svm_sev_enabled(void)
+{
+ return max_sev_asid;
+}
+
+static inline bool sev_guest(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->active;
+}
+
+static inline int sev_get_asid(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return sev->asid;
+}
+
static inline void mark_all_dirty(struct vmcb *vmcb)
{
vmcb->control.clean = 0;
@@ -530,10 +573,14 @@ struct svm_cpu_data {
u64 asid_generation;
u32 max_asid;
u32 next_asid;
+ u32 min_asid;
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
struct vmcb *current_vmcb;
+
+ /* index = sev_asid, value = vmcb pointer */
+ struct vmcb **sev_vmcbs;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -788,6 +835,7 @@ static int svm_hardware_enable(void)
sd->asid_generation = 1;
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
+ sd->min_asid = max_sev_asid + 1;
gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@@ -846,6 +894,7 @@ static void svm_cpu_uninit(int cpu)
return;
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+ kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
kfree(sd);
}
@@ -859,11 +908,18 @@ static int svm_cpu_init(int cpu)
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- sd->save_area = alloc_page(GFP_KERNEL);
r = -ENOMEM;
+ sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto err_1;
+ if (svm_sev_enabled()) {
+ r = -ENOMEM;
+ sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+ if (!sd->sev_vmcbs)
+ goto err_1;
+ }
+
per_cpu(svm_data, cpu) = sd;
return 0;
@@ -1070,6 +1126,48 @@ static int avic_ga_log_notifier(u32 ga_tag)
return 0;
}
+static __init int sev_hardware_setup(void)
+{
+ struct sev_user_data_status *status;
+ int rc;
+
+ /* Maximum number of encrypted guests supported simultaneously */
+ max_sev_asid = cpuid_ecx(0x8000001F);
+
+ if (!max_sev_asid)
+ return 1;
+
+	/* Minimum ASID value that should be used for an SEV guest */
+ min_sev_asid = cpuid_edx(0x8000001F);
+
+ /* Initialize SEV ASID bitmap */
+ sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!sev_asid_bitmap)
+ return 1;
+
+ status = kmalloc(sizeof(*status), GFP_KERNEL);
+ if (!status)
+ return 1;
+
+ /*
+ * Check SEV platform status.
+ *
+	 * PLATFORM_STATUS can be called in any state; if the query fails,
+	 * either the PSP firmware does not support the SEV feature or the
+	 * SEV firmware is dead.
+ */
+ rc = sev_platform_status(status, NULL);
+ if (rc)
+ goto err;
+
+ pr_info("SEV supported\n");
+
+err:
+ kfree(status);
+ return rc;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1105,6 +1203,17 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
+ if (sev) {
+ if (boot_cpu_has(X86_FEATURE_SEV) &&
+ IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
+ r = sev_hardware_setup();
+ if (r)
+ sev = false;
+ } else {
+ sev = false;
+ }
+ }
+
for_each_possible_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -1166,6 +1275,9 @@ static __exit void svm_hardware_unsetup(void)
{
int cpu;
+ if (svm_sev_enabled())
+ kfree(sev_asid_bitmap);
+
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -1318,7 +1430,7 @@ static void init_vmcb(struct vcpu_svm *svm)
if (npt_enabled) {
/* Setup VMCB for Nested Paging */
- control->nested_ctl = 1;
+ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
clr_intercept(svm, INTERCEPT_INVLPG);
clr_exception_intercept(svm, PF_VECTOR);
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
@@ -1356,6 +1468,11 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
+ if (sev_guest(svm->vcpu.kvm)) {
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+ clr_exception_intercept(svm, UD_VECTOR);
+ }
+
mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -1438,6 +1555,179 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+static void __sev_asid_free(int asid)
+{
+ struct svm_cpu_data *sd;
+ int cpu, pos;
+
+ pos = asid - 1;
+ clear_bit(pos, sev_asid_bitmap);
+
+ for_each_possible_cpu(cpu) {
+ sd = per_cpu(svm_data, cpu);
+ sd->sev_vmcbs[pos] = NULL;
+ }
+}
+
+static void sev_asid_free(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ __sev_asid_free(sev->asid);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
+ struct sev_data_decommission *decommission;
+ struct sev_data_deactivate *data;
+
+ if (!handle)
+ return;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ /* deactivate handle */
+ data->handle = handle;
+ sev_guest_deactivate(data, NULL);
+
+ wbinvd_on_all_cpus();
+ sev_guest_df_flush(NULL);
+ kfree(data);
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ /* decommission handle */
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+ unsigned long ulen, unsigned long *n,
+ int write)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ unsigned long npages, npinned, size;
+ unsigned long locked, lock_limit;
+ struct page **pages;
+ int first, last;
+
+ /* Calculate number of pages. */
+ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
+ last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
+ npages = (last - first + 1);
+
+ locked = sev->pages_locked + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+ pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+ return NULL;
+ }
+
+ /* Avoid using vmalloc for smaller buffers. */
+ size = npages * sizeof(struct page *);
+ if (size > PAGE_SIZE)
+ pages = vmalloc(size);
+ else
+ pages = kmalloc(size, GFP_KERNEL);
+
+ if (!pages)
+ return NULL;
+
+ /* Pin the user virtual address. */
+ npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+ if (npinned != npages) {
+ pr_err("SEV: Failure locking %lu pages.\n", npages);
+ goto err;
+ }
+
+ *n = npages;
+ sev->pages_locked = locked;
+
+ return pages;
+
+err:
+ if (npinned > 0)
+ release_pages(pages, npinned);
+
+ kvfree(pages);
+ return NULL;
+}
+
+static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
+ unsigned long npages)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ release_pages(pages, npages);
+ kvfree(pages);
+ sev->pages_locked -= npages;
+}
+
+static void sev_clflush_pages(struct page *pages[], unsigned long npages)
+{
+ uint8_t *page_virtual;
+ unsigned long i;
+
+ if (npages == 0 || pages == NULL)
+ return;
+
+ for (i = 0; i < npages; i++) {
+ page_virtual = kmap_atomic(pages[i]);
+ clflush_cache_range(page_virtual, PAGE_SIZE);
+ kunmap_atomic(page_virtual);
+ }
+}
+
+static void __unregister_enc_region_locked(struct kvm *kvm,
+ struct enc_region *region)
+{
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+	 * or vice versa for this memory range. Make sure the caches are
+	 * flushed so that guest data is written to memory with the
+	 * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ sev_unpin_memory(kvm, region->pages, region->npages);
+ list_del(&region->list);
+ kfree(region);
+}
+
+static void sev_vm_destroy(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct list_head *pos, *q;
+
+ if (!sev_guest(kvm))
+ return;
+
+ mutex_lock(&kvm->lock);
+
+ /*
+	 * If userspace was terminated before unregistering the memory regions,
+	 * unpin all the registered memory.
+ */
+ if (!list_empty(head)) {
+ list_for_each_safe(pos, q, head) {
+ __unregister_enc_region_locked(kvm,
+ list_entry(pos, struct enc_region, list));
+ }
+ }
+
+ mutex_unlock(&kvm->lock);
+
+ sev_unbind_asid(kvm, sev->handle);
+ sev_asid_free(kvm);
+}
+
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -1456,6 +1746,12 @@ static void avic_vm_destroy(struct kvm *kvm)
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
+static void svm_vm_destroy(struct kvm *kvm)
+{
+ avic_vm_destroy(kvm);
+ sev_vm_destroy(kvm);
+}
+
static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
@@ -2066,7 +2362,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
vcpu->arch.cr4 = cr4;
if (!npt_enabled)
@@ -2125,7 +2421,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
if (sd->next_asid > sd->max_asid) {
++sd->asid_generation;
- sd->next_asid = 1;
+ sd->next_asid = sd->min_asid;
svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
@@ -2173,22 +2469,24 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
static int npf_interception(struct vcpu_svm *svm)
{
- u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
trace_kvm_page_fault(fault_address, error_code);
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
- svm->vmcb->control.insn_bytes,
+ static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+ svm->vmcb->control.insn_bytes : NULL,
svm->vmcb->control.insn_len);
}
@@ -2415,7 +2713,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -2957,7 +3255,8 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
if (vmcb->control.asid == 0)
return false;
- if (vmcb->control.nested_ctl && !npt_enabled)
+ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ !npt_enabled)
return false;
return true;
@@ -2971,7 +3270,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
- if (nested_vmcb->control.nested_ctl) {
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
kvm_mmu_unload(&svm->vcpu);
svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
nested_svm_init_mmu_context(&svm->vcpu);
@@ -3019,7 +3318,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
svm->nested.intercept = nested_vmcb->control.intercept;
- svm_flush_tlb(&svm->vcpu);
+ svm_flush_tlb(&svm->vcpu, true);
svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -4442,12 +4741,39 @@ static void reload_tss(struct kvm_vcpu *vcpu)
load_TR_desc();
}
+static void pre_sev_run(struct vcpu_svm *svm, int cpu)
+{
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ int asid = sev_get_asid(svm->vcpu.kvm);
+
+	/* Assign the ASID allocated to this SEV guest */
+ svm->vmcb->control.asid = asid;
+
+ /*
+ * Flush guest TLB:
+ *
+	 * 1) when a different VMCB for the same ASID is to be run on the
+	 *    same host CPU, or
+	 * 2) when this VMCB was executed on a different host CPU in
+	 *    previous VMRUNs.
+ */
+ if (sd->sev_vmcbs[asid] == svm->vmcb &&
+ svm->last_cpu == cpu)
+ return;
+
+ svm->last_cpu = cpu;
+ sd->sev_vmcbs[asid] = svm->vmcb;
+ svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+ mark_dirty(svm->vmcb, VMCB_ASID);
+}
+
static void pre_svm_run(struct vcpu_svm *svm)
{
int cpu = raw_smp_processor_id();
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+ if (sev_guest(svm->vcpu.kvm))
+ return pre_sev_run(svm, cpu);
+
/* FIXME: handle wraparound of asid_generation */
if (svm->asid_generation != sd->asid_generation)
new_asid(svm, sd);
@@ -4865,7 +5191,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void svm_flush_tlb(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5208,7 +5534,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5222,7 +5548,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu);
+ svm_flush_tlb(vcpu, true);
}
static int is_disabled(void)
@@ -5308,6 +5634,12 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
entry->edx |= SVM_FEATURE_NPT;
break;
+ case 0x8000001F:
+ /* Support memory encryption cpuid if host supports it */
+ if (boot_cpu_has(X86_FEATURE_SEV))
+ cpuid(0x8000001f, &entry->eax, &entry->ebx,
+ &entry->ecx, &entry->edx);
+
}
}
@@ -5336,6 +5668,11 @@ static bool svm_xsaves_supported(void)
return false;
}
+static bool svm_umip_emulated(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -5637,6 +5974,828 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static int sev_asid_new(void)
+{
+ int pos;
+
+ /*
+	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
+ */
+ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
+ if (pos >= max_sev_asid)
+ return -EBUSY;
+
+ set_bit(pos, sev_asid_bitmap);
+ return pos + 1;
+}
+
+static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ int asid, ret;
+
+ ret = -EBUSY;
+ asid = sev_asid_new();
+ if (asid < 0)
+ return ret;
+
+ ret = sev_platform_init(&argp->error);
+ if (ret)
+ goto e_free;
+
+ sev->active = true;
+ sev->asid = asid;
+ INIT_LIST_HEAD(&sev->regions_list);
+
+ return 0;
+
+e_free:
+ __sev_asid_free(asid);
+ return ret;
+}
+
+static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+{
+ struct sev_data_activate *data;
+ int asid = sev_get_asid(kvm);
+ int ret;
+
+ wbinvd_on_all_cpus();
+
+ ret = sev_guest_df_flush(error);
+ if (ret)
+ return ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* activate ASID on the given handle */
+ data->handle = handle;
+ data->asid = asid;
+ ret = sev_guest_activate(data, error);
+ kfree(data);
+
+ return ret;
+}
+
+static int __sev_issue_cmd(int fd, int id, void *data, int *error)
+{
+ struct fd f;
+ int ret;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ ret = sev_issue_cmd_external_user(f.file, id, data, error);
+
+ fdput(f);
+ return ret;
+}
+
+static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+
+ return __sev_issue_cmd(sev->fd, id, data, error);
+}
+
+static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_start *start;
+ struct kvm_sev_launch_start params;
+ void *dh_blob, *session_blob;
+ int *error = &argp->error;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ start = kzalloc(sizeof(*start), GFP_KERNEL);
+ if (!start)
+ return -ENOMEM;
+
+ dh_blob = NULL;
+ if (params.dh_uaddr) {
+ dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
+ if (IS_ERR(dh_blob)) {
+ ret = PTR_ERR(dh_blob);
+ goto e_free;
+ }
+
+ start->dh_cert_address = __sme_set(__pa(dh_blob));
+ start->dh_cert_len = params.dh_len;
+ }
+
+ session_blob = NULL;
+ if (params.session_uaddr) {
+ session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
+ if (IS_ERR(session_blob)) {
+ ret = PTR_ERR(session_blob);
+ goto e_free_dh;
+ }
+
+ start->session_address = __sme_set(__pa(session_blob));
+ start->session_len = params.session_len;
+ }
+
+ start->handle = params.handle;
+ start->policy = params.policy;
+
+ /* create memory encryption context */
+ ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+ if (ret)
+ goto e_free_session;
+
+ /* Bind ASID to this guest */
+ ret = sev_bind_asid(kvm, start->handle, error);
+ if (ret)
+ goto e_free_session;
+
+ /* return handle to userspace */
+ params.handle = start->handle;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+ sev_unbind_asid(kvm, start->handle);
+ ret = -EFAULT;
+ goto e_free_session;
+ }
+
+ sev->handle = start->handle;
+ sev->fd = argp->sev_fd;
+
+e_free_session:
+ kfree(session_blob);
+e_free_dh:
+ kfree(dh_blob);
+e_free:
+ kfree(start);
+ return ret;
+}
+
+static int get_num_contig_pages(int idx, struct page **inpages,
+ unsigned long npages)
+{
+ unsigned long paddr, next_paddr;
+ int i = idx + 1, pages = 1;
+
+ /* find the number of contiguous pages starting from idx */
+ paddr = __sme_page_pa(inpages[idx]);
+ while (i < npages) {
+ next_paddr = __sme_page_pa(inpages[i++]);
+ if ((paddr + PAGE_SIZE) == next_paddr) {
+ pages++;
+ paddr = next_paddr;
+ continue;
+ }
+ break;
+ }
+
+ return pages;
+}
+
+static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_launch_update_data params;
+ struct sev_data_launch_update_data *data;
+ struct page **inpages;
+ int i, ret, pages;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ vaddr = params.uaddr;
+ size = params.len;
+ vaddr_end = vaddr + size;
+
+ /* Lock the user memory. */
+ inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+ if (!inpages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The LAUNCH_UPDATE command will perform in-place encryption of the
+	 * memory content (i.e. it will write the same memory region with C=1).
+	 * The cache may still contain the data with C=0, i.e., unencrypted,
+	 * so invalidate it first.
+ */
+ sev_clflush_pages(inpages, npages);
+
+ for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
+ int offset, len;
+
+ /*
+ * If the user buffer is not page-aligned, calculate the offset
+ * within the page.
+ */
+ offset = vaddr & (PAGE_SIZE - 1);
+
+ /* Calculate the number of pages that can be encrypted in one go. */
+ pages = get_num_contig_pages(i, inpages, npages);
+
+ len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
+
+ data->handle = sev->handle;
+ data->len = len;
+ data->address = __sme_page_pa(inpages[i]) + offset;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+ if (ret)
+ goto e_unpin;
+
+ size -= len;
+ next_vaddr = vaddr + len;
+ }
+
+e_unpin:
+	/* The memory content was updated, so mark the pages dirty. */
+ for (i = 0; i < npages; i++) {
+ set_page_dirty_lock(inpages[i]);
+ mark_page_accessed(inpages[i]);
+ }
+ /* unlock the user pages */
+ sev_unpin_memory(kvm, inpages, npages);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_measure *data;
+ struct kvm_sev_launch_measure params;
+ void *blob = NULL;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* User wants to query the blob length */
+ if (!params.len)
+ goto cmd;
+
+ if (params.uaddr) {
+ if (params.len > SEV_FW_BLOB_MAX_SIZE) {
+ ret = -EINVAL;
+ goto e_free;
+ }
+
+ if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+
+ ret = -ENOMEM;
+ blob = kmalloc(params.len, GFP_KERNEL);
+ if (!blob)
+ goto e_free;
+
+ data->address = __psp_pa(blob);
+ data->len = params.len;
+ }
+
+cmd:
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+
+ /*
+	 * If userspace was only querying the blob length, the firmware has
+	 * already responded with the expected length in data->len.
+ */
+ if (!params.len)
+ goto done;
+
+ if (ret)
+ goto e_free_blob;
+
+ if (blob) {
+ if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ ret = -EFAULT;
+ }
+
+done:
+ params.len = data->len;
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_finish *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
+
+ kfree(data);
+ return ret;
+}
+
+static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_guest_status params;
+ struct sev_data_guest_status *data;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+ if (ret)
+ goto e_free;
+
+ params.policy = data->policy;
+ params.state = data->state;
+ params.handle = data->handle;
+
+ if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ ret = -EFAULT;
+e_free:
+ kfree(data);
+ return ret;
+}
+
+static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
+ unsigned long dst, int size,
+ int *error, bool enc)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_dbg *data;
+ int ret;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->handle = sev->handle;
+ data->dst_addr = dst;
+ data->src_addr = src;
+ data->len = size;
+
+ ret = sev_issue_cmd(kvm,
+ enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+ data, error);
+ kfree(data);
+ return ret;
+}
+
+static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
+ unsigned long dst_paddr, int sz, int *err)
+{
+ int offset;
+
+ /*
+	 * It's safe to read more than was asked for; the caller must ensure
+	 * that the destination has enough space.
+ */
+ src_paddr = round_down(src_paddr, 16);
+ offset = src_paddr & 15;
+ sz = round_up(sz + offset, 16);
+
+ return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
+}
+
+static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user dst_uaddr,
+ unsigned long dst_paddr,
+ int size, int *err)
+{
+ struct page *tpage = NULL;
+ int ret, offset;
+
+	/* If the inputs are not 16-byte aligned, use an intermediate buffer. */
+ if (!IS_ALIGNED(dst_paddr, 16) ||
+ !IS_ALIGNED(paddr, 16) ||
+ !IS_ALIGNED(size, 16)) {
+ tpage = (void *)alloc_page(GFP_KERNEL);
+ if (!tpage)
+ return -ENOMEM;
+
+ dst_paddr = __sme_page_pa(tpage);
+ }
+
+ ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
+ if (ret)
+ goto e_free;
+
+ if (tpage) {
+ offset = paddr & 15;
+ if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
+ page_address(tpage) + offset, size))
+ ret = -EFAULT;
+ }
+
+e_free:
+ if (tpage)
+ __free_page(tpage);
+
+ return ret;
+}
+
+static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ unsigned long __user vaddr,
+ unsigned long dst_paddr,
+ unsigned long __user dst_vaddr,
+ int size, int *error)
+{
+ struct page *src_tpage = NULL;
+ struct page *dst_tpage = NULL;
+ int ret, len = size;
+
+	/* If the source buffer is not 16-byte aligned, use an intermediate buffer */
+ if (!IS_ALIGNED(vaddr, 16)) {
+ src_tpage = alloc_page(GFP_KERNEL);
+ if (!src_tpage)
+ return -ENOMEM;
+
+ if (copy_from_user(page_address(src_tpage),
+ (void __user *)(uintptr_t)vaddr, size)) {
+ __free_page(src_tpage);
+ return -EFAULT;
+ }
+
+ paddr = __sme_page_pa(src_tpage);
+ }
+
+ /*
+	 * If the destination buffer or the length is not 16-byte aligned,
+	 * do a read-modify-write:
+	 * - decrypt the destination into an intermediate buffer
+	 * - copy the source buffer into the intermediate buffer
+	 * - use the intermediate buffer as the source buffer
+ */
+ if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+ dst_tpage = alloc_page(GFP_KERNEL);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ ret = __sev_dbg_decrypt(kvm, dst_paddr,
+ __sme_page_pa(dst_tpage), size, error);
+ if (ret)
+ goto e_free;
+
+ /*
+		 * If the source was already staged into a kernel page above,
+		 * use memcpy(); otherwise copy_from_user().
+ */
+ dst_offset = dst_paddr & 15;
+
+ if (src_tpage)
+ memcpy(page_address(dst_tpage) + dst_offset,
+ page_address(src_tpage), size);
+ else {
+ if (copy_from_user(page_address(dst_tpage) + dst_offset,
+ (void __user *)(uintptr_t)vaddr, size)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+ }
+
+ paddr = __sme_page_pa(dst_tpage);
+ dst_paddr = round_down(dst_paddr, 16);
+ len = round_up(size, 16);
+ }
+
+ ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
+
+e_free:
+ if (src_tpage)
+ __free_page(src_tpage);
+ if (dst_tpage)
+ __free_page(dst_tpage);
+ return ret;
+}
+
+static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
+{
+ unsigned long vaddr, vaddr_end, next_vaddr;
+ unsigned long dst_vaddr, dst_vaddr_end;
+ struct page **src_p, **dst_p;
+ struct kvm_sev_dbg debug;
+ unsigned long n;
+ int ret, size;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+ return -EFAULT;
+
+ vaddr = debug.src_uaddr;
+ size = debug.len;
+ vaddr_end = vaddr + size;
+ dst_vaddr = debug.dst_uaddr;
+ dst_vaddr_end = dst_vaddr + size;
+
+ for (; vaddr < vaddr_end; vaddr = next_vaddr) {
+ int len, s_off, d_off;
+
+ /* lock userspace source and destination page */
+ src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+ if (!src_p)
+ return -EFAULT;
+
+ dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+ if (!dst_p) {
+ sev_unpin_memory(kvm, src_p, n);
+ return -EFAULT;
+ }
+
+ /*
+ * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
+		 * memory content in place (i.e. they write the same memory
+		 * region). The cache may still contain data with the opposite
+		 * C-bit, so invalidate it first.
+ */
+ sev_clflush_pages(src_p, 1);
+ sev_clflush_pages(dst_p, 1);
+
+ /*
+		 * Since the user buffers may not be page aligned, calculate
+		 * the offsets within the page.
+ */
+ s_off = vaddr & ~PAGE_MASK;
+ d_off = dst_vaddr & ~PAGE_MASK;
+ len = min_t(size_t, (PAGE_SIZE - s_off), size);
+
+ if (dec)
+ ret = __sev_dbg_decrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ dst_vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ len, &argp->error);
+ else
+ ret = __sev_dbg_encrypt_user(kvm,
+ __sme_page_pa(src_p[0]) + s_off,
+ vaddr,
+ __sme_page_pa(dst_p[0]) + d_off,
+ dst_vaddr,
+ len, &argp->error);
+
+ sev_unpin_memory(kvm, src_p, 1);
+ sev_unpin_memory(kvm, dst_p, 1);
+
+ if (ret)
+ goto err;
+
+ next_vaddr = vaddr + len;
+ dst_vaddr = dst_vaddr + len;
+ size -= len;
+ }
+err:
+ return ret;
+}
+
+static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct sev_data_launch_secret *data;
+ struct kvm_sev_launch_secret params;
+ struct page **pages;
+ void *blob, *hdr;
+ unsigned long n;
+ int ret;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ return -EFAULT;
+
+ pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+ if (!pages)
+ return -ENOMEM;
+
+ /*
+	 * The secret must be copied into a contiguous memory region; verify
+	 * that the userspace memory pages are contiguous before issuing the
+	 * command.
+ */
+ if (get_num_contig_pages(0, pages, n) != n) {
+ ret = -EINVAL;
+ goto e_unpin_memory;
+ }
+
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto e_unpin_memory;
+
+ blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+ if (IS_ERR(blob)) {
+ ret = PTR_ERR(blob);
+ goto e_free;
+ }
+
+ data->trans_address = __psp_pa(blob);
+ data->trans_len = params.trans_len;
+
+ hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+ if (IS_ERR(hdr)) {
+ ret = PTR_ERR(hdr);
+ goto e_free_blob;
+ }
+	data->hdr_address = __psp_pa(hdr);
+	data->hdr_len = params.hdr_len;
+
+ data->handle = sev->handle;
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+
+ kfree(hdr);
+
+e_free_blob:
+ kfree(blob);
+e_free:
+ kfree(data);
+e_unpin_memory:
+ sev_unpin_memory(kvm, pages, n);
+ return ret;
+}
+
+static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
+{
+ struct kvm_sev_cmd sev_cmd;
+ int r;
+
+ if (!svm_sev_enabled())
+ return -ENOTTY;
+
+ if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
+ return -EFAULT;
+
+ mutex_lock(&kvm->lock);
+
+ switch (sev_cmd.id) {
+ case KVM_SEV_INIT:
+ r = sev_guest_init(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_START:
+ r = sev_launch_start(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_UPDATE_DATA:
+ r = sev_launch_update_data(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_MEASURE:
+ r = sev_launch_measure(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_LAUNCH_FINISH:
+ r = sev_launch_finish(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_GUEST_STATUS:
+ r = sev_guest_status(kvm, &sev_cmd);
+ break;
+ case KVM_SEV_DBG_DECRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, true);
+ break;
+ case KVM_SEV_DBG_ENCRYPT:
+ r = sev_dbg_crypt(kvm, &sev_cmd, false);
+ break;
+ case KVM_SEV_LAUNCH_SECRET:
+ r = sev_launch_secret(kvm, &sev_cmd);
+ break;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
+ r = -EFAULT;
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
+static int svm_register_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct enc_region *region;
+ int ret = 0;
+
+ if (!sev_guest(kvm))
+ return -ENOTTY;
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+ if (!region->pages) {
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ /*
+ * The guest may change the memory encryption attribute from C=0 -> C=1
+	 * or vice versa for this memory range. Make sure the caches are
+	 * flushed so that guest data is written to memory with the
+	 * correct C-bit.
+ */
+ sev_clflush_pages(region->pages, region->npages);
+
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ mutex_lock(&kvm->lock);
+ list_add_tail(&region->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
+ return ret;
+
+e_free:
+ kfree(region);
+ return ret;
+}
+
+static struct enc_region *
+find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
+{
+ struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct list_head *head = &sev->regions_list;
+ struct enc_region *i;
+
+ list_for_each_entry(i, head, list) {
+ if (i->uaddr == range->addr &&
+ i->size == range->size)
+ return i;
+ }
+
+ return NULL;
+}
+
+static int svm_unregister_enc_region(struct kvm *kvm,
+ struct kvm_enc_region *range)
+{
+ struct enc_region *region;
+ int ret;
+
+ mutex_lock(&kvm->lock);
+
+ if (!sev_guest(kvm)) {
+ ret = -ENOTTY;
+ goto failed;
+ }
+
+ region = find_enc_region(kvm, range);
+ if (!region) {
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ __unregister_enc_region_locked(kvm, region);
+
+ mutex_unlock(&kvm->lock);
+ return 0;
+
+failed:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
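[Editor's note] The svm_register_enc_region()/svm_unregister_enc_region() hooks above are reached from the generic KVM_MEMORY_ENCRYPT_{REG,UNREG}_REGION vm ioctls. A hedged userspace sketch (needs <stdint.h>, <sys/ioctl.h> and <linux/kvm.h>; guest_ram and guest_ram_size are placeholders, error handling elided):

	static int pin_guest_ram(int vm_fd, void *guest_ram, __u64 guest_ram_size)
	{
		struct kvm_enc_region region = {
			.addr = (__u64)(uintptr_t)guest_ram,
			.size = guest_ram_size,
		};

		/* Pin guest RAM so encrypted pages keep a stable host mapping. */
		return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
	}

The matching KVM_MEMORY_ENCRYPT_UNREG_REGION call with the same region undoes the pin before VM teardown.
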
@@ -5653,7 +6812,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_reset = svm_vcpu_reset,
.vm_init = avic_vm_init,
- .vm_destroy = avic_vm_destroy,
+ .vm_destroy = svm_vm_destroy,
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
@@ -5713,6 +6872,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.load_eoi_exitmap = svm_load_eoi_exitmap,
.hwapic_irr_update = svm_hwapic_irr_update,
.hwapic_isr_update = svm_hwapic_isr_update,
+ .sync_pir_to_irr = kvm_lapic_find_highest_irr,
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
@@ -5729,6 +6889,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.invpcid_supported = svm_invpcid_supported,
.mpx_supported = svm_mpx_supported,
.xsaves_supported = svm_xsaves_supported,
+ .umip_emulated = svm_umip_emulated,
.set_supported_cpuid = svm_set_supported_cpuid,
@@ -5752,6 +6913,10 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pre_enter_smm = svm_pre_enter_smm,
.pre_leave_smm = svm_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .mem_enc_op = svm_mem_enc_op,
+ .mem_enc_reg_region = svm_register_enc_region,
+ .mem_enc_unreg_region = svm_unregister_enc_region,
};
static int __init svm_init(void)
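[Editor's note] Taken together, the svm_mem_enc_op() commands implement the SEV launch flow. A hedged end-to-end sketch from userspace (per-command parameter structs and error handling elided; all commands go through the same vm ioctl, headers as in the previous sketch):

	static int sev_launch_guest(int vm_fd, int sev_fd)
	{
		struct kvm_sev_cmd cmd = { .sev_fd = sev_fd };
		const __u32 steps[] = {
			KVM_SEV_INIT,			/* create encryption context */
			KVM_SEV_LAUNCH_START,		/* bind ASID, set policy/session */
			KVM_SEV_LAUNCH_UPDATE_DATA,	/* encrypt guest memory in place */
			KVM_SEV_LAUNCH_MEASURE,		/* fetch launch measurement */
			KVM_SEV_LAUNCH_FINISH,		/* finalize the launch */
		};
		unsigned int i;
		int r;

		for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
			cmd.id = steps[i];
			/* cmd.data would point at the per-command params. */
			r = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
			if (r)
				return r;
		}
		return 0;
	}
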
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bee4c49f6dd0..f427723dc7db 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -419,6 +419,12 @@ struct __packed vmcs12 {
#define VMCS12_SIZE 0x1000
/*
+ * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
+ * supported VMCS12 field encoding.
+ */
+#define VMCS12_MAX_FIELD_INDEX 0x17
+
+/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
*/
@@ -441,6 +447,7 @@ struct nested_vmx {
* data hold by vmcs12
*/
bool sync_shadow_vmcs;
+ bool dirty_vmcs12;
bool change_vmcs01_virtual_x2apic_mode;
/* L2 must run next, and mustn't decide to exit to L1. */
@@ -664,6 +671,8 @@ struct vcpu_vmx {
u32 host_pkru;
+ unsigned long host_debugctlmsr;
+
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@@ -692,67 +701,24 @@ static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
return &(to_vmx(vcpu)->pi_desc);
}
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
-#define FIELD(number, name) [number] = VMCS12_OFFSET(name)
-#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
- [number##_HIGH] = VMCS12_OFFSET(name)+4
+#define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name)
+#define FIELD64(number, name) \
+ FIELD(number, name), \
+ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-static unsigned long shadow_read_only_fields[] = {
- /*
- * We do NOT shadow fields that are modified when L0
- * traps and emulates any vmx instruction (e.g. VMPTRLD,
- * VMXON...) executed by L1.
- * For example, VM_INSTRUCTION_ERROR is read
- * by L1 if a vmx instruction fails (part of the error path).
- * Note the code assumes this logic. If for some reason
- * we start shadowing these fields then we need to
- * force a shadow sync when L0 emulates vmx instructions
- * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
- * by nested_vmx_failValid)
- */
- VM_EXIT_REASON,
- VM_EXIT_INTR_INFO,
- VM_EXIT_INSTRUCTION_LEN,
- IDT_VECTORING_INFO_FIELD,
- IDT_VECTORING_ERROR_CODE,
- VM_EXIT_INTR_ERROR_CODE,
- EXIT_QUALIFICATION,
- GUEST_LINEAR_ADDRESS,
- GUEST_PHYSICAL_ADDRESS
+static u16 shadow_read_only_fields[] = {
+#define SHADOW_FIELD_RO(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
-static unsigned long shadow_read_write_fields[] = {
- TPR_THRESHOLD,
- GUEST_RIP,
- GUEST_RSP,
- GUEST_CR0,
- GUEST_CR3,
- GUEST_CR4,
- GUEST_INTERRUPTIBILITY_INFO,
- GUEST_RFLAGS,
- GUEST_CS_SELECTOR,
- GUEST_CS_AR_BYTES,
- GUEST_CS_LIMIT,
- GUEST_CS_BASE,
- GUEST_ES_BASE,
- GUEST_BNDCFGS,
- CR0_GUEST_HOST_MASK,
- CR0_READ_SHADOW,
- CR4_READ_SHADOW,
- TSC_OFFSET,
- EXCEPTION_BITMAP,
- CPU_BASED_VM_EXEC_CONTROL,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- VM_ENTRY_INTR_INFO_FIELD,
- VM_ENTRY_INSTRUCTION_LEN,
- VM_ENTRY_EXCEPTION_ERROR_CODE,
- HOST_FS_BASE,
- HOST_GS_BASE,
- HOST_FS_SELECTOR,
- HOST_GS_SELECTOR
+static u16 shadow_read_write_fields[] = {
+#define SHADOW_FIELD_RW(x) x,
+#include "vmx_shadow_fields.h"
};
static int max_shadow_read_write_fields =
ARRAY_SIZE(shadow_read_write_fields);
@@ -905,13 +871,17 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
unsigned short offset;
+ unsigned index;
+
+ if (field >> 15)
+ return -ENOENT;
- BUILD_BUG_ON(size > SHRT_MAX);
- if (field >= size)
+ index = ROL16(field, 6);
+ if (index >= size)
return -ENOENT;
- field = array_index_nospec(field, size);
- offset = vmcs_field_to_offset_table[field];
+ index = array_index_nospec(index, size);
+ offset = vmcs_field_to_offset_table[index];
if (offset == 0)
return -ENOENT;
return offset;
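
vmcs_field_to_offset() keeps the array_index_nospec() clamp even with the ROL16 index: a mispredicted bounds check could still speculatively load out of bounds and leak the result through a cache side channel (Spectre v1). A sketch of the generic, branch-free mask the helper is built on (illustration only; x86 actually uses an asm variant):

#include <stdio.h>

#define BITS_PER_LONG (sizeof(unsigned long) * 8)

/* All-ones when index < size, zero otherwise, computed without a
 * conditional branch that the CPU could speculate past. */
static unsigned long mask_nospec(unsigned long index, unsigned long size)
{
        return (unsigned long)(~(long)(index | (size - 1UL - index)) >>
                               (BITS_PER_LONG - 1));
}

int main(void)
{
        unsigned long size = 26, i;

        for (i = 24; i < 28; i++)   /* 24, 25 pass; 26, 27 clamp to 0 */
                printf("index %lu -> %lu\n", i, i & mask_nospec(i, size));
        return 0;
}
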
@@ -957,8 +927,6 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
enum {
- VMX_IO_BITMAP_A,
- VMX_IO_BITMAP_B,
VMX_VMREAD_BITMAP,
VMX_VMWRITE_BITMAP,
VMX_BITMAP_NR
@@ -966,8 +934,6 @@ enum {
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
-#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
-#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
@@ -2373,6 +2339,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
+ vmx->host_debugctlmsr = get_debugctlmsr();
}
static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2930,7 +2897,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
- vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+ vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
}
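
For reference, MSR_IA32_VMX_VMCS_ENUM reports the highest supported field index in bits 9:1, which is why the new constant is shifted left by one instead of being written as the old magic number; 0x17 << 1 == 0x2e, so the value guests see is unchanged. A trivial check (illustration only):

#include <stdio.h>

#define VMCS12_MAX_FIELD_INDEX 0x17

int main(void)
{
        unsigned long vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;

        /* Bits 9:1 of the MSR hold the highest field index. */
        printf("VMCS_ENUM = %#lx (index %#lx)\n", vmcs_enum, vmcs_enum >> 1);
        return 0;
}
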
/*
@@ -3266,6 +3233,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
switch (msr_info->index) {
@@ -3277,8 +3245,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ vmx_load_host_state(vmx);
+ msr_info->data = vmx->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
@@ -3318,13 +3286,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
+ !(vmx->msr_ia32_feature_control &
FEATURE_CONTROL_LMCE))
return 1;
msr_info->data = vcpu->arch.mcg_ext_ctl;
break;
case MSR_IA32_FEATURE_CONTROL:
- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+ msr_info->data = vmx->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -3341,7 +3309,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+ msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -3727,7 +3695,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#endif
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_INVLPG_EXITING |
@@ -3757,6 +3725,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -3982,17 +3951,17 @@ static void free_kvm_area(void)
}
}
-enum vmcs_field_type {
- VMCS_FIELD_TYPE_U16 = 0,
- VMCS_FIELD_TYPE_U64 = 1,
- VMCS_FIELD_TYPE_U32 = 2,
- VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+enum vmcs_field_width {
+ VMCS_FIELD_WIDTH_U16 = 0,
+ VMCS_FIELD_WIDTH_U64 = 1,
+ VMCS_FIELD_WIDTH_U32 = 2,
+ VMCS_FIELD_WIDTH_NATURAL_WIDTH = 3
};
-static inline int vmcs_field_type(unsigned long field)
+static inline int vmcs_field_width(unsigned long field)
{
if (0x1 & field) /* the *_HIGH fields are all 32 bit */
- return VMCS_FIELD_TYPE_U32;
+ return VMCS_FIELD_WIDTH_U32;
return (field >> 13) & 0x3;
}
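
The rename from vmcs_field_type() to vmcs_field_width() matches what the function actually decodes: bits 14:13 of a field encoding give its width, and the odd-numbered *_HIGH aliases of 64-bit fields always read as 32 bits. A quick standalone decode of a few real encodings (illustration only):

#include <stdio.h>

enum { W_U16, W_U64, W_U32, W_NATURAL };  /* matches the SDM encoding */

static int field_width(unsigned long field)
{
        if (field & 1)                    /* *_HIGH halves are 32-bit */
                return W_U32;
        return (field >> 13) & 0x3;
}

int main(void)
{
        printf("%d\n", field_width(0x0800)); /* GUEST_ES_SELECTOR -> W_U16 */
        printf("%d\n", field_width(0x2400)); /* GUEST_PHYSICAL_ADDRESS -> W_U64 */
        printf("%d\n", field_width(0x2401)); /* ..._HIGH alias -> W_U32 */
        printf("%d\n", field_width(0x6800)); /* GUEST_CR0 -> W_NATURAL */
        return 0;
}
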
@@ -4005,43 +3974,66 @@ static void init_vmcs_shadow_fields(void)
{
int i, j;
- /* No checks for read only fields yet */
+ for (i = j = 0; i < max_shadow_read_only_fields; i++) {
+ u16 field = shadow_read_only_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_only_fields ||
+ shadow_read_only_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_only_field %x\n",
+ field + 1);
+
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
+ if (j < i)
+ shadow_read_only_fields[j] = field;
+ j++;
+ }
+ max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
- switch (shadow_read_write_fields[i]) {
- case GUEST_BNDCFGS:
- if (!kvm_mpx_supported())
+ u16 field = shadow_read_write_fields[i];
+ if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
+ (i + 1 == max_shadow_read_write_fields ||
+ shadow_read_write_fields[i + 1] != field + 1))
+ pr_err("Missing field from shadow_read_write_field %x\n",
+ field + 1);
+
+ /*
+ * PML and the preemption timer can be emulated, but the
+ * processor cannot vmwrite to fields that don't exist
+ * on bare metal.
+ */
+ switch (field) {
+ case GUEST_PML_INDEX:
+ if (!cpu_has_vmx_pml())
+ continue;
+ break;
+ case VMX_PREEMPTION_TIMER_VALUE:
+ if (!cpu_has_vmx_preemption_timer())
+ continue;
+ break;
+ case GUEST_INTR_STATUS:
+ if (!cpu_has_vmx_apicv())
continue;
break;
default:
break;
}
+ clear_bit(field, vmx_vmwrite_bitmap);
+ clear_bit(field, vmx_vmread_bitmap);
+#ifdef CONFIG_X86_64
+ if (field & 1)
+ continue;
+#endif
if (j < i)
- shadow_read_write_fields[j] =
- shadow_read_write_fields[i];
+ shadow_read_write_fields[j] = field;
j++;
}
max_shadow_read_write_fields = j;
-
- /* shadowed fields guest access without vmexit */
- for (i = 0; i < max_shadow_read_write_fields; i++) {
- unsigned long field = shadow_read_write_fields[i];
-
- clear_bit(field, vmx_vmwrite_bitmap);
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
- clear_bit(field + 1, vmx_vmwrite_bitmap);
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
- }
- for (i = 0; i < max_shadow_read_only_fields; i++) {
- unsigned long field = shadow_read_only_fields[i];
-
- clear_bit(field, vmx_vmread_bitmap);
- if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
- clear_bit(field + 1, vmx_vmread_bitmap);
- }
}
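
What makes the loops above work is the VMREAD/VMWRITE bitmap semantics: a set bit forces a VM exit for that field, a cleared bit lets the CPU satisfy the access from the shadow VMCS directly. A sketch of that convention, assuming one bit per 16-bit field encoding (plain C, not kernel code):

#include <limits.h>
#include <string.h>

#define ENCODINGS  0x8000   /* bit 15 of an encoding is rejected earlier */
#define LONG_BITS  (sizeof(unsigned long) * CHAR_BIT)

static unsigned long vmread_bitmap[ENCODINGS / LONG_BITS];

/* Start with every access trapping, as hardware_setup() does with
 * memset(..., 0xff, ...). */
static void trap_everything(void)
{
        memset(vmread_bitmap, 0xff, sizeof(vmread_bitmap));
}

/* Clearing a field's bit is what lets L1 read it exit-free. */
static void shadow_field(unsigned int field)
{
        vmread_bitmap[field / LONG_BITS] &= ~(1UL << (field % LONG_BITS));
}

int main(void)
{
        trap_everything();
        shadow_field(0x6800);   /* let L1 read GUEST_CR0 without an exit */
        return 0;
}
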
static __init int alloc_kvm_area(void)
@@ -4254,9 +4246,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
-static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
+static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid,
+ bool invalidate_gpa)
{
- if (enable_ept) {
+ if (enable_ept && (invalidate_gpa || !enable_vpid)) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa));
@@ -4265,15 +4258,15 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
}
}
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}
static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
{
if (enable_ept)
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -4471,7 +4464,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
ept_load_pdptrs(vcpu);
}
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
vmcs_writel(GUEST_CR3, guest_cr3);
}
@@ -4488,6 +4481,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(to_vmx(vcpu)->rmode.vm86_active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+ hw_cr4 &= ~X86_CR4_UMIP;
+ } else
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+
if (cr4 & X86_CR4_VMXE) {
/*
* To use VMXON (and later other VMX instructions), a guest
@@ -5119,11 +5120,6 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
{
int f = sizeof(unsigned long);
- if (!cpu_has_vmx_msr_bitmap()) {
- WARN_ON(1);
- return;
- }
-
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
@@ -5263,7 +5259,8 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
if (max_irr != 256) {
vapic_page = kmap(vmx->nested.virtual_apic_page);
- __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
+ __kvm_apic_update_irr(vmx->nested.pi_desc->pir,
+ vapic_page, &max_irr);
kunmap(vmx->nested.virtual_apic_page);
status = vmcs_read16(GUEST_INTR_STATUS);
@@ -5323,14 +5320,15 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
- /* the PIR and ON have been set by L1. */
- kvm_vcpu_trigger_posted_interrupt(vcpu, true);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
*/
vmx->nested.pi_pending = true;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ /* the PIR and ON have been set by L1. */
+ if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
+ kvm_vcpu_kick(vcpu);
return 0;
}
return -1;
@@ -5509,6 +5507,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
struct kvm_vcpu *vcpu = &vmx->vcpu;
u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
+
if (!cpu_need_virtualize_apic_accesses(vcpu))
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
if (vmx->vpid == 0)
@@ -5527,6 +5526,11 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
+	/*
+	 * SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP
+	 * in vmx_set_cr4().
+	 */
+ exec_control &= ~SECONDARY_EXEC_DESC;
+
/* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
(handle_vmptrld).
We can NOT enable shadow_vmcs here because we don't have yet
@@ -5646,10 +5650,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
#endif
int i;
- /* I/O */
- vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a));
- vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b));
-
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
@@ -6304,6 +6304,12 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
return kvm_set_cr4(vcpu, val);
}
+static int handle_desc(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+ return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+}
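
handle_desc() is the back half of UMIP emulation: when the guest sets CR4.UMIP on hardware without the feature, vmx_set_cr4() turns on SECONDARY_EXEC_DESC so that SGDT/SIDT/SLDT/SMSW/STR exit, and the instruction emulator then applies UMIP's CPL > 0 #GP check. A minimal sketch of the decision, with a hypothetical helper name; the UMIP bit is also masked out of the hardware CR4 in this case, since the CPU would reject an unknown bit:

#include <stdbool.h>

#define X86_CR4_UMIP (1UL << 11)

/* Hypothetical predicate: descriptor-table exiting is only needed when
 * the guest wants UMIP and the host CPU cannot enforce it natively. */
static bool need_desc_exiting(unsigned long guest_cr4, bool host_has_umip)
{
        return (guest_cr4 & X86_CR4_UMIP) && !host_has_umip;
}

int main(void)
{
        return need_desc_exiting(X86_CR4_UMIP, false) ? 0 : 1;
}
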
+
static int handle_cr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification, val;
@@ -6760,7 +6766,21 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu) &&
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
- return kvm_skip_emulated_instruction(vcpu);
+ /*
+ * Doing kvm_skip_emulated_instruction() relies on undefined
+ * behavior: Intel's manual doesn't mandate that
+ * VM_EXIT_INSTRUCTION_LEN be set in the VMCS when an EPT
+ * MISCONFIG occurs. Real hardware has been observed to set it,
+ * but other hypervisors (namely Hyper-V) don't, so we would end
+ * up advancing RIP by some random value. Disable fast mmio when
+ * running nested and keep it for real hardware, in the hope that
+ * VM_EXIT_INSTRUCTION_LEN is always set correctly there.
+ */
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ return kvm_skip_emulated_instruction(vcpu);
+ else
+ return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
+ NULL, 0) == EMULATE_DONE;
}
ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -6957,10 +6977,6 @@ static __init int hardware_setup(void)
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-
- memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
goto out;
@@ -6973,11 +6989,6 @@ static __init int hardware_setup(void)
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
- if (!cpu_has_vmx_shadow_vmcs())
- enable_shadow_vmcs = 0;
- if (enable_shadow_vmcs)
- init_vmcs_shadow_fields();
-
if (!cpu_has_vmx_ept() ||
!cpu_has_vmx_ept_4levels() ||
!cpu_has_vmx_ept_mt_wb() ||
@@ -7063,6 +7074,11 @@ static __init int hardware_setup(void)
kvm_x86_ops->cancel_hv_timer = NULL;
}
+ if (!cpu_has_vmx_shadow_vmcs())
+ enable_shadow_vmcs = 0;
+ if (enable_shadow_vmcs)
+ init_vmcs_shadow_fields();
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -7593,17 +7609,17 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
p = ((char *)(get_vmcs12(vcpu))) + offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*ret = *((natural_width *)p);
return 0;
- case VMCS_FIELD_TYPE_U16:
+ case VMCS_FIELD_WIDTH_U16:
*ret = *((u16 *)p);
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*ret = *((u32 *)p);
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*ret = *((u64 *)p);
return 0;
default:
@@ -7620,17 +7636,17 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
if (offset < 0)
return offset;
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
+ switch (vmcs_field_width(field)) {
+ case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U32:
+ case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_U64:
+ case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
return 0;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+ case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
return 0;
default:
@@ -7646,7 +7662,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
+ const u16 *fields = shadow_read_write_fields;
const int num_fields = max_shadow_read_write_fields;
preempt_disable();
@@ -7655,23 +7671,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
for (i = 0; i < num_fields; i++) {
field = fields[i];
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- field_value = vmcs_read16(field);
- break;
- case VMCS_FIELD_TYPE_U32:
- field_value = vmcs_read32(field);
- break;
- case VMCS_FIELD_TYPE_U64:
- field_value = vmcs_read64(field);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- field_value = vmcs_readl(field);
- break;
- default:
- WARN_ON(1);
- continue;
- }
+ field_value = __vmcs_readl(field);
vmcs12_write_any(&vmx->vcpu, field, field_value);
}
@@ -7683,7 +7683,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
+ const u16 *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -7702,24 +7702,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
-
- switch (vmcs_field_type(field)) {
- case VMCS_FIELD_TYPE_U16:
- vmcs_write16(field, (u16)field_value);
- break;
- case VMCS_FIELD_TYPE_U32:
- vmcs_write32(field, (u32)field_value);
- break;
- case VMCS_FIELD_TYPE_U64:
- vmcs_write64(field, (u64)field_value);
- break;
- case VMCS_FIELD_TYPE_NATURAL_WIDTH:
- vmcs_writel(field, (long)field_value);
- break;
- default:
- WARN_ON(1);
- break;
- }
+ __vmcs_writel(field, field_value);
}
}
@@ -7788,8 +7771,10 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
{
unsigned long field;
gva_t gva;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+
/* The value to write might be 32 or 64 bits, depending on L1's long
* mode, and eventually we need to write that into a field of several
* possible lengths. The code below first zero-extends the value to 64
@@ -7832,6 +7817,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+ switch (field) {
+#define SHADOW_FIELD_RW(x) case x:
+#include "vmx_shadow_fields.h"
+ /*
+ * The fields that can be updated by L1 without a vmexit are
+ * always updated in the vmcs02; the others go down the slow
+ * path of prepare_vmcs02.
+ */
+ break;
+ default:
+ vmx->nested.dirty_vmcs12 = true;
+ break;
+ }
+
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -7846,6 +7845,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
__pa(vmx->vmcs01.shadow_vmcs));
vmx->nested.sync_shadow_vmcs = true;
}
+ vmx->nested.dirty_vmcs12 = true;
}
/* Emulate the VMPTRLD instruction */
@@ -8066,7 +8066,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -8260,6 +8260,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
+ [EXIT_REASON_GDTR_IDTR] = handle_desc,
+ [EXIT_REASON_LDTR_TR] = handle_desc,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
@@ -9069,36 +9071,23 @@ static void vmx_set_rvi(int vector)
static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
- if (!is_guest_mode(vcpu)) {
- vmx_set_rvi(max_irr);
- return;
- }
-
- if (max_irr == -1)
- return;
-
- /*
- * In guest mode. If a vmexit is needed, vmx_check_nested_events
- * handles it.
- */
- if (nested_exit_on_intr(vcpu))
- return;
-
/*
- * Else, fall back to pre-APICv interrupt injection since L2
- * is run without virtual interrupt delivery.
+ * When running L2, updating RVI is only relevant if
+ * vmcs12 virtual-interrupt-delivery is enabled.
+ * However, it can be enabled only if L1 also intercepts
+ * external interrupts, and in that case we should not
+ * update vmcs02's RVI but intercept the interrupt instead.
+ * Therefore, do nothing when running L2.
*/
- if (!kvm_event_needs_reinjection(vcpu) &&
- vmx_interrupt_allowed(vcpu)) {
- kvm_queue_interrupt(vcpu, max_irr, false);
- vmx_inject_irq(vcpu);
- }
+ if (!is_guest_mode(vcpu))
+ vmx_set_rvi(max_irr);
}
static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
+ bool max_irr_updated;
WARN_ON(!vcpu->arch.apicv_active);
if (pi_test_on(&vmx->pi_desc)) {
@@ -9108,7 +9097,23 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
- max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+ max_irr_updated =
+ kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
+
+ /*
+ * If we are running L2 and L1 has a new pending interrupt
+ * which can be injected, we should re-evaluate
+ * what should be done with this new L1 interrupt.
+ * If L1 intercepts external-interrupts, we should
+ * exit from L2 to L1. Otherwise, interrupt should be
+ * delivered directly to L2.
+ */
+ if (is_guest_mode(vcpu) && max_irr_updated) {
+ if (nested_exit_on_intr(vcpu))
+ kvm_vcpu_exiting_guest_mode(vcpu);
+ else
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
}
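
kvm_apic_update_irr() now also reports whether the PIR contained anything that was not already in the IRR, which is what lets the code above choose between forcing a nested exit and merely re-evaluating events. A simplified standalone sketch of the PIR-to-IRR merge, assuming 64-bit longs and GCC/Clang atomic builtins (the real code works on the virtual-APIC page and also computes max_irr):

#include <stdbool.h>
#include <stdio.h>

#define PIR_WORDS 4   /* 256 interrupt vectors = four 64-bit words */

static bool pir_to_irr(unsigned long pir[PIR_WORDS],
                       unsigned long irr[PIR_WORDS])
{
        bool new_bits = false;
        int i;

        for (i = 0; i < PIR_WORDS; i++) {
                /* Atomically drain the posted-interrupt word... */
                unsigned long val = __atomic_exchange_n(&pir[i], 0,
                                                        __ATOMIC_SEQ_CST);
                /* ...and note whether it adds anything to the IRR. */
                if (val & ~irr[i])
                        new_bits = true;
                irr[i] |= val;
        }
        return new_bits;
}

int main(void)
{
        unsigned long pir[PIR_WORDS] = { 1UL << 32, 0, 0, 0 };
        unsigned long irr[PIR_WORDS] = { 0 };

        printf("new bits: %d\n", pir_to_irr(pir, irr));  /* prints 1 */
        return 0;
}
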
@@ -9223,6 +9228,12 @@ static bool vmx_xsaves_supported(void)
SECONDARY_EXEC_XSAVES;
}
+static bool vmx_umip_emulated(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_DESC;
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -9378,7 +9389,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long debugctlmsr, cr3, cr4;
+ unsigned long cr3, cr4;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -9431,7 +9442,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vcpu->arch.pkru);
atomic_switch_perf_msrs(vmx);
- debugctlmsr = get_debugctlmsr();
vmx_arm_hv_timer(vcpu);
@@ -9587,8 +9597,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmexit_fill_RSB();
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
- if (debugctlmsr)
- update_debugctlmsr(debugctlmsr);
+ if (vmx->host_debugctlmsr)
+ update_debugctlmsr(vmx->host_debugctlmsr);
#ifndef CONFIG_X86_64
/*
@@ -9668,10 +9678,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
@@ -9871,7 +9879,8 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
u32 mask =
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_DESC;
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
@@ -10037,8 +10046,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
}
}
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12);
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12);
static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
@@ -10127,11 +10136,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
(unsigned long)(vmcs12->posted_intr_desc_addr &
(PAGE_SIZE - 1)));
}
- if (cpu_has_vmx_msr_bitmap() &&
- nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
- ;
- else
+ if (!nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_USE_MSR_BITMAPS);
}
@@ -10199,8 +10204,8 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
*/
-static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
{
int msr;
struct page *page;
@@ -10222,6 +10227,11 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
+ /* Nothing to do if the MSR bitmap is not in use. */
+ if (!cpu_has_vmx_msr_bitmap() ||
+ !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
+ return false;
+
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
!pred_cmd && !spec_ctrl)
return false;
@@ -10229,32 +10239,41 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
if (is_error_page(page))
return false;
- msr_bitmap_l1 = (unsigned long *)kmap(page);
- memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+ msr_bitmap_l1 = (unsigned long *)kmap(page);
+ if (nested_cpu_has_apic_reg_virt(vmcs12)) {
+ /*
+ * L0 need not intercept reads for MSRs between 0x800 and 0x8ff;
+ * it just lets the processor take the value from the virtual-APIC
+ * page, so take those 256 bits directly from the L1 bitmap.
+ */
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = msr_bitmap_l1[word];
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ } else {
+ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
+ unsigned word = msr / BITS_PER_LONG;
+ msr_bitmap_l0[word] = ~0;
+ msr_bitmap_l0[word + (0x800 / sizeof(long))] = ~0;
+ }
+ }
- if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
- if (nested_cpu_has_apic_reg_virt(vmcs12))
- for (msr = 0x800; msr <= 0x8ff; msr++)
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- msr, MSR_TYPE_R);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_TASKPRI),
+ MSR_TYPE_W);
+ if (nested_cpu_has_vid(vmcs12)) {
nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_TASKPRI >> 4),
- MSR_TYPE_R | MSR_TYPE_W);
-
- if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_EOI >> 4),
- MSR_TYPE_W);
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
- MSR_TYPE_W);
- }
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_EOI),
+ MSR_TYPE_W);
+ nested_vmx_disable_intercept_for_msr(
+ msr_bitmap_l1, msr_bitmap_l0,
+ X2APIC_MSR(APIC_SELF_IPI),
+ MSR_TYPE_W);
}
if (spec_ctrl)
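
The word arithmetic above relies on the MSR-bitmap geometry: one 4K page in which read intercepts for MSRs 0x0000-0x1fff occupy the first 1024 bytes and the corresponding write intercepts sit 0x800 bytes (256 longs on a 64-bit host) further in, which is why the x2APIC range 0x800-0x8ff can be copied one long at a time. A standalone sketch of the indexing (illustration only):

#include <stdio.h>

#define APIC_BASE_MSR  0x800
#define X2APIC_MSR(r)  (APIC_BASE_MSR + ((r) >> 4))
#define APIC_TASKPRI   0x80
#define LONG_BITS      64   /* assuming a 64-bit host */

int main(void)
{
        unsigned int msr = X2APIC_MSR(APIC_TASKPRI);        /* 0x808 */
        unsigned int read_word = msr / LONG_BITS;           /* read-low area */
        unsigned int write_word = read_word + 0x800 / sizeof(long);

        printf("MSR %#x: read bit in word %u, write bit in word %u\n",
               msr, read_word, write_word);
        return 0;
}
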
@@ -10534,25 +10553,12 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 0;
}
-/*
- * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
- * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
- * guest in a way that will both be appropriate to L1's requests, and our
- * needs. In addition to modifying the active vmcs (which is vmcs02), this
- * function also has additional necessary side-effects, like setting various
- * vcpu->arch fields.
- * Returns 0 on success, 1 on failure. Invalid state exit qualification code
- * is assigned to entry_failure_code on failure.
- */
-static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- bool from_vmentry, u32 *entry_failure_code)
+static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 exec_control, vmcs12_exec_ctrl;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
@@ -10560,7 +10566,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
@@ -10570,15 +10575,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
@@ -10588,6 +10590,125 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ if (nested_cpu_has_xsaves(vmcs12))
+ vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
+ vmcs_write64(VMCS_LINK_POINTER, -1ull);
+
+ if (cpu_has_vmx_posted_intr())
+ vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
+
+ /*
+ * Whether page-faults are trapped is determined by a combination of
+ * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
+ * If enable_ept, L0 doesn't care about page faults and we should
+ * set all of these to L1's desires. However, if !enable_ept, L0 does
+ * care about (at least some) page faults, and because it is not easy
+ * (if at all possible?) to merge L0 and L1's desires, we simply ask
+ * to exit on each and every L2 page fault. This is done by setting
+ * MASK=MATCH=0 and (see below) EB.PF=1.
+ * Note that below we don't need special code to set EB.PF beyond the
+ * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
+ * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
+ * !enable_ept, EB.PF is 1, so the "or" will always be 1.
+ */
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
+ enable_ept ? vmcs12->page_fault_error_code_mask : 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
+ enable_ept ? vmcs12->page_fault_error_code_match : 0);
+
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
+ if (cpu_has_vmx_apicv()) {
+ vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
+ vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
+ vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
+ vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
+ }
+
+ /*
+ * Set host-state according to L0's settings (vmcs12 is irrelevant here)
+ * Some constant fields are set here by vmx_set_constant_host_state().
+ * Other fields are different per CPU, and will be set later when
+ * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
+ */
+ vmx_set_constant_host_state(vmx);
+
+ /*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ set_cr4_guest_host_mask(vmx);
+
+ if (vmx_mpx_supported())
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
+ if (enable_vpid) {
+ if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
+ else
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+ }
+
+ /*
+ * L1 may access the L2's PDPTR, so save them to construct vmcs12
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
+}
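
prepare_vmcs02_full() only needs to run when vmcs12 content that is not shadowed may have changed, which is exactly what the new dirty_vmcs12 flag tracks: handle_vmwrite() sets it for non-shadowed fields and set_current_vmptr() sets it unconditionally. A sketch of the resulting fast/slow split, with hypothetical stand-in helpers rather than the kernel functions:

#include <stdbool.h>

/* Stand-ins for the two sync paths; hypothetical, for illustration. */
static void sync_shadowed_fields(void)  { /* GUEST_RIP, GUEST_CR0, ... */ }
static void sync_remaining_fields(void) { /* the ~100 vmcs_write*() above */ }

struct nested {
        bool dirty_vmcs12;  /* set by trapped writes to non-shadowed fields */
};

static void prepare_vmcs02_sketch(struct nested *n)
{
        sync_shadowed_fields();                 /* cheap, every nested entry */
        if (n->dirty_vmcs12) {
                sync_remaining_fields();        /* expensive, only when needed */
                n->dirty_vmcs12 = false;
        }
}

int main(void)
{
        struct nested n = { .dirty_vmcs12 = true };

        prepare_vmcs02_sketch(&n);   /* slow path once */
        prepare_vmcs02_sketch(&n);   /* fast path afterwards */
        return 0;
}
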
+
+/*
+ * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
+ * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
+ * guest in a way that will both be appropriate to L1's requests, and our
+ * needs. In addition to modifying the active vmcs (which is vmcs02), this
+ * function also has additional necessary side-effects, like setting various
+ * vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ bool from_vmentry, u32 *entry_failure_code)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 exec_control, vmcs12_exec_ctrl;
+
+ /*
+ * First, the fields that are shadowed. This must be kept in sync
+ * with vmx_shadow_fields.h.
+ */
+
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+
+ /*
+ * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
+ * HOST_FS_BASE, HOST_GS_BASE.
+ */
+
if (from_vmentry &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -10610,16 +10731,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
} else {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
-
- if (nested_cpu_has_xsaves(vmcs12))
- vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
- vmcs_write64(VMCS_LINK_POINTER, -1ull);
exec_control = vmcs12->pin_based_vm_exec_control;
@@ -10633,7 +10745,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_posted_intr(vmcs12)) {
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
vmx->nested.pi_pending = false;
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
@@ -10644,25 +10755,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (nested_cpu_has_preemption_timer(vmcs12))
vmx_start_preemption_timer(vcpu);
- /*
- * Whether page-faults are trapped is determined by a combination of
- * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF.
- * If enable_ept, L0 doesn't care about page faults and we should
- * set all of these to L1's desires. However, if !enable_ept, L0 does
- * care about (at least some) page faults, and because it is not easy
- * (if at all possible?) to merge L0 and L1's desires, we simply ask
- * to exit on each and every L2 page fault. This is done by setting
- * MASK=MATCH=0 and (see below) EB.PF=1.
- * Note that below we don't need special code to set EB.PF beyond the
- * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
- * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
- * !enable_ept, EB.PF is 1, so the "or" will always be 1.
- */
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
- enable_ept ? vmcs12->page_fault_error_code_mask : 0);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
- enable_ept ? vmcs12->page_fault_error_code_match : 0);
-
if (cpu_has_secondary_exec_ctrls()) {
exec_control = vmx->secondary_exec_control;
@@ -10681,22 +10773,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
- /* All VMFUNCs are currently emulated through L0 vmexits. */
- if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
- vmcs_write64(VM_FUNCTION_CONTROL, 0);
-
- if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
- vmcs_write64(EOI_EXIT_BITMAP0,
- vmcs12->eoi_exit_bitmap0);
- vmcs_write64(EOI_EXIT_BITMAP1,
- vmcs12->eoi_exit_bitmap1);
- vmcs_write64(EOI_EXIT_BITMAP2,
- vmcs12->eoi_exit_bitmap2);
- vmcs_write64(EOI_EXIT_BITMAP3,
- vmcs12->eoi_exit_bitmap3);
+ if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
vmcs_write16(GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
- }
/*
* Write an illegal value to APIC_ACCESS_ADDR. Later,
@@ -10709,24 +10788,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
-
- /*
- * Set host-state according to L0's settings (vmcs12 is irrelevant here)
- * Some constant fields are set here by vmx_set_constant_host_state().
- * Other fields are different per CPU, and will be set later when
- * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
- */
- vmx_set_constant_host_state(vmx);
-
- /*
- * Set the MSR load/store lists to match L0's settings.
- */
- vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
-
/*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
@@ -10758,8 +10819,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
/*
- * Merging of IO bitmap not currently supported.
- * Rather, exit every time.
+ * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
+ * for I/O port accesses.
*/
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
@@ -10796,12 +10857,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- set_cr4_guest_host_mask(vmx);
-
- if (from_vmentry &&
- vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vcpu->arch.tsc_offset + vmcs12->tsc_offset);
@@ -10810,9 +10865,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
- if (cpu_has_vmx_msr_bitmap())
- vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
-
if (enable_vpid) {
/*
* There is no direct mapping between vpid02 and vpid12; the
* vpid02 is per-vCPU for L0 pCPU and reused while the value of
* vpid12 is changed with one invvpid during nested vmentry.
* The vpid12 is allocated by L1 for L2, so it will not
* conflict even if L1 spawns a lot of nested vCPUs.
*/
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
+ __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
}
} else {
- vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
-
}
if (enable_pml) {
@@ -10881,6 +10930,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
+ if (vmx->nested.dirty_vmcs12) {
+ prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+ vmx->nested.dirty_vmcs12 = false;
+ }
+
/* Shadow page tables on either EPT or shadow page tables. */
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
@@ -10889,16 +10943,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
- /*
- * L1 may access the L2's PDPTR, so save them to construct vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
return 0;
@@ -11254,7 +11298,6 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
- vcpu->arch.exception.pending = false;
return 0;
}
@@ -11535,11 +11578,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* L1's vpid. TODO: move to a more elaborate solution, giving
* each L2 its own vpid and exposing the vpid feature to L1.
*/
- vmx_flush_tlb(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
- /* Restore posted intr vector. */
- if (nested_cpu_has_posted_intr(vmcs12))
- vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
@@ -11800,6 +11840,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+
+ /*
+ * RDPID causes #UD if disabled through secondary execution controls.
+ * Because it is marked as EmulateOnUD, we need to intercept it here.
+ */
+ if (info->intercept == x86_intercept_rdtscp &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+
+ /* TODO: check more intercepts... */
return X86EMUL_CONTINUE;
}
@@ -12313,6 +12368,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.handle_external_intr = vmx_handle_external_intr,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
+ .umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events,
diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h
new file mode 100644
index 000000000000..cd0c75f6d037
--- /dev/null
+++ b/arch/x86/kvm/vmx_shadow_fields.h
@@ -0,0 +1,77 @@
+#ifndef SHADOW_FIELD_RO
+#define SHADOW_FIELD_RO(x)
+#endif
+#ifndef SHADOW_FIELD_RW
+#define SHADOW_FIELD_RW(x)
+#endif
+
+/*
+ * We do NOT shadow fields that are modified when L0
+ * traps and emulates any vmx instruction (e.g. VMPTRLD,
+ * VMXON...) executed by L1.
+ * For example, VM_INSTRUCTION_ERROR is read
+ * by L1 if a vmx instruction fails (part of the error path).
+ * Note the code assumes this logic. If for some reason
+ * we start shadowing these fields then we need to
+ * force a shadow sync when L0 emulates vmx instructions
+ * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
+ * by nested_vmx_failValid)
+ *
+ * When adding or removing fields here, note that shadowed
+ * fields must always be synced by prepare_vmcs02, not just
+ * prepare_vmcs02_full.
+ */
+
+/*
+ * Keeping the fields ordered by size is an attempt at improving
+ * branch prediction in vmcs_read_any and vmcs_write_any.
+ */
+
+/* 16-bits */
+SHADOW_FIELD_RW(GUEST_CS_SELECTOR)
+SHADOW_FIELD_RW(GUEST_INTR_STATUS)
+SHADOW_FIELD_RW(GUEST_PML_INDEX)
+SHADOW_FIELD_RW(HOST_FS_SELECTOR)
+SHADOW_FIELD_RW(HOST_GS_SELECTOR)
+
+/* 32-bits */
+SHADOW_FIELD_RO(VM_EXIT_REASON)
+SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
+SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
+SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
+SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
+SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
+SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
+SHADOW_FIELD_RW(EXCEPTION_BITMAP)
+SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
+SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
+SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
+SHADOW_FIELD_RW(TPR_THRESHOLD)
+SHADOW_FIELD_RW(GUEST_CS_LIMIT)
+SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
+SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
+SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
+
+/* Natural width */
+SHADOW_FIELD_RO(EXIT_QUALIFICATION)
+SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
+SHADOW_FIELD_RW(GUEST_RIP)
+SHADOW_FIELD_RW(GUEST_RSP)
+SHADOW_FIELD_RW(GUEST_CR0)
+SHADOW_FIELD_RW(GUEST_CR3)
+SHADOW_FIELD_RW(GUEST_CR4)
+SHADOW_FIELD_RW(GUEST_RFLAGS)
+SHADOW_FIELD_RW(GUEST_CS_BASE)
+SHADOW_FIELD_RW(GUEST_ES_BASE)
+SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
+SHADOW_FIELD_RW(CR0_READ_SHADOW)
+SHADOW_FIELD_RW(CR4_READ_SHADOW)
+SHADOW_FIELD_RW(HOST_FS_BASE)
+SHADOW_FIELD_RW(HOST_GS_BASE)
+
+/* 64-bit */
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
+SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
+
+#undef SHADOW_FIELD_RO
+#undef SHADOW_FIELD_RW
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f9c5171dad2b..c8a0b545ac20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#include <asm/mshyperv.h>
+#include <asm/hypervisor.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -177,7 +179,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
- { "efer_reload", VCPU_STAT(efer_reload) },
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
@@ -702,7 +703,8 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
/* kvm_set_xcr() also depends on this */
- xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+ if (vcpu->arch.xcr0 != host_xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
vcpu->guest_xcr0_loaded = 1;
}
}
@@ -794,6 +796,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
return 1;
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+ return 1;
+
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
@@ -1037,6 +1042,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
};
@@ -1378,6 +1384,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+static inline int gtod_is_based_on_tsc(int mode)
+{
+ return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+}
+
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
@@ -1397,7 +1408,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
* perform request to enable masterclock.
*/
if (ka->use_master_clock ||
- (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+ (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1460,6 +1471,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
vcpu->arch.tsc_offset = offset;
}
+static inline bool kvm_check_tsc_unstable(void)
+{
+#ifdef CONFIG_X86_64
+ * The TSC is marked unstable when we're running on Hyper-V,
+ * but the 'TSC page' clocksource is still good.
+ * 'TSC page' clocksource is good.
+ */
+ if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+ return false;
+#endif
+ return check_tsc_unstable();
+}
+
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
@@ -1505,7 +1529,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
*/
if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
- if (!check_tsc_unstable()) {
+ if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
@@ -1605,18 +1629,43 @@ static u64 read_tsc(void)
return last;
}
-static inline u64 vgettsc(u64 *cycle_now)
+static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
{
long v;
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+ u64 tsc_pg_val;
+
+ switch (gtod->clock.vclock_mode) {
+ case VCLOCK_HVCLOCK:
+ tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
+ tsc_timestamp);
+ if (tsc_pg_val != U64_MAX) {
+ /* TSC page valid */
+ *mode = VCLOCK_HVCLOCK;
+ v = (tsc_pg_val - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ } else {
+ /* TSC page invalid */
+ *mode = VCLOCK_NONE;
+ }
+ break;
+ case VCLOCK_TSC:
+ *mode = VCLOCK_TSC;
+ *tsc_timestamp = read_tsc();
+ v = (*tsc_timestamp - gtod->clock.cycle_last) &
+ gtod->clock.mask;
+ break;
+ default:
+ *mode = VCLOCK_NONE;
+ }
- *cycle_now = read_tsc();
+ if (*mode == VCLOCK_NONE)
+ *tsc_timestamp = v = 0;
- v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
return v * gtod->clock.mult;
}
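
Whatever mode vgettsc() picks, the caller converts cycles to nanoseconds the same way every Linux clocksource does: mask the delta against cycle_last, multiply by mult, shift right by shift. A standalone example with made-up values, roughly corresponding to a 2.4 GHz TSC (not taken from any real clocksource):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t cycle_last = 1000000, now = 1003000;  /* 3000 cycles */
        uint32_t mult = 436907, shift = 20;            /* ~0.4167 ns/cycle */

        uint64_t delta = (now - cycle_last) & ~0ULL;
        uint64_t ns = (delta * mult) >> shift;

        printf("%llu cycles -> %llu ns\n",             /* ~1250 ns */
               (unsigned long long)delta, (unsigned long long)ns);
        return 0;
}
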
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1625,9 +1674,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
ns += gtod->boot_ns;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1636,7 +1684,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
return mode;
}
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
@@ -1645,10 +1693,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
do {
seq = read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->nsec_base;
- ns += vgettsc(cycle_now);
+ ns += vgettsc(tsc_timestamp, &mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1658,25 +1705,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
return mode;
}
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
+/* returns true if host is using TSC based clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+ tsc_timestamp));
}
-/* returns true if host is using tsc clocksource */
+/* returns true if host is using TSC based clocksource */
static bool kvm_get_walltime_and_clockread(struct timespec *ts,
- u64 *cycle_now)
+ u64 *tsc_timestamp)
{
/* checked again under seqlock below */
- if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
return false;
- return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+ return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
}
#endif
@@ -2119,6 +2167,12 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
vcpu->arch.pv_time_enabled = false;
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
+}
+
static void record_steal_time(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
@@ -2128,7 +2182,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
- vcpu->arch.st.steal.preempted = 0;
+ /*
+ * Doing a TLB flush here, on the guest's behalf, can avoid
+ * expensive IPIs.
+ */
+ if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ kvm_vcpu_flush_tlb(vcpu, false);
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
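
The xchg() is the important part of the hunk above: it clears steal.preempted and samples the guest-set KVM_VCPU_FLUSH_TLB bit in one atomic step, so a flush request that races with the clear cannot be lost. A userspace-flavored sketch of the same pattern using C11 atomics (the flag values match uapi kvm_para.h):

#include <stdatomic.h>
#include <stdio.h>

#define KVM_VCPU_PREEMPTED  (1 << 0)
#define KVM_VCPU_FLUSH_TLB  (1 << 1)

int main(void)
{
        /* Pretend the guest requested a deferred flush while preempted. */
        _Atomic unsigned char preempted =
                KVM_VCPU_PREEMPTED | KVM_VCPU_FLUSH_TLB;

        /* Consume the flags and clear the field in one atomic exchange. */
        unsigned char old = atomic_exchange(&preempted, 0);

        if (old & KVM_VCPU_FLUSH_TLB)
                printf("flushing the TLB on the guest's behalf\n");
        return 0;
}
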
@@ -2229,6 +2288,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_SMI_COUNT:
+ if (!msr_info->host_initiated)
+ return 1;
+ vcpu->arch.smi_count = data;
+ break;
case MSR_KVM_WALL_CLOCK_NEW:
case MSR_KVM_WALL_CLOCK:
vcpu->kvm->arch.wall_clock = data;
@@ -2503,6 +2567,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.smbase;
break;
+ case MSR_SMI_COUNT:
+ msr_info->data = vcpu->arch.smi_count;
+ break;
case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
msr_info->data = 1000ULL;
@@ -2870,13 +2937,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
rdtsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
- if (check_tsc_unstable()) {
+ if (kvm_check_tsc_unstable()) {
u64 offset = kvm_compute_tsc_offset(vcpu,
vcpu->arch.last_guest_tsc);
kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -2905,7 +2972,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- vcpu->arch.st.steal.preempted = 1;
+ vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal.preempted,
@@ -2939,12 +3006,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
+ /*
+ * If userspace has set any breakpoints or watchpoints, dr6 is restored
+ * on every vmexit, but if not, we might have a stale dr6 from the
+ * guest. do_debug expects dr6 to be cleared after it runs; do the same.
+ */
+ set_debugreg(0, 6);
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
return kvm_apic_get_state(vcpu, s);
@@ -3473,6 +3546,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void *buffer;
} u;
+ vcpu_load(vcpu);
+
u.buffer = NULL;
switch (ioctl) {
case KVM_GET_LAPIC: {
@@ -3498,8 +3573,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!lapic_in_kernel(vcpu))
goto out;
u.lapic = memdup_user(argp, sizeof(*u.lapic));
- if (IS_ERR(u.lapic))
- return PTR_ERR(u.lapic);
+ if (IS_ERR(u.lapic)) {
+ r = PTR_ERR(u.lapic);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
break;
@@ -3673,8 +3750,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
- if (IS_ERR(u.xsave))
- return PTR_ERR(u.xsave);
+ if (IS_ERR(u.xsave)) {
+ r = PTR_ERR(u.xsave);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
@@ -3696,8 +3775,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
- if (IS_ERR(u.xcrs))
- return PTR_ERR(u.xcrs);
+ if (IS_ERR(u.xcrs)) {
+ r = PTR_ERR(u.xcrs);
+ goto out_nofree;
+ }
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
@@ -3741,6 +3822,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
out:
kfree(u.buffer);
+out_nofree:
+ vcpu_put(vcpu);
return r;
}
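This vcpu_load()/vcpu_put() bracketing, repeated in each arch ioctl handler below, works because vcpu_load() no longer takes vcpu->mutex and so can no longer fail: the mutex is acquired by the generic kvm_vcpu_ioctl() path (or explicitly, as kvm_arch_vcpu_postcreate() does later in this file). The reworked generic helper is presumably along these lines:

	/* Sketch of the virt/kvm/kvm_main.c side: the mutex is now the
	 * caller's responsibility, so the load itself cannot fail. */
	void vcpu_load(struct kvm_vcpu *vcpu)
	{
		int cpu = get_cpu();

		preempt_notifier_register(&vcpu->preempt_notifier);
		kvm_arch_vcpu_load(vcpu, cpu);
		put_cpu();
	}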
@@ -4297,6 +4380,36 @@ set_identity_unlock:
r = kvm_vm_ioctl_enable_cap(kvm, &cap);
break;
}
+ case KVM_MEMORY_ENCRYPT_OP: {
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_op)
+ r = kvm_x86_ops->mem_enc_op(kvm, argp);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_REG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_reg_region)
+ r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+ break;
+ }
+ case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+ struct kvm_enc_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ goto out;
+
+ r = -ENOTTY;
+ if (kvm_x86_ops->mem_enc_unreg_region)
+ r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+ break;
+ }
default:
r = -ENOTTY;
}
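These three ioctls are the generic plumbing for memory encryption (SEV on AMD): the VM-scoped requests are forwarded to the vendor module and fail with -ENOTTY when it provides no mem_enc_* callbacks. A hedged userspace sketch of registering a guest RAM range, assuming the uapi struct kvm_enc_region { __u64 addr; __u64 size; } and hypothetical vm_fd/hva/ram_size values:

	struct kvm_enc_region region = {
		.addr = (__u64)(unsigned long)hva,	/* host virtual address */
		.size = ram_size,
	};

	if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region) < 0)
		perror("KVM_MEMORY_ENCRYPT_REG_REGION");	/* -ENOTTY if unsupported */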
@@ -5705,7 +5818,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
* handle watchpoints yet; those would be handled in
* the emulate_ops.
*/
- if (kvm_vcpu_check_breakpoint(vcpu, &r))
+ if (!(emulation_type & EMULTYPE_SKIP) &&
+ kvm_vcpu_check_breakpoint(vcpu, &r))
return r;
ctxt->interruptibility = 0;
@@ -5891,6 +6005,43 @@ static void tsc_khz_changed(void *data)
__this_cpu_write(cpu_tsc_khz, khz);
}
+#ifdef CONFIG_X86_64
+static void kvm_hyperv_tsc_notifier(void)
+{
+ struct kvm *kvm;
+ struct kvm_vcpu *vcpu;
+ int cpu;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_make_mclock_inprogress_request(kvm);
+
+ hyperv_stop_tsc_emulation();
+
+ /* TSC frequency always matches when on Hyper-V */
+ for_each_present_cpu(cpu)
+ per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+ kvm_max_guest_tsc_khz = tsc_khz;
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ struct kvm_arch *ka = &kvm->arch;
+
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+
+ pvclock_update_vm_gtod_copy(kvm);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+ kvm_for_each_vcpu(cpu, vcpu, kvm)
+ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+ }
+ spin_unlock(&kvm_lock);
+}
+#endif
+
static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
@@ -6112,9 +6263,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
update_pvclock_gtod(tk);
/* disable master clock if host does not trust, or does not
- * use, TSC clocksource
+ * use, a TSC-based clocksource.
*/
- if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
atomic_read(&kvm_guest_has_master_clock) != 0)
queue_work(system_long_wq, &pvclock_gtod_work);
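gtod_is_based_on_tsc() is introduced earlier in this patch; the rewording matters because Hyper-V's TSC page (VCLOCK_HVCLOCK) now also counts as a TSC-derived clocksource, so the master clock is no longer torn down when running nested on Hyper-V. The helper presumably reduces to:

	static inline bool gtod_is_based_on_tsc(int mode)
	{
		return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
	}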
@@ -6176,6 +6327,9 @@ int kvm_arch_init(void *opaque)
kvm_lapic_init();
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
#endif
return 0;
@@ -6188,6 +6342,10 @@ out:
void kvm_arch_exit(void)
{
+#ifdef CONFIG_X86_64
+ if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+ clear_hv_tscchange_cb();
+#endif
kvm_lapic_exit();
perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
@@ -6450,6 +6608,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
kvm_x86_ops->queue_exception(vcpu);
} else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
+ ++vcpu->arch.smi_count;
enter_smm(vcpu);
} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
--vcpu->arch.nmi_pending;
@@ -6751,7 +6910,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+ if (vcpu->arch.apicv_active)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -6760,12 +6919,6 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_x86_ops->tlb_flush(vcpu);
-}
-
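The helper is not deleted outright; it moves above record_steal_time() and gains an invalidate_gpa flag, matching the two call sites in this patch: a flush requested through the new steal-time flag word (KVM_VCPU_PREEMPTED / KVM_VCPU_FLUSH_TLB in the uapi) passes false, while KVM_REQ_TLB_FLUSH keeps passing true. The relocated helper plausibly reads:

	static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
	{
		++vcpu->stat.tlb_flush;
		kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
	}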
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@@ -6834,7 +6987,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_vcpu_flush_tlb(vcpu);
+ kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6983,10 +7136,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
*/
- if (kvm_lapic_enabled(vcpu)) {
- if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
- kvm_x86_ops->sync_pir_to_irr(vcpu);
- }
+ if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+ kvm_x86_ops->sync_pir_to_irr(vcpu);
if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
|| need_resched() || signal_pending(current)) {
@@ -7007,7 +7158,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
+ if (lapic_timer_advance_ns)
+ wait_lapic_expire(vcpu);
guest_enter_irqoff();
if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -7268,8 +7420,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
+ vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
-
kvm_load_guest_fpu(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -7316,11 +7468,14 @@ out:
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
return r;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7354,11 +7509,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
+ vcpu_load(vcpu);
+
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7388,6 +7546,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
kvm_make_request(KVM_REQ_EVENT, vcpu);
+ vcpu_put(vcpu);
return 0;
}
@@ -7406,6 +7565,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
{
struct desc_ptr dt;
+ vcpu_load(vcpu);
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7437,12 +7598,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ vcpu_load(vcpu);
+
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
vcpu->arch.pv.pv_unhalted)
@@ -7450,21 +7614,26 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
else
mp_state->mp_state = vcpu->arch.mp_state;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
+
if (!lapic_in_kernel(vcpu) &&
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
- return -EINVAL;
+ goto out;
/* INITs are latched while in SMM */
if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
(mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
- return -EINVAL;
+ goto out;
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
@@ -7472,7 +7641,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
} else
vcpu->arch.mp_state = mp_state->mp_state;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@ -7526,18 +7699,21 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int mmu_reset_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
+ int ret = -EINVAL;
+
+ vcpu_load(vcpu);
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
- return -EINVAL;
+ goto out;
if (kvm_valid_sregs(vcpu, sregs))
- return -EINVAL;
+ goto out;
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
- return -EINVAL;
+ goto out;
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
@@ -7603,7 +7779,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_EVENT, vcpu);
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -7612,6 +7791,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
unsigned long rflags;
int i, r;
+ vcpu_load(vcpu);
+
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
@@ -7657,7 +7838,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
r = 0;
out:
-
+ vcpu_put(vcpu);
return r;
}
@@ -7671,6 +7852,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa_t gpa;
int idx;
+ vcpu_load(vcpu);
+
idx = srcu_read_lock(&vcpu->kvm->srcu);
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -7679,14 +7862,17 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
tr->writeable = 1;
tr->usermode = 0;
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@@ -7696,13 +7882,17 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fpu->last_dp = fxsave->rdp;
memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- struct fxregs_state *fxsave =
- &vcpu->arch.guest_fpu.state.fxsave;
+ struct fxregs_state *fxsave;
+
+ vcpu_load(vcpu);
+
+ fxsave = &vcpu->arch.guest_fpu.state.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -7713,6 +7903,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
fxsave->rdp = fpu->last_dp;
memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+ vcpu_put(vcpu);
return 0;
}
@@ -7769,7 +7960,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
@@ -7781,16 +7972,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- int r;
-
kvm_vcpu_mtrr_init(vcpu);
- r = vcpu_load(vcpu);
- if (r)
- return r;
+ vcpu_load(vcpu);
kvm_vcpu_reset(vcpu, false);
kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
- return r;
+ return 0;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -7800,13 +7987,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_postcreate(vcpu);
- if (vcpu_load(vcpu))
+ if (mutex_lock_killable(&vcpu->mutex))
return;
+ vcpu_load(vcpu);
msr.data = 0x0;
msr.index = MSR_IA32_TSC;
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
return;
@@ -7817,11 +8006,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- int r;
vcpu->arch.apf.msr_val = 0;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
@@ -7833,6 +8020,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
+ vcpu->arch.smi_count = 0;
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
@@ -7926,7 +8114,7 @@ int kvm_arch_hardware_enable(void)
return ret;
local_tsc = rdtsc();
- stable = !check_tsc_unstable();
+ stable = !kvm_check_tsc_unstable();
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!stable && vcpu->cpu == smp_processor_id())
@@ -8192,9 +8380,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
- int r;
- r = vcpu_load(vcpu);
- BUG_ON(r);
+ vcpu_load(vcpu);
kvm_mmu_unload(vcpu);
vcpu_put(vcpu);
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d0b95b7a90b4..b91215d1fd80 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -12,6 +12,7 @@
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = false;
}
@@ -265,36 +266,8 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
static inline bool kvm_mwait_in_guest(void)
{
- unsigned int eax, ebx, ecx, edx;
-
- if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
- return false;
-
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- /* All AMD CPUs have a working MWAIT implementation */
- return true;
- case X86_VENDOR_INTEL:
- /* Handle Intel below */
- break;
- default:
- return false;
- }
-
- /*
- * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
- * they would allow guest to stop the CPU completely by disabling
- * interrupts then invoking MWAIT.
- */
- if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
- return false;
-
- cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
-
- if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
- return false;
-
- return true;
+ return boot_cpu_has(X86_FEATURE_MWAIT) &&
+ !boot_cpu_has_bug(X86_BUG_MONITOR);
}
#endif
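The open-coded CPUID probe collapses into two feature-word tests because unreliable MONITOR/MWAIT is already recorded at boot as X86_BUG_MONITOR. Illustrative only (not the actual site where the bug bit is set; cpu_mwait_is_unreliable is a hypothetical probe):

	/* Somewhere in early CPU setup, a quirk latches the bug bit once... */
	if (cpu_mwait_is_unreliable(c))
		set_cpu_bug(c, X86_BUG_MONITOR);

	/* ...so runtime checks reduce to cheap flag tests, as above. */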
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 135c9a7898c7..79cb066f40c0 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -829,23 +829,24 @@ void __init mem_init(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
zone = page_zone(pfn_to_page(start_pfn));
- return __remove_pages(zone, start_pfn, nr_pages);
+ return __remove_pages(zone, start_pfn, nr_pages, altmap);
}
#endif
#endif
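The hotplug entry points now receive the struct vmem_altmap from their caller instead of deriving it with to_vmem_altmap(). The altmap describes a reservation at the base of a hotplugged (typically device) range out of which the struct pages themselves are carved; roughly, per include/linux/memremap.h of this era:

	struct vmem_altmap {
		const unsigned long base_pfn;	/* first pfn of the backing range */
		const unsigned long reserve;	/* pages set aside, never allocated */
		unsigned long free;		/* pages available for the memmap */
		unsigned long align;
		unsigned long alloc;		/* pages already handed out */
	};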
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4a837289f2ad..1ab42c852069 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
}
-int add_pages(int nid, unsigned long start_pfn,
- unsigned long nr_pages, bool want_memblock)
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap, bool want_memblock)
{
int ret;
- ret = __add_pages(nid, start_pfn, nr_pages, want_memblock);
+ ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
WARN_ON_ONCE(ret);
/* update max_pfn, max_low_pfn and high_memory */
@@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn,
return ret;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
init_memory_mapping(start, start + size);
- return add_pages(nid, start_pfn, nr_pages, want_memblock);
+ return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}
-EXPORT_SYMBOL_GPL(arch_add_memory);
#define PAGE_INUSE 0xFD
-static void __meminit free_pagetable(struct page *page, int order)
+static void __meminit free_pagetable(struct page *page, int order,
+ struct vmem_altmap *altmap)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
if (altmap) {
vmem_altmap_free(altmap, nr_pages);
@@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order)
free_pages((unsigned long)page_address(page), order);
}
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+ struct vmem_altmap *altmap)
{
pte_t *pte;
int i;
@@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
}
/* free a pte table */
- free_pagetable(pmd_page(*pmd), 0);
+ free_pagetable(pmd_page(*pmd), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
+ struct vmem_altmap *altmap)
{
pmd_t *pmd;
int i;
@@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
}
/* free a pmd table */
- free_pagetable(pud_page(*pud), 0);
+ free_pagetable(pud_page(*pud), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
+ struct vmem_altmap *altmap)
{
pud_t *pud;
int i;
@@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
}
/* free a pud table */
- free_pagetable(p4d_page(*p4d), 0);
+ free_pagetable(p4d_page(*p4d), 0, altmap);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
@@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
* freed when offlining, or simply not in use.
*/
if (!direct)
- free_pagetable(pte_page(*pte), 0);
+ free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
- free_pagetable(pte_page(*pte), 0);
+ free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
static void __meminit
remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
- bool direct)
+ bool direct, struct vmem_altmap *altmap)
{
unsigned long next, pages = 0;
pte_t *pte_base;
@@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE));
+ get_order(PMD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE));
+ get_order(PMD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, direct);
- free_pte_table(pte_base, pmd);
+ remove_pte_table(pte_base, addr, next, altmap, direct);
+ free_pte_table(pte_base, pmd, altmap);
}
/* Call free_pmd_table() in remove_pud_table(). */
@@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
static void __meminit
remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pmd_t *pmd_base;
@@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) {
if (!direct)
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE));
+ get_order(PUD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) {
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE));
+ get_order(PUD_SIZE),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
}
pmd_base = pmd_offset(pud, 0);
- remove_pmd_table(pmd_base, addr, next, direct);
- free_pmd_table(pmd_base, pud);
+ remove_pmd_table(pmd_base, addr, next, direct, altmap);
+ free_pmd_table(pmd_base, pud, altmap);
}
if (direct)
@@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
static void __meminit
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
- bool direct)
+ struct vmem_altmap *altmap, bool direct)
{
unsigned long next, pages = 0;
pud_t *pud_base;
@@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
BUILD_BUG_ON(p4d_large(*p4d));
pud_base = pud_offset(p4d, 0);
- remove_pud_table(pud_base, addr, next, direct);
+ remove_pud_table(pud_base, addr, next, altmap, direct);
/*
* For 4-level page tables we do not want to free PUDs, but in the
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
if (CONFIG_PGTABLE_LEVELS == 5)
- free_pud_table(pud_base, p4d);
+ free_pud_table(pud_base, p4d, altmap);
}
if (direct)
@@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
/* start and end are both virtual address. */
static void __meminit
-remove_pagetable(unsigned long start, unsigned long end, bool direct)
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next;
unsigned long addr;
@@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
continue;
p4d = p4d_offset(pgd, 0);
- remove_p4d_table(p4d, addr, next, direct);
+ remove_p4d_table(p4d, addr, next, altmap, direct);
}
flush_tlb_all();
}
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
- remove_pagetable(start, end, false);
+ remove_pagetable(start, end, false, altmap);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
- remove_pagetable(start, end, true);
+ remove_pagetable(start, end, true, NULL);
}
-int __ref arch_remove_memory(u64 start, u64 size)
+int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
- struct vmem_altmap *altmap;
struct zone *zone;
int ret;
/* With altmap the first mapped page is offset from @start */
- altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
page += vmem_altmap_offset(altmap);
zone = page_zone(page);
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
WARN_ON_ONCE(ret);
kernel_physical_mapping_remove(start, start + size);
@@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;
- p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (altmap)
+ p = altmap_alloc_block_buf(PMD_SIZE, altmap);
+ else
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) {
pte_t entry;
@@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
return 0;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
{
- struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
if (boot_cpu_has(X86_FEATURE_PSE))
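With the altmap now passed in explicitly, the PMD-sized vmemmap buffer comes from the device reservation when one exists and from regular node memory otherwise. The accounting helpers behind this are, in sketch form (exact bodies may differ slightly):

	unsigned long __meminit vmem_altmap_offset(struct vmem_altmap *altmap)
	{
		/* number of base pages before the first usable struct page */
		return altmap->reserve + altmap->free;
	}

	static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
	{
		return altmap->base_pfn + altmap->reserve +
		       altmap->alloc + altmap->align;
	}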
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index fe7d57a8fb60..1555bd7d3449 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -678,6 +678,25 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
}
/**
+ * pat_pfn_immune_to_uc_mtrr - Check whether the PAT memory type
+ * of @pfn cannot be overridden by UC MTRR memory type.
+ *
+ * Only to be called when PAT is enabled.
+ *
+ * Returns true if the PAT memory type of @pfn is UC, UC-, or WC.
+ * Returns false otherwise.
+ */
+bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn)
+{
+ enum page_cache_mode cm = lookup_memtype(PFN_PHYS(pfn));
+
+ return cm == _PAGE_CACHE_MODE_UC ||
+ cm == _PAGE_CACHE_MODE_UC_MINUS ||
+ cm == _PAGE_CACHE_MODE_WC;
+}
+EXPORT_SYMBOL_GPL(pat_pfn_immune_to_uc_mtrr);
+
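The GPL export suggests a modular consumer; KVM is the natural one, checking whether honoring the PAT type of a pfn is safe even in the presence of a UC MTRR. A hypothetical call site:

	/* Hypothetical: prefer the PAT type when a UC MTRR cannot override it. */
	if (pat_enabled() && pat_pfn_immune_to_uc_mtrr(pfn))
		use_pat_type(pfn);	/* hypothetical helper */
	else
		combine_with_mtrr(pfn);	/* hypothetical helper */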
+/**
* io_reserve_memtype - Request a memory type mapping for a region of memory
* @start: start (physical address) of the region
* @end: end (physical address) of the region
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 012d02624848..8dcc0607f805 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -229,6 +229,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
#endif
this_cpu_write(cpu_tlbstate.is_lazy, false);
+ /*
+ * The membarrier system call requires a full memory barrier and
+ * core serialization before returning to user-space, after
+ * storing to rq->curr. Writing to CR3 provides that full
+ * memory barrier and core serializing instruction.
+ */
if (real_prev == next) {
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 0452629148be..52e55108404e 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -839,7 +839,8 @@ static void __init pirq_find_router(struct irq_router *r)
DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
rt->rtr_vendor, rt->rtr_device);
- pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
+ pirq_router_dev = pci_get_domain_bus_and_slot(0, rt->rtr_bus,
+ rt->rtr_devfn);
if (!pirq_router_dev) {
DBG(KERN_DEBUG "PCI: Interrupt router not found at "
"%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index c4b3646bd04c..9542a746dc50 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -409,10 +409,8 @@ int __init pci_xen_init(void)
pcibios_enable_irq = xen_pcifront_enable_irq;
pcibios_disable_irq = NULL;
-#ifdef CONFIG_ACPI
/* Keep ACPI out of the picture */
- acpi_noirq = 1;
-#endif
+ acpi_noirq_set();
#ifdef CONFIG_PCI_MSI
x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
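acpi_noirq_set() is what lets the #ifdef disappear here: the helper is a one-liner when ACPI support is built in and an empty stub otherwise, presumably along the lines of:

	#ifdef CONFIG_ACPI
	static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
	#else
	static inline void acpi_noirq_set(void) { }
	#endif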
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 13b4f19b9131..159a897151d6 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -694,6 +694,9 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
int i, ret = 0;
pte_t *pte;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
if (kmap_ops) {
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
kmap_ops, count);
@@ -736,6 +739,9 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
{
int i, ret = 0;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
for (i = 0; i < count; i++) {
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 497cc55a0c16..96f26e026783 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -9,7 +9,9 @@
#include <asm/boot.h>
#include <asm/asm.h>
+#include <asm/msr.h>
#include <asm/page_types.h>
+#include <asm/percpu.h>
#include <asm/unwind_hints.h>
#include <xen/interface/elfnote.h>
@@ -35,6 +37,20 @@ ENTRY(startup_xen)
mov %_ASM_SI, xen_start_info
mov $init_thread_union+THREAD_SIZE, %_ASM_SP
+#ifdef CONFIG_X86_64
+ /* Set up %gs.
+ *
+ * The base of %gs always points to the bottom of the irqstack
+ * union. If the stack protector canary is enabled, it is
+ * located at %gs:40. Note that, on SMP, the boot cpu uses
+ * init data section till per cpu areas are set up.
+ */
+ movl $MSR_GS_BASE,%ecx
+ movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ cdq
+ wrmsr
+#endif
+
jmp xen_start_kernel
END(startup_xen)
__FINIT
diff --git a/arch/xtensa/include/asm/kasan.h b/arch/xtensa/include/asm/kasan.h
index 54be80876e57..216b6f32c375 100644
--- a/arch/xtensa/include/asm/kasan.h
+++ b/arch/xtensa/include/asm/kasan.h
@@ -10,6 +10,8 @@
#include <linux/sizes.h>
#include <asm/kmem_layout.h>
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
/* Start of area covered by KASAN */
#define KASAN_START_VADDR __XTENSA_UL_CONST(0x90000000)
/* Start of the shadow map */
diff --git a/arch/xtensa/include/uapi/asm/poll.h b/arch/xtensa/include/uapi/asm/poll.h
index e3246d41182c..4d249040b33d 100644
--- a/arch/xtensa/include/uapi/asm/poll.h
+++ b/arch/xtensa/include/uapi/asm/poll.h
@@ -12,26 +12,9 @@
#ifndef _XTENSA_POLL_H
#define _XTENSA_POLL_H
-#ifndef __KERNEL__
#define POLLWRNORM POLLOUT
-#define POLLWRBAND (__force __poll_t)0x0100
-#define POLLREMOVE (__force __poll_t)0x0800
-#else
-#define __ARCH_HAS_MANGLED_POLL
-static inline __u16 mangle_poll(__poll_t val)
-{
- __u16 v = (__force __u16)val;
- /* bit 9 -> bit 8, bit 8 -> bit 2 */
- return (v & ~0x300) | ((v & 0x200) >> 1) | ((v & 0x100) >> 6);
-}
-
-static inline __poll_t demangle_poll(__u16 v)
-{
- /* bit 8 -> bit 9, bit 2 -> bits 2 and 8 */
- return (__force __poll_t)((v & ~0x100) | ((v & 0x100) << 1) |
- ((v & 4) << 6));
-}
-#endif
+#define POLLWRBAND 0x0100
+#define POLLREMOVE 0x0800
#include <asm-generic/poll.h>