diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-23 13:49:06 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-23 13:51:56 -0500 |
commit | 5ca114400dcd46f19f31573e7c60e638bd8d644b (patch) | |
tree | d78dc56c94548d865e505f022c08006716bdb963 | |
parent | f53d77e19b6587527a3dd60a0e638f115e5cd7a9 (diff) | |
parent | a84a8ab94ed5cb65a1355fe9e8d1d55283375808 (diff) | |
download | linux-stable-5ca114400dcd46f19f31573e7c60e638bd8d644b.tar.gz linux-stable-5ca114400dcd46f19f31573e7c60e638bd8d644b.tar.bz2 linux-stable-5ca114400dcd46f19f31573e7c60e638bd8d644b.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
en_rx_am.c was deleted in 'net-next' but had a bug fixed in it in
'net'.
The esp{4,6}_offload.c conflicts were overlapping changes.
The 'out' label is removed so we just return ERR_PTR(-EINVAL)
directly.
Signed-off-by: David S. Miller <davem@davemloft.net>
68 files changed, 729 insertions, 162 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 57d3ee9e4bde..fc3ae951bc07 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). +4.108 KVM_PPC_GET_CPU_CHAR + +Capability: KVM_CAP_PPC_GET_CPU_CHAR +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_ppc_cpu_char (out) +Returns: 0 on successful completion + -EFAULT if struct kvm_ppc_cpu_char cannot be written + +This ioctl gives userspace information about certain characteristics +of the CPU relating to speculative execution of instructions and +possible information leakage resulting from speculative execution (see +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is +returned in struct kvm_ppc_cpu_char, which looks like this: + +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +For extensibility, the character_mask and behaviour_mask fields +indicate which bits of character and behaviour have been filled in by +the kernel. If the set of defined bits is extended in future then +userspace will be able to tell whether it is running on a kernel that +knows about the new bits. + +The character field describes attributes of the CPU which can help +with preventing inadvertent information disclosure - specifically, +whether there is an instruction to flash-invalidate the L1 data cache +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set +to a mode where entries can only be used by the thread that created +them, whether the bcctr[l] instruction prevents speculation, and +whether a speculation barrier instruction (ori 31,31,0) is provided. + +The behaviour field describes actions that software should take to +prevent inadvertent information disclosure, and thus describes which +vulnerabilities the hardware is subject to; specifically whether the +L1 data cache should be flushed when returning to user mode from the +kernel, and whether a speculation barrier should be placed between an +array bounds check and the array access. + +These fields use the same bit definitions as the new +H_GET_CPU_CHARACTERISTICS hypercall. + 5. The kvm_run structure ------------------------ diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt index d11eff61fc9a..5cd58439ad2d 100644 --- a/Documentation/x86/pti.txt +++ b/Documentation/x86/pti.txt @@ -78,7 +78,7 @@ this protection comes at a cost: non-PTI SYSCALL entry code, so requires mapping fewer things into the userspace page tables. The downside is that stacks must be switched at entry time. - d. Global pages are disabled for all kernel structures not + c. Global pages are disabled for all kernel structures not mapped into both kernel and userspace page tables. This feature of the MMU allows different processes to share TLB entries mapping the kernel. Losing the feature means more diff --git a/MAINTAINERS b/MAINTAINERS index 079af8b7ae8e..7ec1b088c07f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9102,6 +9102,7 @@ F: drivers/usb/image/microtek.* MIPS M: Ralf Baechle <ralf@linux-mips.org> +M: James Hogan <jhogan@kernel.org> L: linux-mips@linux-mips.org W: http://www.linux-mips.org/ T: git git://git.linux-mips.org/pub/scm/ralf/linux.git @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index 37bd6d9b8eb9..a6bdc1da47ad 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -102,6 +102,15 @@ sio_pci_route(void) alpha_mv.sys.sio.route_tab); } +static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev) +{ + if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && + (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + return false; + + return true; +} + static unsigned int __init sio_collect_irq_levels(void) { @@ -110,8 +119,7 @@ sio_collect_irq_levels(void) /* Iterate through the devices, collecting IRQ levels. */ for_each_pci_dev(dev) { - if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && - (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + if (!sio_pci_dev_irq_needs_level(dev)) continue; if (dev->irq) @@ -120,8 +128,7 @@ sio_collect_irq_levels(void) return level_bits; } -static void __init -sio_fixup_irq_levels(unsigned int level_bits) +static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset) { unsigned int old_level_bits; @@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits) */ old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8); - level_bits |= (old_level_bits & 0x71ff); + if (reset) + old_level_bits &= 0x71ff; + + level_bits |= old_level_bits; outb((level_bits >> 0) & 0xff, 0x4d0); outb((level_bits >> 8) & 0xff, 0x4d1); } +static inline void +sio_fixup_irq_levels(unsigned int level_bits) +{ + __sio_fixup_irq_levels(level_bits, true); +} + static inline int noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { @@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5; int irq = COMMON_TABLE_LOOKUP, tmp; tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); - return irq >= 0 ? tmp : -1; + + irq = irq >= 0 ? tmp : -1; + + /* Fixup IRQ level if an actual IRQ mapping is detected */ + if (sio_pci_dev_irq_needs_level(dev) && irq >= 0) + __sio_fixup_irq_levels(1 << irq, false); + + return irq; } static inline int diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index 316a99aa9efe..1cfcfbbea6f0 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -18,7 +18,7 @@ * The algorithm for the leading and trailing quadwords remains the same, * however the loop has been unrolled to enable better memory throughput, * and the code has been replicated for each of the entry points: __memset - * and __memsetw to permit better scheduling to eliminate the stalling + * and __memset16 to permit better scheduling to eliminate the stalling * encountered during the mask replication. * A future enhancement might be to put in a byte store loop for really * small (say < 32 bytes) memset()s. Whether or not that change would be @@ -34,7 +34,7 @@ .globl memset .globl __memset .globl ___memset - .globl __memsetw + .globl __memset16 .globl __constant_c_memset .ent ___memset @@ -415,9 +415,9 @@ end: * to mask stalls. Note that entry point names also had to change */ .align 5 - .ent __memsetw + .ent __memset16 -__memsetw: +__memset16: .frame $30,0,$26,0 .prologue 0 @@ -596,8 +596,8 @@ end_w: nop ret $31,($26),1 # L0 : - .end __memsetw - EXPORT_SYMBOL(__memsetw) + .end __memset16 + EXPORT_SYMBOL(__memset16) memset = ___memset __memset = ___memset diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 304203fa9e33..e60494f1eef9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -54,7 +54,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 350a990fc719..8e0b3702f1c0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -259,6 +259,7 @@ config BCM47XX select LEDS_GPIO_REGISTER select BCM47XX_NVRAM select BCM47XX_SPROM + select BCM47XX_SSB if !BCM47XX_BCMA help Support for BCM47XX based boards @@ -389,6 +390,7 @@ config LANTIQ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING + select SYS_SUPPORTS_VPE_LOADER select SYS_HAS_EARLY_PRINTK select GPIOLIB select SWAP_IO_SPACE @@ -516,6 +518,7 @@ config MIPS_MALTA select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS + select SYS_SUPPORTS_VPE_LOADER select SYS_SUPPORTS_ZBOOT select SYS_SUPPORTS_RELOCATABLE select USE_OF @@ -2281,9 +2284,16 @@ config MIPSR2_TO_R6_EMULATOR The only reason this is a build-time option is to save ~14K from the final kernel image. +config SYS_SUPPORTS_VPE_LOADER + bool + depends on SYS_SUPPORTS_MULTITHREADING + help + Indicates that the platform supports the VPE loader, and provides + physical_memsize. + config MIPS_VPE_LOADER bool "VPE loader support." - depends on SYS_SUPPORTS_MULTITHREADING && MODULES + depends on SYS_SUPPORTS_VPE_LOADER && MODULES select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 464af5e025d6..0749c3724543 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -124,30 +124,36 @@ config SCACHE_DEBUGFS If unsure, say N. -menuconfig MIPS_CPS_NS16550 +menuconfig MIPS_CPS_NS16550_BOOL bool "CPS SMP NS16550 UART output" depends on MIPS_CPS help Output debug information via an ns16550 compatible UART if exceptions occur early in the boot process of a secondary core. -if MIPS_CPS_NS16550 +if MIPS_CPS_NS16550_BOOL + +config MIPS_CPS_NS16550 + def_bool MIPS_CPS_NS16550_BASE != 0 config MIPS_CPS_NS16550_BASE hex "UART Base Address" default 0x1b0003f8 if MIPS_MALTA + default 0 help The base address of the ns16550 compatible UART on which to output debug information from the early stages of core startup. + This is only used if non-zero. + config MIPS_CPS_NS16550_SHIFT int "UART Register Shift" - default 0 if MIPS_MALTA + default 0 help The number of bits to shift ns16550 register indices by in order to form their addresses. That is, log base 2 of the span between adjacent ns16550 registers in the system. -endif # MIPS_CPS_NS16550 +endif # MIPS_CPS_NS16550_BOOL endmenu diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 4674f1efbe7a..e1675c25d5d4 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; diff --git a/arch/mips/ath25/devices.c b/arch/mips/ath25/devices.c index e1156347da53..301a9028273c 100644 --- a/arch/mips/ath25/devices.c +++ b/arch/mips/ath25/devices.c @@ -73,6 +73,7 @@ const char *get_system_type(void) void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) { +#ifdef CONFIG_SERIAL_8250_CONSOLE struct uart_port s; memset(&s, 0, sizeof(s)); @@ -85,6 +86,7 @@ void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) s.uartclk = uartclk; early_serial_setup(&s); +#endif /* CONFIG_SERIAL_8250_CONSOLE */ } int __init ath25_add_wmac(int nr, u32 base, int irq) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index dd5567b1e305..8f5bd04f320a 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, *this_cpu_ptr(&cm_core_lock_flags)); } else { WARN_ON(cluster != 0); - WARN_ON(vp != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); /* diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 78c2affeabf8..e84e12655fa8 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o # libgcc-style stuff needed in the kernel -obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o +obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \ + ucmpdi2.o diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h index 28002ed90c2c..199a7f96282f 100644 --- a/arch/mips/lib/libgcc.h +++ b/arch/mips/lib/libgcc.h @@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__))); struct DWstruct { int high, low; }; + +struct TWstruct { + long long high, low; +}; #elif defined(__LITTLE_ENDIAN) struct DWstruct { int low, high; }; + +struct TWstruct { + long long low, high; +}; #else #error I feel sick. #endif @@ -23,4 +31,13 @@ typedef union { long long ll; } DWunion; +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) +typedef int ti_type __attribute__((mode(TI))); + +typedef union { + struct TWstruct s; + ti_type ti; +} TWunion; +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c new file mode 100644 index 000000000000..111ad475aa0c --- /dev/null +++ b/arch/mips/lib/multi3.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> + +#include "libgcc.h" + +/* + * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that + * specific case only we'll implement it here. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 + */ +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) + +/* multiply 64-bit values, low 64-bits returned */ +static inline long long notrace dmulu(long long a, long long b) +{ + long long res; + + asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */ +static inline long long notrace dmuhu(long long a, long long b) +{ + long long res; + + asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 128-bit values, low 128-bits returned */ +ti_type notrace __multi3(ti_type a, ti_type b) +{ + TWunion res, aa, bb; + + aa.ti = a; + bb.ti = b; + + /* + * a * b = (a.lo * b.lo) + * + 2^64 * (a.hi * b.lo + a.lo * b.hi) + * [+ 2^128 * (a.hi * b.hi)] + */ + res.s.low = dmulu(aa.s.low, bb.s.low); + res.s.high = dmuhu(aa.s.low, bb.s.low); + res.s.high += dmulu(aa.s.high, bb.s.low); + res.s.high += dmulu(aa.s.low, bb.s.high); + + return res.ti; +} +EXPORT_SYMBOL(__multi3); + +#endif /* 64BIT && CPU_MIPSR6 && GCC7 */ diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index cdb5a191b9d5..9bb6baa45da3 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -40,7 +40,7 @@ #include "uasm.c" -static const struct insn const insn_table_MM[insn_invalid] = { +static const struct insn insn_table_MM[insn_invalid] = { [insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD}, [insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, [insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD}, diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index d4469b20d176..4f46a4509f79 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -109,9 +109,9 @@ static int rt_timer_probe(struct platform_device *pdev) } rt->irq = platform_get_irq(pdev, 0); - if (!rt->irq) { + if (rt->irq < 0) { dev_err(&pdev->dev, "failed to load irq\n"); - return -ENOENT; + return rt->irq; } rt->membase = devm_ioremap_resource(&pdev->dev, res); diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile index efdecdb6e3ea..8186afca2234 100644 --- a/arch/mips/rb532/Makefile +++ b/arch/mips/rb532/Makefile @@ -2,4 +2,6 @@ # Makefile for the RB532 board specific parts of the kernel # -obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o +obj-$(CONFIG_SERIAL_8250_CONSOLE) += serial.o + +obj-y += irq.o time.o setup.o prom.o gpio.o devices.o diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 32ea3e6731d6..354d258396ff 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -310,6 +310,8 @@ static int __init plat_setup_devices(void) return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } +#ifdef CONFIG_NET + static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); @@ -322,4 +324,6 @@ static int __init setup_kmac(char *s) __setup("kmac=", setup_kmac); +#endif /* CONFIG_NET */ + arch_initcall(plat_setup_devices); diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@ #include <asm/iommu.h> #include <asm/switch_to.h> #include <asm/xive.h> +#ifdef CONFIG_PPC_PSERIES +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#endif #include "timing.h" #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif + case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; @@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds. The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. + */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct h_cpu_char_result c; + unsigned long rc; + + if (!machine_is(pseries)) + return -ENOTTY; + + rc = plpar_get_cpu_characteristics(&c); + if (rc == H_SUCCESS) { + cp->character = c.character; + cp->behaviour = c.behaviour; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | + KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + } + return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, + const char *state, const char *name) +{ + struct device_node *np; + bool r = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + r = of_property_read_bool(np, state); + of_node_put(np); + } + return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct device_node *np, *fw_features; + int r; + + memset(cp, 0, sizeof(*cp)); + r = pseries_get_cpu_char(cp); + if (r != -ENOTTY) + return r; + + np = of_find_node_by_name(NULL, "ibm,opal"); + if (np) { + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + if (!fw_features) + return 0; + if (have_fw_feat(fw_features, "enabled", + "inst-spec-barrier-ori31,31,0")) + cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; + if (have_fw_feat(fw_features, "enabled", + "fw-bcctrl-serialized")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-ori30,30,0")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-trig2")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; + if (have_fw_feat(fw_features, "enabled", + "fw-l1d-thread-split")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-disabled")) + cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + + if (have_fw_feat(fw_features, "enabled", + "speculation-policy-favor-security")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; + if (!have_fw_feat(fw_features, "disabled", + "needs-l1d-flush-msr-pr-0-to-1")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; + if (!have_fw_feat(fw_features, "disabled", + "needs-spec-barrier-for-bound-checks")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + + of_node_put(fw_features); + } + + return 0; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_GET_CPU_CHAR: { + struct kvm_ppc_cpu_char cpuchar; + + r = kvmppc_get_cpu_char(&cpuchar); + if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) + r = -EFAULT; + break; + } default: { struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e14f381757f6..c1b0a9ac1dc8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -207,7 +207,8 @@ struct kvm_s390_sie_block { __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ - __u8 reserved60; /* 0x0060 */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 #define ECB_SRSI 0x04 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 38535a57fef8..4cdaa55fabfe 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -224,6 +224,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) +#define KVM_SYNC_BPBC (1UL << 10) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -247,7 +248,9 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding1[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ union { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2c93cbbcd15e..2598cf243b86 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_GS: r = test_facility(133); break; + case KVM_CAP_S390_BPB: + r = test_facility(82); + break; default: r = 0; } @@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133)) vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With @@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) @@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; vcpu->arch.gs_enabled = 1; } + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && + test_kvm_facility(vcpu->kvm, 82)) { + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5d6ae0326d9e..751348348477 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy(scb_o->gcr, scb_s->gcr, 128); scb_o->pp = scb_s->pp; + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) { + scb_o->fpf &= ~FPF_BPBC; + scb_o->fpf |= scb_s->fpf & FPF_BPBC; + } + /* interrupt intercept */ switch (scb_s->icptcode) { case ICPT_PROGI: @@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecb3 = 0; scb_s->ecd = 0; scb_s->fac = 0; + scb_s->fpf = 0; rc = prepare_cpuflags(vcpu, vsie_page); if (rc) @@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= scb_o->ecb & ECB_TE; } + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) + scb_s->fpf |= scb_o->fpf & FPF_BPBC; /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { scb_s->eca |= scb_o->eca & ECA_VX; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index aa15b4c0e3d1..ff6f8022612c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1264,7 +1264,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b45d8424150..4ad41087ce0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + /* * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both @@ -203,16 +206,17 @@ enum spectre_v2_mitigation { static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops = RSB_CLEAR_LOOPS / 2; + unsigned long loops; asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE("jmp 910f", __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), X86_FEATURE_RETPOLINE) "910:" - : "=&r" (loops), ASM_CALL_CONSTRAINT - : "r" (loops) : "memory" ); + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 31051f35cbb7..3de69330e6c5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1d616d08eee..868e412b4f0c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1785,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e941136e24d8..203d398802a3 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -40,6 +40,7 @@ #include <asm/debugreg.h> #include <asm/set_memory.h> #include <asm/sections.h> +#include <asm/nospec-branch.h> #include "common.h" @@ -203,7 +204,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -237,6 +238,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with + * older gcc may use indirect jump. So we add this check instead of + * replace indirect-jump check. + */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 832a6acd730f..cb368c2a22ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + /* + * Use wbinvd on processors that support SME. This provides support + * for performing a successful kexec when going from SME inactive + * to SME active (or vice-versa). The cache must be cleared so that + * if there are entries with the same physical address, both with and + * without the encryption bit, they don't race each other when flushed + * and potentially end up with the wrong entry being committed to + * memory. + */ + if (boot_cpu_has(X86_FEATURE_SME)) + native_wbinvd(); for (;;) { /* - * Use wbinvd followed by hlt to stop the processor. This - * provides support for kexec on a processor that supports - * SME. With kexec, going from SME inactive to SME active - * requires clearing cache entries so that addresses without - * the encryption bit set don't corrupt the same physical - * address that has the encryption bit set when caches are - * flushed. To achieve this a wbinvd is performed followed by - * a hlt. Even if the processor is not in the kexec/SME - * scenario this only adds a wbinvd to a halting processor. + * Use native_halt() so that memory contents don't change + * (stack usage and variables) after possibly issuing the + * native_wbinvd() above. */ - asm volatile("wbinvd; hlt" : : : "memory"); + native_halt(); } } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1e413a9326aa..9b138a06c1a4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -124,6 +124,12 @@ SECTIONS ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1cec2c62a0b0..c53298dfbf50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7496,13 +7496,13 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. */ - if (!(sregs->cr4 & X86_CR4_PAE_BIT) + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) return -EINVAL; } else { diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cb45c6cb465f..dfb2ba91b670 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,7 +9,7 @@ #include <asm/nospec-branch.h> .macro THUNK reg - .section .text.__x86.indirect_thunk.\reg + .section .text.__x86.indirect_thunk ENTRY(__x86_indirect_thunk_\reg) CFI_STARTPROC @@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) * than one per register with the correct names. So we do it * the simple and nasty way... */ -#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) GENERATE_THUNK(_ASM_AX) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 3ef362f598e3..e1d61e8500f9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -738,7 +738,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return total; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9fc12f556534..554d60394c06 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1954,10 +1954,15 @@ static int crypt_setkey(struct crypt_config *cc) /* Ignore extra keys (which are used for IV etc) */ subkey_size = crypt_subkey_size(cc); - if (crypt_integrity_hmac(cc)) + if (crypt_integrity_hmac(cc)) { + if (subkey_size < cc->key_mac_size) + return -EINVAL; + crypt_copy_authenckey(cc->authenc_key, cc->key, subkey_size - cc->key_mac_size, cc->key_mac_size); + } + for (i = 0; i < cc->tfms_count; i++) { if (crypt_integrity_hmac(cc)) r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], @@ -2053,9 +2058,6 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string ret = crypt_setkey(cc); - /* wipe the kernel key payload copy in each case */ - memset(cc->key, 0, cc->key_size * sizeof(u8)); - if (!ret) { set_bit(DM_CRYPT_KEY_VALID, &cc->flags); kzfree(cc->key_string); @@ -2523,6 +2525,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key) } } + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); + return ret; } @@ -2740,6 +2746,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->tag_pool_max_sectors * cc->on_disk_tag_size); if (!cc->tag_pool) { ti->error = "Cannot allocate integrity tags mempool"; + ret = -ENOMEM; goto bad; } @@ -2961,6 +2968,9 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) return ret; if (cc->iv_gen_ops && cc->iv_gen_ops->init) ret = cc->iv_gen_ops->init(cc); + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); return ret; } if (argc == 2 && !strcasecmp(argv[1], "wipe")) { @@ -3007,7 +3017,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 18, 0}, + .version = {1, 18, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 05c7bfd0c9d9..46d7c8749222 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2559,7 +2559,8 @@ static int create_journal(struct dm_integrity_c *ic, char **error) int r = 0; unsigned i; __u64 journal_pages, journal_desc_size, journal_tree_size; - unsigned char *crypt_data = NULL; + unsigned char *crypt_data = NULL, *crypt_iv = NULL; + struct skcipher_request *req = NULL; ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); @@ -2617,9 +2618,20 @@ static int create_journal(struct dm_integrity_c *ic, char **error) if (blocksize == 1) { struct scatterlist *sg; - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; - skcipher_request_set_tfm(req, ic->journal_crypt); + + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } ic->journal_xor = dm_integrity_alloc_page_list(ic); if (!ic->journal_xor) { @@ -2641,9 +2653,9 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_set_buf(&sg[i], va, PAGE_SIZE); } sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); - skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); + skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) @@ -2659,10 +2671,22 @@ static int create_journal(struct dm_integrity_c *ic, char **error) crypto_free_skcipher(ic->journal_crypt); ic->journal_crypt = NULL; } else { - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; unsigned crypt_len = roundup(ivsize, blocksize); + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } + crypt_data = kmalloc(crypt_len, GFP_KERNEL); if (!crypt_data) { *error = "Unable to allocate crypt data"; @@ -2670,8 +2694,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - skcipher_request_set_tfm(req, ic->journal_crypt); - ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); if (!ic->journal_scatterlist) { *error = "Unable to allocate sg list"; @@ -2695,12 +2717,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error) struct skcipher_request *section_req; __u32 section_le = cpu_to_le32(i); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); memset(crypt_data, 0x00, crypt_len); memcpy(crypt_data, §ion_le, min((size_t)crypt_len, sizeof(section_le))); sg_init_one(&sg, crypt_data, crypt_len); - skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); + skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) @@ -2758,6 +2780,9 @@ retest_commit_id: } bad: kfree(crypt_data); + kfree(crypt_iv); + skcipher_request_free(req); + return r; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index d31d18d9727c..36ef284ad086 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -80,10 +80,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index f21ce6a3d4cf..58b319757b1e 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -683,23 +683,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 9b9f3f99b39d..4fee3553e1a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -111,6 +111,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, ethtype_mask = 0; } + if (ethtype_key == ETH_P_IPV6) + fs->type = 1; + fs->val.ethtype = ethtype_key; fs->mask.ethtype = ethtype_mask; fs->val.proto = key->ip_proto; @@ -205,8 +208,8 @@ static void cxgb4_process_flow_match(struct net_device *dev, VLAN_PRIO_SHIFT); vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << VLAN_PRIO_SHIFT); - fs->val.ivlan = cpu_to_be16(vlan_tci); - fs->mask.ivlan = cpu_to_be16(vlan_tci_mask); + fs->val.ivlan = vlan_tci; + fs->mask.ivlan = vlan_tci_mask; /* Chelsio adapters use ivlan_vld bit to match vlan packets * as 802.1Q. Also, when vlan tag is present in packets, diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c6e859a27ee6..e180657a02ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4634,6 +4634,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index be2ce8dece4a..8f2a77ecf4fb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -411,6 +411,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) struct ibmvnic_rx_pool *rx_pool; int rx_scrqs; int i, j, rc; + u64 *size_array; + + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); for (i = 0; i < rx_scrqs; i++) { @@ -418,7 +422,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff); + if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { + free_long_term_buff(adapter, &rx_pool->long_term_buff); + rx_pool->buff_size = be64_to_cpu(size_array[i]); + alloc_long_term_buff(adapter, &rx_pool->long_term_buff, + rx_pool->size * + rx_pool->buff_size); + } else { + rc = reset_long_term_buff(adapter, + &rx_pool->long_term_buff); + } + if (rc) return rc; @@ -440,14 +454,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) static void release_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; - int rx_scrqs; int i, j; if (!adapter->rx_pool) return; - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - for (i = 0; i < rx_scrqs; i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { rx_pool = &adapter->rx_pool[i]; netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); @@ -470,6 +482,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->rx_pool); adapter->rx_pool = NULL; + adapter->num_active_rx_pools = 0; } static int init_rx_pools(struct net_device *netdev) @@ -494,6 +507,8 @@ static int init_rx_pools(struct net_device *netdev) return -1; } + adapter->num_active_rx_pools = 0; + for (i = 0; i < rxadd_subcrqs; i++) { rx_pool = &adapter->rx_pool[i]; @@ -537,6 +552,8 @@ static int init_rx_pools(struct net_device *netdev) rx_pool->next_free = 0; } + adapter->num_active_rx_pools = rxadd_subcrqs; + return 0; } @@ -587,13 +604,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter) static void release_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; - int i, tx_scrqs; + int i; if (!adapter->tx_pool) return; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); - for (i = 0; i < tx_scrqs; i++) { + for (i = 0; i < adapter->num_active_tx_pools; i++) { netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i); tx_pool = &adapter->tx_pool[i]; kfree(tx_pool->tx_buff); @@ -604,6 +620,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->tx_pool); adapter->tx_pool = NULL; + adapter->num_active_tx_pools = 0; } static int init_tx_pools(struct net_device *netdev) @@ -620,6 +637,8 @@ static int init_tx_pools(struct net_device *netdev) if (!adapter->tx_pool) return -1; + adapter->num_active_tx_pools = 0; + for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; @@ -667,6 +686,8 @@ static int init_tx_pools(struct net_device *netdev) tx_pool->producer_index = 0; } + adapter->num_active_tx_pools = tx_subcrqs; + return 0; } @@ -861,7 +882,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) if (adapter->vpd->buff) len = adapter->vpd->len; - reinit_completion(&adapter->fw_done); + init_completion(&adapter->fw_done); crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; crq.get_vpd_size.cmd = GET_VPD_SIZE; ibmvnic_send_crq(adapter, &crq); @@ -923,6 +944,13 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (!adapter->vpd) return -ENOMEM; + /* Vital Product Data (VPD) */ + rc = ibmvnic_get_vpd(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + return rc; + } + adapter->map_id = 1; adapter->napi = kcalloc(adapter->req_rx_queues, sizeof(struct napi_struct), GFP_KERNEL); @@ -996,7 +1024,7 @@ static int __ibmvnic_open(struct net_device *netdev) static int ibmvnic_open(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int rc, vpd; + int rc; mutex_lock(&adapter->reset_lock); @@ -1019,11 +1047,6 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); netif_carrier_on(netdev); - /* Vital Product Data (VPD) */ - vpd = ibmvnic_get_vpd(adapter); - if (vpd) - netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); - mutex_unlock(&adapter->reset_lock); return rc; @@ -1553,6 +1576,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) static int do_reset(struct ibmvnic_adapter *adapter, struct ibmvnic_rwi *rwi, u32 reset_state) { + u64 old_num_rx_queues, old_num_tx_queues; struct net_device *netdev = adapter->netdev; int i, rc; @@ -1562,6 +1586,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; + old_num_rx_queues = adapter->req_rx_queues; + old_num_tx_queues = adapter->req_tx_queues; + if (rwi->reset_reason == VNIC_RESET_MOBILITY) { rc = ibmvnic_reenable_crq_queue(adapter); if (rc) @@ -1606,6 +1633,12 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = init_resources(adapter); if (rc) return rc; + } else if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues) { + release_rx_pools(adapter); + release_tx_pools(adapter); + init_rx_pools(netdev); + init_tx_pools(netdev); } else { rc = reset_tx_pools(adapter); if (rc) @@ -3603,7 +3636,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, *req_value, (long int)be64_to_cpu(crq->request_capability_rsp. number), name); - *req_value = be64_to_cpu(crq->request_capability_rsp.number); + + if (be16_to_cpu(crq->request_capability_rsp.capability) == + REQ_MTU) { + pr_err("mtu of %llu is not supported. Reverting.\n", + *req_value); + *req_value = adapter->fallback.mtu; + } else { + *req_value = + be64_to_cpu(crq->request_capability_rsp.number); + } + ibmvnic_send_req_caps(adapter, 1); return; default: diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 2df79fdd800b..fe21a6e2ddae 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1091,6 +1091,8 @@ struct ibmvnic_adapter { u64 opt_rxba_entries_per_subcrq; __be64 tx_rx_desc_req; u8 map_id; + u64 num_active_rx_pools; + u64 num_active_tx_pools; struct tasklet_struct tasklet; enum vnic_state state; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d56fe32bf48d..8a22ff67b026 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -457,12 +457,10 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, void usbnet_defer_kevent (struct usbnet *dev, int work) { set_bit (work, &dev->flags); - if (!schedule_work (&dev->kevent)) { - if (net_ratelimit()) - netdev_err(dev->net, "kevent %d may have been dropped\n", work); - } else { + if (!schedule_work (&dev->kevent)) + netdev_dbg(dev->net, "kevent %d may have been dropped\n", work); + else netdev_dbg(dev->net, "kevent %d scheduled\n", work); - } } EXPORT_SYMBOL_GPL(usbnet_defer_kevent); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 58476b728c57..c9406852c3e9 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -486,15 +486,28 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type, int sas_eh_abort_handler(struct scsi_cmnd *cmd) { - int res; + int res = TMF_RESP_FUNC_FAILED; struct sas_task *task = TO_SAS_TASK(cmd); struct Scsi_Host *host = cmd->device->host; + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_internal *i = to_sas_internal(host->transportt); + unsigned long flags; if (!i->dft->lldd_abort_task) return FAILED; - res = i->dft->lldd_abort_task(task); + spin_lock_irqsave(host->host_lock, flags); + /* We cannot do async aborts for SATA devices */ + if (dev_is_sata(dev) && !host->host_eh_scheduled) { + spin_unlock_irqrestore(host->host_lock, flags); + return FAILED; + } + spin_unlock_irqrestore(host->host_lock, flags); + + if (task) + res = i->dft->lldd_abort_task(task); + else + SAS_DPRINTK("no task to abort\n"); if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index f650e475d8f0..fdf2aad73470 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,10 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; - - /* Each thread allocates its own gi, no race */ - groups_sort(gi); } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index ded456f17de6..c584ad8d023c 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EAGAIN; restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 835c6e148afc..0577d6dba8c8 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 9c5a2628d6ce..1d3877c39a00 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return pfn_to_page(swp_offset(entry)); @@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return false; } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return NULL; @@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } + +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { return NULL; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 496e59a2738b..8fb90a0819c3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 0ba0dd8863a7..5187dfe809ac 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -321,15 +321,23 @@ void irq_matrix_remove_reserved(struct irq_matrix *m) int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, bool reserved, unsigned int *mapped_cpu) { - unsigned int cpu; + unsigned int cpu, best_cpu, maxavl = 0; + struct cpumap *cm; + unsigned int bit; + best_cpu = UINT_MAX; for_each_cpu(cpu, msk) { - struct cpumap *cm = per_cpu_ptr(m->maps, cpu); - unsigned int bit; + cm = per_cpu_ptr(m->maps, cpu); - if (!cm->online) + if (!cm->online || cm->available <= maxavl) continue; + best_cpu = cpu; + maxavl = cm->available; + } + + if (maxavl) { + cm = per_cpu_ptr(m->maps, best_cpu); bit = matrix_alloc_area(m, cm, 1, false); if (bit < m->alloc_end) { cm->allocated++; @@ -338,8 +346,8 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, m->global_available--; if (reserved) m->global_reserved--; - *mapped_cpu = cpu; - trace_irq_matrix_alloc(bit, cpu, m, cm); + *mapped_cpu = best_cpu; + trace_irq_matrix_alloc(bit, best_cpu, m, cm); return bit; } } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index d22b84310f6d..ae3c2a35d61b 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -30,10 +30,37 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) return true; } +static inline bool pfn_in_hpage(struct page *hpage, unsigned long pfn) +{ + unsigned long hpage_pfn = page_to_pfn(hpage); + + /* THP can be referenced by any subpage */ + return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage); +} + +/** + * check_pte - check if @pvmw->page is mapped at the @pvmw->pte + * + * page_vma_mapped_walk() found a place where @pvmw->page is *potentially* + * mapped. check_pte() has to validate this. + * + * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary + * page. + * + * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration + * entry that points to @pvmw->page or any subpage in case of THP. + * + * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to + * @pvmw->page or any subpage in case of THP. + * + * Otherwise, return false. + * + */ static bool check_pte(struct page_vma_mapped_walk *pvmw) { + unsigned long pfn; + if (pvmw->flags & PVMW_MIGRATION) { -#ifdef CONFIG_MIGRATION swp_entry_t entry; if (!is_swap_pte(*pvmw->pte)) return false; @@ -41,38 +68,25 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) if (!is_migration_entry(entry)) return false; - if (migration_entry_to_page(entry) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (migration_entry_to_page(entry) < pvmw->page) - return false; -#else - WARN_ON_ONCE(1); -#endif - } else { - if (is_swap_pte(*pvmw->pte)) { - swp_entry_t entry; - entry = pte_to_swp_entry(*pvmw->pte); - if (is_device_private_entry(entry) && - device_private_entry_to_page(entry) == pvmw->page) - return true; - } + pfn = migration_entry_to_pfn(entry); + } else if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; - if (!pte_present(*pvmw->pte)) + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) return false; - /* THP can be referenced by any subpage */ - if (pte_page(*pvmw->pte) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (pte_page(*pvmw->pte) < pvmw->page) + pfn = device_private_entry_to_pfn(entry); + } else { + if (!pte_present(*pvmw->pte)) return false; + + pfn = pte_pfn(*pvmw->pte); } - return true; + return pfn_in_hpage(pvmw->page, pfn); } /** diff --git a/net/core/dev.c b/net/core/dev.c index 77795f66c246..4670ccabe23a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3166,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 32fbd9ba3609..da5635fc52c2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 02f00be12bb0..10f7f74a0831 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b6a2aa1dcf56..4d58e2ce0b5b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 01801b77bd0d..ea6e6e7df0ee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 44d109c435bc..3fd1ec775dc2 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a0f89ad76f9d..2a04dc9c781b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2e554ef6d75f..9920d2f84eff 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock) sizeof(val)); } -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { - return tcp_sk(tc->t_sock->sk)->snd_nxt; + /* seq# of the last byte of data in tcp send buffer */ + return tcp_sk(tc->t_sock->sk)->write_seq; } u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index e7858ee8ed8b..c6fa080e9b6d 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc); u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 73c74763ca72..16f65744d984 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, * m_ack_seq is set to the sequence number of the last byte of * header and data. see rds_tcp_is_acked(). */ - tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + tc->t_last_sent_nxt = rds_tcp_write_seq(tc); rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; @@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", - rm, rds_tcp_snd_nxt(tc), + rm, rds_tcp_write_seq(tc), (unsigned long long)rm->m_ack_seq); } diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 275925b93b29..35bc7106d182 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 61f394d369bf..0a9b72fbd761 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -577,6 +577,8 @@ alloc_payload: get_page(page); sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; sg_set_page(sg, page, copy, offset); + sg_unmark_end(sg); + ctx->sg_plaintext_num_elem++; sk_mem_charge(sk, copy); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b4b69c2d1012..9dea96380339 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1310,7 +1310,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 62310122ee78..743ca5cb05ef 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - ret = vgic_v4_init(kvm); - if (ret) - goto out; + if (vgic_has_its(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 4a37292855bc..bc4265154bac 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm) struct kvm_vcpu *vcpu; int i, nr_vcpus, ret; - if (!vgic_supports_direct_msis(kvm)) + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ if (dist->its_vm.vpes) |