diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-26 14:11:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-26 14:11:54 -0800 |
commit | 02fc87b117a9b9ec325089d098fce86ed11966bd (patch) | |
tree | 537176c1c32b25c781bf8974af854a4ee4dbc77a | |
parent | 6830c8db58c2616d8ba2bf45e7d98dca5f69b07f (diff) | |
parent | 12a78d43de767eaf8fb272facb7a7b6f2dc6a9df (diff) | |
download | linux-02fc87b117a9b9ec325089d098fce86ed11966bd.tar.gz linux-02fc87b117a9b9ec325089d098fce86ed11966bd.tar.bz2 linux-02fc87b117a9b9ec325089d098fce86ed11966bd.zip |
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc x86 fixes from Ingo Molnar:
- topology enumeration fixes
- KASAN fix
- two entry fixes (not yet the big series related to KASLR)
- remove obsolete code
- instruction decoder fix
- better /dev/mem sanity checks, hopefully working better this time
- pkeys fixes
- two ACPI fixes
- 5-level paging related fixes
- UMIP fixes that should make application visible faults more debuggable
- boot fix for weird virtualization environment
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
x86/decoder: Add new TEST instruction pattern
x86/PCI: Remove unused HyperTransport interrupt support
x86/umip: Fix insn_get_code_seg_params()'s return value
x86/boot/KASLR: Remove unused variable
x86/entry/64: Add missing irqflags tracing to native_load_gs_index()
x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow
x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing
x86/pkeys/selftests: Fix protection keys write() warning
x86/pkeys/selftests: Rename 'si_pkey' to 'siginfo_pkey'
x86/mpx/selftests: Fix up weird arrays
x86/pkeys: Update documentation about availability
x86/umip: Print a warning into the syslog if UMIP-protected instructions are used
x86/smpboot: Fix __max_logical_packages estimate
x86/topology: Avoid wasting 128k for package id array
perf/x86/intel/uncore: Cache logical pkg id in uncore driver
x86/acpi: Reduce code duplication in mp_override_legacy_irq()
x86/acpi: Handle SCI interrupts above legacy space gracefully
x86/boot: Fix boot failure when SMP MP-table is based at 0
x86/mm: Limit mmap() of /dev/mem to valid physical addresses
x86/selftests: Add test for mapping placement for 5-level paging
...
38 files changed, 472 insertions, 616 deletions
diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt index fa46dcb347bc..ecb0d2dadfb7 100644 --- a/Documentation/x86/protection-keys.txt +++ b/Documentation/x86/protection-keys.txt @@ -1,5 +1,10 @@ -Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature -which will be found on future Intel CPUs. +Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature +which is found on Intel's Skylake "Scalable Processor" Server CPUs. +It will be avalable in future non-server parts. + +For anyone wishing to test or use this feature, it is available in +Amazon's EC2 C5 instances and is known to work there using an Ubuntu +17.04 image. Memory Protection Keys provides a mechanism for enforcing page-based protections, but without requiring modification of the page tables diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df3276d6bfe3..8eed3f94bfc7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1804,14 +1804,20 @@ config X86_SMAP If unsure, say Y. config X86_INTEL_UMIP - def_bool n + def_bool y depends on CPU_SUP_INTEL prompt "Intel User Mode Instruction Prevention" if EXPERT ---help--- The User Mode Instruction Prevention (UMIP) is a security feature in newer Intel processors. If enabled, a general - protection fault is issued if the instructions SGDT, SLDT, - SIDT, SMSW and STR are executed in user mode. + protection fault is issued if the SGDT, SLDT, SIDT, SMSW + or STR instructions are executed in user mode. These instructions + unnecessarily expose information about the hardware state. + + The vast majority of applications do not use these instructions. + For the very few that do, software emulation is provided in + specific cases in protected and virtual-8086 modes. Emulated + results are dummy. config X86_INTEL_MPX prompt "Intel MPX (Memory Protection Extensions)" diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a63fbc25ce84..8199a6187251 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) static void mem_avoid_memmap(char *str) { static int i; - int rc; if (i >= MAX_MEMMAP_REGIONS) return; @@ -219,7 +218,7 @@ static int handle_mem_memmap(void) return 0; tmp_cmdline = malloc(len + 1); - if (!tmp_cmdline ) + if (!tmp_cmdline) error("Failed to allocate space for tmp_cmdline"); memcpy(tmp_cmdline, args, len); @@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, cmd_line |= boot_params->hdr.cmd_line_ptr; /* Calculate size of cmd_line. */ ptr = (char *)(unsigned long)cmd_line; - for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + for (cmd_line_size = 0; ptr[cmd_line_size++];) ; mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a2b30ec69497..f81d50d7ceac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64) END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_IRETQ +.macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, EFLAGS(%rsp) /* interrupts off? */ + bt $9, \flags /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: #endif .endm +.macro TRACE_IRQS_IRETQ + TRACE_IRQS_FLAGS EFLAGS(%rsp) +.endm + /* * When dynamic function tracer is enabled it will add a breakpoint * to all locations that it is about to modify, sync CPUs, update @@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64) movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - TRACE_IRQS_OFF - /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ @@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ UNWIND_HINT_REGS extra=0 + TRACE_IRQS_OFF + /* * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. @@ -943,11 +947,13 @@ ENTRY(native_load_gs_index) FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) + TRACE_IRQS_OFF SWAPGS .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS + TRACE_IRQS_FLAGS (%rsp) popfq FRAME_END ret diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d45e06346f14..7874c980d569 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev) int i, phys_id, pkg; phys_id = uncore_pcibus_to_physid(pdev->bus); - pkg = topology_phys_to_logical_pkg(phys_id); box = pci_get_drvdata(pdev); if (!box) { + pkg = topology_phys_to_logical_pkg(phys_id); for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { uncore_extra_pci_dev[pkg].dev[i] = NULL; @@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) return; pci_set_drvdata(pdev, NULL); - pmu->boxes[pkg] = NULL; + pmu->boxes[box->pkgid] = NULL; if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pmu_unregister(pmu); uncore_box_exit(box); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 4364191e7c6b..414dc7e7c950 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -100,7 +100,7 @@ struct intel_uncore_extra_reg { struct intel_uncore_box { int pci_phys_id; - int pkgid; + int pkgid; /* Logical package ID */ int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index f4e4168455a8..6d8044ab1060 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); + int pkg = box->pkgid; struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; if (filter_pdev) { diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 3a091cea36c5..0d157d2a1e2a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void) extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); +extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b80e46733909..2851077b6051 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -99,14 +99,6 @@ struct irq_alloc_info { void *dmar_data; }; #endif -#ifdef CONFIG_HT_IRQ - struct { - int ht_pos; - int ht_idx; - struct pci_dev *ht_dev; - void *ht_update; - }; -#endif #ifdef CONFIG_X86_UV struct { int uv_limit; diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h deleted file mode 100644 index 5d55df352879..000000000000 --- a/arch/x86/include/asm/hypertransport.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_HYPERTRANSPORT_H -#define _ASM_X86_HYPERTRANSPORT_H - -/* - * Constants for x86 Hypertransport Interrupts. - */ - -#define HT_IRQ_LOW_BASE 0xf8000000 - -#define HT_IRQ_LOW_VECTOR_SHIFT 16 -#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000 -#define HT_IRQ_LOW_VECTOR(v) \ - (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK) - -#define HT_IRQ_LOW_DEST_ID_SHIFT 8 -#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00 -#define HT_IRQ_LOW_DEST_ID(v) \ - (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK) - -#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000 -#define HT_IRQ_LOW_DM_LOGICAL 0x0000040 - -#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000 -#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020 - - -#define HT_IRQ_LOW_MT_FIXED 0x0000000 -#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004 -#define HT_IRQ_LOW_MT_SMI 0x0000008 -#define HT_IRQ_LOW_MT_NMI 0x000000c -#define HT_IRQ_LOW_MT_INIT 0x0000010 -#define HT_IRQ_LOW_MT_STARTUP 0x0000014 -#define HT_IRQ_LOW_MT_EXTINT 0x0000018 -#define HT_IRQ_LOW_MT_LINT1 0x000008c -#define HT_IRQ_LOW_MT_LINT0 0x0000098 - -#define HT_IRQ_LOW_IRQ_MASKED 0x0000001 - - -#define HT_IRQ_HIGH_DEST_ID_SHIFT 0 -#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff -#define HT_IRQ_HIGH_DEST_ID(v) \ - ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) - -#endif /* _ASM_X86_HYPERTRANSPORT_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index e1d3b4ce8a92..2b6ccf2c49f1 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -18,6 +18,6 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); -char insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_code_seg_params(struct pt_regs *regs); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 93ae8aee1780..95e948627fd0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index f695cc6b8e1f..139feef467f7 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain); static inline void arch_init_msi_domain(struct irq_domain *domain) { } #endif -#ifdef CONFIG_HT_IRQ -extern void arch_init_htirq_domain(struct irq_domain *domain); -#else -static inline void arch_init_htirq_domain(struct irq_domain *domain) { } -#endif - #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2db7cf720b04..cc16fa882e3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,6 +132,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + unsigned initialized : 1; } __randomize_layout; struct cpuid_regs { diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ef9e02e614d0..f4c463df8b08 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi); + static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - /* * Check bus_irq boundary. */ @@ -358,14 +357,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, } /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - - /* * TBD: This check is for faulty timer entries, where the override * erroneously sets the trigger to level, resulting in a HUGE * increase of timer interrupts! @@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0) + return; /* * Reset default identity mapping if gsi is also an legacy IRQ, * otherwise there will be more than one entry with the same GSI @@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi) +{ + struct mpc_intsrc mp_irq; + int ioapic, pin; + + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + pr_warn("Failed to find ioapic for gsi : %u\n", gsi); + return ioapic; + } + + pin = mp_find_ioapic_pin(ioapic, gsi); + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = pin; + + mp_save_irq(&mp_irq); + + return 0; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + if (bus_irq < NR_IRQS_LEGACY) + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + else + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index a9e08924927e..a6fcaf16cdbf 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -12,7 +12,6 @@ obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_HT_IRQ) += htirq.o obj-$(CONFIG_SMP) += ipi.o ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c deleted file mode 100644 index b07075dce8b7..000000000000 --- a/arch/x86/kernel/apic/htirq.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Support Hypertransport IRQ - * - * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo - * Moved from arch/x86/kernel/apic/io_apic.c. - * Jiang Liu <jiang.liu@linux.intel.com> - * Add support of hierarchical irqdomain - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/pci.h> -#include <linux/htirq.h> -#include <asm/irqdomain.h> -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/hypertransport.h> - -static struct irq_domain *htirq_domain; - -/* - * Hypertransport interrupt support - */ -static int -ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_data *parent = data->parent_data; - int ret; - - ret = parent->chip->irq_set_affinity(parent, mask, force); - if (ret >= 0) { - struct ht_irq_msg msg; - struct irq_cfg *cfg = irqd_cfg(data); - - fetch_ht_irq_msg(data->irq, &msg); - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | - HT_IRQ_LOW_DEST_ID_MASK); - msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) | - HT_IRQ_LOW_DEST_ID(cfg->dest_apicid); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); - write_ht_irq_msg(data->irq, &msg); - } - - return ret; -} - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .irq_mask = mask_ht_irq, - .irq_unmask = unmask_ht_irq, - .irq_ack = irq_chip_ack_parent, - .irq_set_affinity = ht_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *arg) -{ - struct ht_irq_cfg *ht_cfg; - struct irq_alloc_info *info = arg; - struct pci_dev *dev; - irq_hw_number_t hwirq; - int ret; - - if (nr_irqs > 1 || !info) - return -EINVAL; - - dev = info->ht_dev; - hwirq = (info->ht_idx & 0xFF) | - PCI_DEVID(dev->bus->number, dev->devfn) << 8 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24; - if (irq_find_mapping(domain, hwirq) > 0) - return -EEXIST; - - ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL); - if (!ht_cfg) - return -ENOMEM; - - ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); - if (ret < 0) { - kfree(ht_cfg); - return ret; - } - - /* Initialize msg to a value that will never match the first write. */ - ht_cfg->msg.address_lo = 0xffffffff; - ht_cfg->msg.address_hi = 0xffffffff; - ht_cfg->dev = info->ht_dev; - ht_cfg->update = info->ht_update; - ht_cfg->pos = info->ht_pos; - ht_cfg->idx = 0x10 + (info->ht_idx * 2); - irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg, - handle_edge_irq, ht_cfg, "edge"); - - return 0; -} - -static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) -{ - struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); - - BUG_ON(nr_irqs != 1); - kfree(irq_data->chip_data); - irq_domain_free_irqs_top(domain, virq, nr_irqs); -} - -static int htirq_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) -{ - struct ht_irq_msg msg; - struct irq_cfg *cfg = irqd_cfg(irq_data); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - write_ht_irq_msg(irq_data->irq, &msg); - return 0; -} - -static void htirq_domain_deactivate(struct irq_domain *domain, - struct irq_data *irq_data) -{ - struct ht_irq_msg msg; - - memset(&msg, 0, sizeof(msg)); - write_ht_irq_msg(irq_data->irq, &msg); -} - -static const struct irq_domain_ops htirq_domain_ops = { - .alloc = htirq_domain_alloc, - .free = htirq_domain_free, - .activate = htirq_domain_activate, - .deactivate = htirq_domain_deactivate, -}; - -void __init arch_init_htirq_domain(struct irq_domain *parent) -{ - struct fwnode_handle *fn; - - if (disable_apic) - return; - - fn = irq_domain_alloc_named_fwnode("PCI-HT"); - if (!fn) - goto warn; - - htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL); - irq_domain_free_fwnode(fn); - if (!htirq_domain) - goto warn; - - htirq_domain->parent = parent; - return; - -warn: - pr_warn("Failed to initialize irqdomain for HTIRQ.\n"); -} - -int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, - ht_irq_update_t *update) -{ - struct irq_alloc_info info; - - if (!htirq_domain) - return -ENOSYS; - - init_irq_alloc_info(&info, NULL); - info.ht_idx = idx; - info.ht_pos = pos; - info.ht_dev = dev; - info.ht_update = update; - - return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev), - &info); -} - -void arch_teardown_ht_irq(unsigned int irq) -{ - irq_domain_free_irqs(irq, 1); -} diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 05c85e693a5d..6a823a25eaff 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -1,5 +1,5 @@ /* - * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc. + * Local APIC related interfaces to support IOAPIC, MSI, etc. * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. @@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void) nr_irqs = NR_VECTORS * nr_cpu_ids; nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) +#if defined(CONFIG_PCI_MSI) /* * for MSI and HT dyn irq */ @@ -663,7 +663,6 @@ int __init arch_early_irq_init(void) irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); - arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 13ae9e5eec2f..fa998ca8aa5a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) cr4_set_bits(X86_CR4_UMIP); + pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); + return; out: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 410c5dadcee3..3a4b12809ab5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } static unsigned long mpf_base; +static bool mpf_found; static unsigned long __init get_mpc_size(unsigned long physptr) { @@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early) if (!smp_found_config) return; - if (!mpf_base) + if (!mpf_found) return; if (acpi_lapic && early) @@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) smp_found_config = 1; #endif mpf_base = base; + mpf_found = true; pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", base, base + sizeof(*mpf) - 1, mpf); @@ -858,7 +860,7 @@ static int __init update_mp_table(void) if (!enable_update_mptable) return 0; - if (!mpf_base) + if (!mpf_found) return 0; mpf = early_memremap(mpf_base, sizeof(*mpf)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5f59e6bee123..3d01df7d7cf6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ -static int *physical_to_logical_pkg __read_mostly; -static unsigned long *physical_package_map __read_mostly;; -static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; @@ -281,108 +278,48 @@ static void notrace start_secondary(void *unused) } /** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->phys_proc_id == phys_pkg) + return c->logical_proc_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +/** * topology_update_package_map - Update the physical to logical package map * @pkg: The physical package id as retrieved via CPUID * @cpu: The cpu for which this is updated */ int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new; + int new; - /* Called from early boot ? */ - if (!physical_package_map) - return 0; - - if (pkg >= max_physical_pkg_id) - return -EINVAL; - - /* Set the logical package id */ - if (test_and_set_bit(pkg, physical_package_map)) + /* Already available somewhere? */ + new = topology_phys_to_logical_pkg(pkg); + if (new >= 0) goto found; - if (logical_packages >= __max_logical_packages) { - pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", - logical_packages, cpu, __max_logical_packages); - return -ENOSPC; - } - new = logical_packages++; if (new != pkg) { pr_info("CPU %u Converting physical %u to logical package %u\n", cpu, pkg, new); } - physical_to_logical_pkg[pkg] = new; - found: - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + cpu_data(cpu).logical_proc_id = new; return 0; } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - if (phys_pkg >= max_physical_pkg_id) - return -1; - return physical_to_logical_pkg[phys_pkg]; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) -{ - unsigned int ncpus; - size_t size; - - /* - * Today neither Intel nor AMD support heterogenous systems. That - * might change in the future.... - * - * While ideally we'd want '* smp_num_siblings' in the below @ncpus - * computation, this won't actually work since some Intel BIOSes - * report inconsistent HT data when they disable HT. - * - * In particular, they reduce the APIC-IDs to only include the cores, - * but leave the CPUID topology to say there are (2) siblings. - * This means we don't know how many threads there will be until - * after the APIC enumeration. - * - * By not including this we'll sometimes over-estimate the number of - * logical packages by the amount of !present siblings, but this is - * still better than MAX_LOCAL_APIC. - * - * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited - * on the command line leading to a similar issue as the HT disable - * problem because the hyperthreads are usually enumerated after the - * primary cores. - */ - ncpus = boot_cpu_data.x86_max_cores; - if (!ncpus) { - pr_warn("x86_max_cores == zero !?!?"); - ncpus = 1; - } - - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - logical_packages = 0; - - /* - * Possibly larger than what we need as the number of apic ids per - * package can be smaller than the actual used apic ids. - */ - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); - size = max_physical_pkg_id * sizeof(unsigned int); - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); - memset(physical_to_logical_pkg, 0xff, size); - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); - physical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); - - topology_update_package_map(c->phys_proc_id, cpu); -} - void __init smp_store_boot_cpu_info(void) { int id = 0; /* CPU 0 */ @@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(c, id); + topology_update_package_map(c->phys_proc_id, id); + c->initialized = true; } /* @@ -401,13 +339,16 @@ void smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when * bringing up AP or offlined CPU0. */ identify_secondary_cpu(c); + c->initialized = true; } static bool @@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { + int ncpus; + pr_debug("Boot done\n"); + /* + * Today neither Intel nor AMD support heterogenous systems so + * extrapolate the boot cpu's data to all packages. + */ + ncpus = cpu_data(0).booted_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + pr_info("Max logical packages: %u\n", __max_logical_packages); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a63fe77b3217..676774b9bb8d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (len > TASK_SIZE) return -ENOMEM; + /* No address checking. See comment at mmap_address_hint_valid() */ if (flags & MAP_FIXED) return addr; @@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* requesting a specific address */ if (addr) { - addr = PAGE_ALIGN(addr); + addr &= PAGE_MASK; + if (!mmap_address_hint_valid(addr, len)) + goto get_unmapped_area; + vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + if (!vma || addr + len <= vm_start_gap(vma)) return addr; } +get_unmapped_area: info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 6ba82be68cff..f44ce0fb3583 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -78,7 +78,60 @@ #define UMIP_INST_SGDT 0 /* 0F 01 /0 */ #define UMIP_INST_SIDT 1 /* 0F 01 /1 */ -#define UMIP_INST_SMSW 3 /* 0F 01 /4 */ +#define UMIP_INST_SMSW 2 /* 0F 01 /4 */ +#define UMIP_INST_SLDT 3 /* 0F 00 /0 */ +#define UMIP_INST_STR 4 /* 0F 00 /1 */ + +const char * const umip_insns[5] = { + [UMIP_INST_SGDT] = "SGDT", + [UMIP_INST_SIDT] = "SIDT", + [UMIP_INST_SMSW] = "SMSW", + [UMIP_INST_SLDT] = "SLDT", + [UMIP_INST_STR] = "STR", +}; + +#define umip_pr_err(regs, fmt, ...) \ + umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) +#define umip_pr_warning(regs, fmt, ...) \ + umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) + +/** + * umip_printk() - Print a rate-limited message + * @regs: Register set with the context in which the warning is printed + * @log_level: Kernel log level to print the message + * @fmt: The text string to print + * + * Print the text contained in @fmt. The print rate is limited to bursts of 5 + * messages every two minutes. The purpose of this customized version of + * printk() is to print messages when user space processes use any of the + * UMIP-protected instructions. Thus, the printed text is prepended with the + * task name and process ID number of the current task as well as the + * instruction and stack pointers in @regs as seen when entering kernel mode. + * + * Returns: + * + * None. + */ +static __printf(3, 4) +void umip_printk(const struct pt_regs *regs, const char *log_level, + const char *fmt, ...) +{ + /* Bursts of 5 messages every two minutes */ + static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); + struct task_struct *tsk = current; + struct va_format vaf; + va_list args; + + if (!__ratelimit(&ratelimit)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm, + task_pid_nr(tsk), regs->ip, regs->sp, &vaf); + va_end(args); +} /** * identify_insn() - Identify a UMIP-protected instruction @@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn) default: return -EINVAL; } + } else if (insn->opcode.bytes[1] == 0x0) { + if (X86_MODRM_REG(insn->modrm.value) == 0) + return UMIP_INST_SLDT; + else if (X86_MODRM_REG(insn->modrm.value) == 1) + return UMIP_INST_STR; + else + return -EINVAL; + } else { + return -EINVAL; } - - /* SLDT AND STR are not emulated */ - return -EINVAL; } /** @@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; - pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n", - tsk->comm, task_pid_nr(tsk), regs->ip, - regs->sp, X86_PF_USER | X86_PF_WRITE, - regs->ip); + umip_pr_err(regs, "segfault in emulation. error%x\n", + X86_PF_USER | X86_PF_WRITE); } /** @@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs) unsigned char buf[MAX_INSN_SIZE]; void __user *uaddr; struct insn insn; - char seg_defs; + int seg_defs; if (!regs) return false; - /* Do not emulate 64-bit processes. */ - if (user_64bit_mode(regs)) - return false; - /* * If not in user-space long mode, a custom code segment could be in * use. This is true in protected mode (if the process defined a local @@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs) if (umip_inst < 0) return false; + umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", + umip_insns[umip_inst]); + + /* Do not emulate SLDT, STR or user long mode processes. */ + if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) + return false; + + umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); + if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) return false; diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 35625d279458..9119d8e41f1f 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) * * Returns: * - * A signed 8-bit value containing the default parameters on success. + * An int containing ORed-in default parameters on success. * * -EINVAL on error. */ -char insn_get_code_seg_params(struct pt_regs *regs) +int insn_get_code_seg_params(struct pt_regs *regs) { struct desc_struct *desc; short sel; diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 12e377184ee4..c4d55919fac1 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8ae0000cbdb3..00b296617ca4 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (len > TASK_SIZE) return -ENOMEM; + /* No address checking. See comment at mmap_address_hint_valid() */ if (flags & MAP_FIXED) { if (prepare_hugepage_range(file, addr, len)) return -EINVAL; @@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } if (addr) { - addr = ALIGN(addr, huge_page_size(h)); + addr &= huge_page_mask(h); + if (!mmap_address_hint_valid(addr, len)) + goto get_unmapped_area; + vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + if (!vma || addr + len <= vm_start_gap(vma)) return addr; } + +get_unmapped_area: if (mm->get_unmapped_area == arch_get_unmapped_area) return hugetlb_get_unmapped_area_bottomup(file, addr, len, pgoff, flags); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index a99679826846..155ecbac9e28 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -33,6 +33,8 @@ #include <linux/compat.h> #include <asm/elf.h> +#include "physaddr.h" + struct va_alignment __read_mostly va_align = { .flags = -1, }; @@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[mpx]"; return NULL; } + +/** + * mmap_address_hint_valid - Validate the address hint of mmap + * @addr: Address hint + * @len: Mapping length + * + * Check whether @addr and @addr + @len result in a valid mapping. + * + * On 32bit this only checks whether @addr + @len is <= TASK_SIZE. + * + * On 64bit with 5-level page tables another sanity check is required + * because mappings requested by mmap(@addr, 0) which cross the 47-bit + * virtual address boundary can cause the following theoretical issue: + * + * An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr + * is below the border of the 47-bit address space and @addr + @len is + * above the border. + * + * With 4-level paging this request succeeds, but the resulting mapping + * address will always be within the 47-bit virtual address space, because + * the hint address does not result in a valid mapping and is + * ignored. Hence applications which are not prepared to handle virtual + * addresses above 47-bit work correctly. + * + * With 5-level paging this request would be granted and result in a + * mapping which crosses the border of the 47-bit virtual address + * space. If the application cannot handle addresses above 47-bit this + * will lead to misbehaviour and hard to diagnose failures. + * + * Therefore ignore address hints which would result in a mapping crossing + * the 47-bit virtual address boundary. + * + * Note, that in the same scenario with MAP_FIXED the behaviour is + * different. The request with @addr < 47-bit and @addr + @len > 47-bit + * fails on a 4-level paging machine but succeeds on a 5-level paging + * machine. It is reasonable to expect that an application does not rely on + * the failure of such a fixed mapping request, so the restriction is not + * applied. + */ +bool mmap_address_hint_valid(unsigned long addr, unsigned long len) +{ + if (TASK_SIZE - len < addr) + return false; + + return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW); +} + +/* Can we access it for direct reading/writing? Must be RAM: */ +int valid_phys_addr_range(phys_addr_t addr, size_t count) +{ + return addr + count <= __pa(high_memory); +} + +/* Can we access it through mmap? Must be a valid physical address: */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT; + + return phys_addr_valid(addr + count - 1); +} diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 970e1242a282..6aefe5370e5b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -343,6 +343,10 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) size_t size = vma->vm_end - vma->vm_start; phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + /* Does it even fit in phys_addr_t? */ + if (offset >> PAGE_SHIFT != vma->vm_pgoff) + return -EINVAL; + /* It's illegal to wrap around the end of the physical address space. */ if (offset + (phys_addr_t)size - 1 < offset) return -EINVAL; diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 90944667ccea..bda151788f3f 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -80,15 +80,6 @@ config XEN_PCIDEV_FRONTEND The PCI device frontend driver allows the kernel to import arbitrary PCI devices from a PCI backend to support PCI driver domains. -config HT_IRQ - bool "Interrupts on hypertransport devices" - default y - depends on PCI && X86_LOCAL_APIC - help - This allows native hypertransport devices to use interrupts. - - If unsure say Y. - config PCI_ATS bool diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3d5e047f0a32..c7819b973df7 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -21,9 +21,6 @@ obj-$(CONFIG_HOTPLUG_PCI) += hotplug/ # Build the PCI MSI interrupt support obj-$(CONFIG_PCI_MSI) += msi.o -# Build the Hypertransport interrupt support -obj-$(CONFIG_HT_IRQ) += htirq.o - obj-$(CONFIG_PCI_ATS) += ats.o obj-$(CONFIG_PCI_IOV) += iov.o diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c deleted file mode 100644 index bb88c26f5144..000000000000 --- a/drivers/pci/htirq.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * File: htirq.c - * Purpose: Hypertransport Interrupt Capability - * - * Copyright (C) 2006 Linux Networx - * Copyright (C) Eric Biederman <ebiederman@lnxi.com> - */ - -#include <linux/irq.h> -#include <linux/pci.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/htirq.h> - -/* Global ht irq lock. - * - * This is needed to serialize access to the data port in hypertransport - * irq capability. - * - * With multiple simultaneous hypertransport irq devices it might pay - * to make this more fine grained. But start with simple, stupid, and correct. - */ -static DEFINE_SPINLOCK(ht_irq_lock); - -void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) -{ - struct ht_irq_cfg *cfg = irq_get_handler_data(irq); - unsigned long flags; - - spin_lock_irqsave(&ht_irq_lock, flags); - if (cfg->msg.address_lo != msg->address_lo) { - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); - pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo); - } - if (cfg->msg.address_hi != msg->address_hi) { - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1); - pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi); - } - if (cfg->update) - cfg->update(cfg->dev, irq, msg); - spin_unlock_irqrestore(&ht_irq_lock, flags); - cfg->msg = *msg; -} - -void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) -{ - struct ht_irq_cfg *cfg = irq_get_handler_data(irq); - - *msg = cfg->msg; -} - -void mask_ht_irq(struct irq_data *data) -{ - struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data); - struct ht_irq_msg msg = cfg->msg; - - msg.address_lo |= 1; - write_ht_irq_msg(data->irq, &msg); -} - -void unmask_ht_irq(struct irq_data *data) -{ - struct ht_irq_cfg *cfg = irq_data_get_irq_handler_data(data); - struct ht_irq_msg msg = cfg->msg; - - msg.address_lo &= ~1; - write_ht_irq_msg(data->irq, &msg); -} - -/** - * __ht_create_irq - create an irq and attach it to a device. - * @dev: The hypertransport device to find the irq capability on. - * @idx: Which of the possible irqs to attach to. - * @update: Function to be called when changing the htirq message - * - * The irq number of the new irq or a negative error value is returned. - */ -int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) -{ - int max_irq, pos, irq; - unsigned long flags; - u32 data; - - pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ); - if (!pos) - return -EINVAL; - - /* Verify the idx I want to use is in range */ - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(dev, pos + 2, 1); - pci_read_config_dword(dev, pos + 4, &data); - spin_unlock_irqrestore(&ht_irq_lock, flags); - - max_irq = (data >> 16) & 0xff; - if (idx > max_irq) - return -EINVAL; - - irq = arch_setup_ht_irq(idx, pos, dev, update); - if (irq > 0) - dev_dbg(&dev->dev, "irq %d for HT\n", irq); - - return irq; -} -EXPORT_SYMBOL(__ht_create_irq); - -/** - * ht_create_irq - create an irq and attach it to a device. - * @dev: The hypertransport device to find the irq capability on. - * @idx: Which of the possible irqs to attach to. - * - * ht_create_irq needs to be called for all hypertransport devices - * that generate irqs. - * - * The irq number of the new irq or a negative error value is returned. - */ -int ht_create_irq(struct pci_dev *dev, int idx) -{ - return __ht_create_irq(dev, idx, NULL); -} -EXPORT_SYMBOL(ht_create_irq); - -/** - * ht_destroy_irq - destroy an irq created with ht_create_irq - * @irq: irq to be destroyed - * - * This reverses ht_create_irq removing the specified irq from - * existence. The irq should be free before this happens. - */ -void ht_destroy_irq(unsigned int irq) -{ - arch_teardown_ht_irq(irq); -} -EXPORT_SYMBOL(ht_destroy_irq); diff --git a/include/linux/htirq.h b/include/linux/htirq.h deleted file mode 100644 index 127c39d815ba..000000000000 --- a/include/linux/htirq.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_HTIRQ_H -#define LINUX_HTIRQ_H - -struct pci_dev; -struct irq_data; - -struct ht_irq_msg { - u32 address_lo; /* low 32 bits of the ht irq message */ - u32 address_hi; /* high 32 bits of the it irq message */ -}; - -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); - -struct ht_irq_cfg { - struct pci_dev *dev; - /* Update callback used to cope with buggy hardware */ - ht_irq_update_t *update; - unsigned pos; - unsigned idx; - struct ht_irq_msg msg; -}; - -/* Helper functions.. */ -void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void mask_ht_irq(struct irq_data *data); -void unmask_ht_irq(struct irq_data *data); - -/* The arch hook for getting things started */ -int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, - ht_irq_update_t *update); -void arch_teardown_ht_irq(unsigned int irq); - -/* For drivers of buggy hardware */ -int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); - -#endif /* LINUX_HTIRQ_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 96c94980d1ff..0403894147a3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1485,12 +1485,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif -#ifdef CONFIG_HT_IRQ -/* The functions a driver should call */ -int ht_create_irq(struct pci_dev *dev, int idx); -void ht_destroy_irq(unsigned int irq); -#endif /* CONFIG_HT_IRQ */ - #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c new file mode 100644 index 000000000000..2eafdcd4c2b3 --- /dev/null +++ b/tools/testing/selftests/x86/5lvl.c @@ -0,0 +1,177 @@ +#include <stdio.h> +#include <sys/mman.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define PAGE_SIZE 4096 +#define LOW_ADDR ((void *) (1UL << 30)) +#define HIGH_ADDR ((void *) (1UL << 50)) + +struct testcase { + void *addr; + unsigned long size; + unsigned long flags; + const char *msg; + unsigned int low_addr_required:1; + unsigned int keep_mapped:1; +}; + +static struct testcase testcases[] = { + { + .addr = NULL, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED)", + }, + { + .addr = (void*) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void*) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = (void *)((1UL << 47) - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap((1UL << 47), 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)((1UL << 47) - PAGE_SIZE / 2), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)((1UL << 47) - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void*) -1, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void*) -1, + .size = 2UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)((1UL << 47) - PAGE_SIZE), + .size = 4UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)((1UL << 47) - (2UL << 20)), + .size = 4UL << 20, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)", + }, +}; + +int main(int argc, char **argv) +{ + int i; + void *p; + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + struct testcase *t = testcases + i; + + p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0); + + printf("%s: %p - ", t->msg, p); + + if (p == MAP_FAILED) { + printf("FAILED\n"); + continue; + } + + if (t->low_addr_required && p >= (void *)(1UL << 47)) + printf("FAILED\n"); + else + printf("OK\n"); + if (!t->keep_mapped) + munmap(p, t->size); + } + return 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 7b1adeee4b0f..939a337128db 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h index 3f0093911f03..d1b61ab870f8 100644 --- a/tools/testing/selftests/x86/mpx-hw.h +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -52,14 +52,14 @@ struct mpx_bd_entry { union { char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; - void *contents[1]; + void *contents[0]; }; } __attribute__((packed)); struct mpx_bt_entry { union { char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; - unsigned long contents[1]; + unsigned long contents[0]; }; } __attribute__((packed)); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index 3818f25391c2..b3cb7670e026 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -30,6 +30,7 @@ static inline void sigsafe_printf(const char *format, ...) if (!dprint_in_signal) { vprintf(format, ap); } else { + int ret; int len = vsnprintf(dprint_in_signal_buffer, DPRINT_IN_SIGNAL_BUF_SIZE, format, ap); @@ -39,7 +40,9 @@ static inline void sigsafe_printf(const char *format, ...) */ if (len > DPRINT_IN_SIGNAL_BUF_SIZE) len = DPRINT_IN_SIGNAL_BUF_SIZE; - write(1, dprint_in_signal_buffer, len); + ret = write(1, dprint_in_signal_buffer, len); + if (ret < 0) + abort(); } va_end(ap); } diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 7a1cc0e56d2d..bc1b0735bb50 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -250,7 +250,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) unsigned long ip; char *fpregs; u32 *pkru_ptr; - u64 si_pkey; + u64 siginfo_pkey; u32 *si_pkey_ptr; int pkru_offset; fpregset_t fpregset; @@ -292,9 +292,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); dump_mem(si_pkey_ptr - 8, 24); - si_pkey = *si_pkey_ptr; - pkey_assert(si_pkey < NR_PKEYS); - last_si_pkey = si_pkey; + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -306,7 +306,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("si_pkey from siginfo: %jx\n", si_pkey); + dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkru_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; |