From 7fc150543c73de71859631c8a6b17e3067fe7617 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Oct 2014 22:37:13 +0100 Subject: ARM: Blacklist GCC 4.8.0 to GCC 4.8.2 - PR58854 These stock GCC versions miscompile the kernel by incorrectly optimising the function epilogue code - by first increasing the stack pointer, and then loading entries from below the stack. This means that an opportune interrupt or exception will corrupt the current function's saved state, which may result in the parent function seeing different register values. As this bug has been known to result in corrupted filesystems, and these buggy compiler versions seem to be frequently used, we have little option but to blacklist these compiler versions. Distributions may have fixed PR58854, but as their compilers are totally indistinguishable from the buggy versions, it is unfortunate that this also results in those also being blacklisted. Given the filesystem corruption potential of the original, this is the lesser evil. People who want to build with their fixed compiler versions will need to adjust the kernel source. (Distros need to think about the implications of fixing such a compiler bug, and consider how to ensure that their fixed compiler versions can be detected if they wish to avoid this.) Signed-off-by: Russell King --- arch/arm/kernel/asm-offsets.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 713e807621d2..2d2d6087b9b1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -10,6 +10,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -39,10 +40,19 @@ * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c * (http://gcc.gnu.org/PR8896) and incorrect structure * initialisation in fs/jffs2/erase.c + * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 + * miscompiles find_get_entry(), and can result in EXT3 and EXT4 + * filesystem corruption (possibly other FS too). */ +#ifdef __GNUC__ #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) #error Your compiler is too buggy; it is known to miscompile kernels. -#error Known good compilers: 3.3 +#error Known good compilers: 3.3, 4.x +#endif +#if GCC_VERSION >= 40800 && GCC_VERSION < 40803 +#error Your compiler is too buggy; it is known to miscompile kernels +#error and result in filesystem corruption and oopses. +#endif #endif int main(void) -- cgit v1.2.3 From db6a00b4bed3abbb038077ba4fdc5be481fe5559 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 19 Oct 2014 11:41:52 +0200 Subject: x86/smpboot: Move data structure to its primary usage scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes the code more readable by moving variable and usage closer to each other, which also avoids this build warning in the !CONFIG_HOTPLUG_CPU case: arch/x86/kernel/smpboot.c:105:42: warning: ‘die_complete’ defined but not used [-Wunused-variable] Cc: Prarit Bhargava Cc: Lan Tianyu Cc: Borislav Petkov Cc: Peter Zijlstra Cc: srostedt@redhat.com Cc: toshi.kani@hp.com Cc: imammedo@redhat.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1409039025-32310-1-git-send-email-tianyu.lan@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2d5200e56357..4d2128ac70bd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,8 +102,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); -static DEFINE_PER_CPU(struct completion, die_complete); - atomic_t init_deasserted; /* @@ -1318,6 +1316,8 @@ void cpu_disable_common(void) fixup_irqs(); } +static DEFINE_PER_CPU(struct completion, die_complete); + int native_cpu_disable(void) { int ret; -- cgit v1.2.3 From 961b6a7003acec4f9d70dabc1a253b783cb74272 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 20 Oct 2014 22:45:27 +0800 Subject: x86: ACPI: Do not translate GSI number if IOAPIC is disabled When IOAPIC is disabled, acpi_gsi_to_irq() should return gsi directly instead of calling mp_map_gsi_to_irq() to translate gsi to IRQ by IOAPIC. It fixes https://bugzilla.kernel.org/show_bug.cgi?id=84381. This regression was introduced with commit 6b9fb7082409 "x86, ACPI, irq: Consolidate algorithm of mapping (ioapic, pin) to IRQ number" Reported-and-Tested-by: Thomas Richter Signed-off-by: Jiang Liu Cc: Tony Luck Cc: Thomas Richter Cc: rui.zhang@intel.com Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: # 3.17 Link: http://lkml.kernel.org/r/1413816327-12850-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b436fc735aa4..d5c887216fb3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -604,14 +604,18 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { - int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + int irq; - if (irq >= 0) { + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { + *irqp = gsi; + } else { + irq = mp_map_gsi_to_irq(gsi, + IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); + if (irq < 0) + return -1; *irqp = irq; - return 0; } - - return -1; + return 0; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); -- cgit v1.2.3 From 178c3dfe853a18391b029e6b62e1eed22be1871e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 19 Oct 2014 22:42:42 +0100 Subject: ARM: fix some printk formats GCC 4.9 complains if we take the difference of two pointers, and it's printed with "%d". Fix this by using the proper flag - "t" for ptrdiff_t. Signed-off-by: Russell King --- arch/arm/mm/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 659c75d808dc..1dfcc0822074 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -559,10 +559,10 @@ void __init mem_init(void) #ifdef CONFIG_MODULES " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" #endif - " .text : 0x%p" " - 0x%p" " (%4d kB)\n" - " .init : 0x%p" " - 0x%p" " (%4d kB)\n" - " .data : 0x%p" " - 0x%p" " (%4d kB)\n" - " .bss : 0x%p" " - 0x%p" " (%4d kB)\n", + " .text : 0x%p" " - 0x%p" " (%4td kB)\n" + " .init : 0x%p" " - 0x%p" " (%4td kB)\n" + " .data : 0x%p" " - 0x%p" " (%4td kB)\n" + " .bss : 0x%p" " - 0x%p" " (%4td kB)\n", MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + (PAGE_SIZE)), -- cgit v1.2.3 From 6643773ce1a17c99a3ce29ee8ac2b114d2ba771f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 21 Oct 2014 14:25:38 +1100 Subject: powerpc/mm: Fix build error with hugetlfs disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/powerpc/mm/slice.c:704:5: error: expected identifier or ‘(’ before numeric constant int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, ^ make[1]: *** [arch/powerpc/mm/slice.o] Error 1 make: *** [arch/powerpc/mm/slice.o] Error 2 This got introduced via 1217d34b531c76362217057ca70a8ce8950574e0 "powerpc: Ensure global functions include their prototype". We started including linux/hugetlb.h with that patch and now we have #define is_hugepage_only_range(mm, addr, len) 0 with hugetlbfs disabled. Fixes: 1217d34b531c ("powerpc: Ensure global functions include their prototype") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slice.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 8d7bda94d196..ded0ea1afde4 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -682,6 +682,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, slice_convert(mm, mask, psize); } +#ifdef CONFIG_HUGETLB_PAGE /* * is_hugepage_only_range() is used by generic code to verify whether * a normal mmap mapping (non hugetlbfs) is valid on a given area. @@ -726,4 +727,4 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, #endif return !slice_check_fit(mask, available); } - +#endif -- cgit v1.2.3 From ca5f1d16a51900d23d2cf834d3aa62dfe3a37881 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 21 Oct 2014 14:25:59 +1100 Subject: powerpc/mm: Remove redundant #if case Remove the check of CONFIG_PPC_SUBPAGE_PROT when deciding if is_hugepage_only_range() is extern or inline. The extern version is in slice.c and is built if CONFIG_PPC_MM_SLICES=y. There was no build break possible because CONFIG_PPC_SUBPAGE_PROT is only selectable under conditions which also mean CONFIG_PPC_MM_SLICES will be selected. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 623f2971ce0e..766b77d527ac 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -71,7 +71,7 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, void flush_dcache_icache_hugepage(struct page *page); -#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT) +#if defined(CONFIG_PPC_MM_SLICES) int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); #else -- cgit v1.2.3 From fcbb539f279f7d854bd49819b889fea0612909f8 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Fri, 10 Oct 2014 01:53:45 -0400 Subject: powerpc: Wire up sys_bpf() syscall This patch wires up the new syscall sys_bpf() on powerpc. Passes the tests in samples/bpf: #0 add+sub+mul OK #1 unreachable OK #2 unreachable2 OK #3 out of range jump OK #4 out of range jump2 OK #5 test1 ld_imm64 OK #6 test2 ld_imm64 OK #7 test3 ld_imm64 OK #8 test4 ld_imm64 OK #9 test5 ld_imm64 OK #10 no bpf_exit OK #11 loop (back-edge) OK #12 loop2 (back-edge) OK #13 conditional loop OK #14 read uninitialized register OK #15 read invalid register OK #16 program doesn't init R0 before exit OK #17 stack out of bounds OK #18 invalid call insn1 OK #19 invalid call insn2 OK #20 invalid function call OK #21 uninitialized stack1 OK #22 uninitialized stack2 OK #23 check valid spill/fill OK #24 check corrupted spill/fill OK #25 invalid src register in STX OK #26 invalid dst register in STX OK #27 invalid dst register in ST OK #28 invalid src register in LDX OK #29 invalid dst register in LDX OK #30 junk insn OK #31 junk insn2 OK #32 junk insn3 OK #33 junk insn4 OK #34 junk insn5 OK #35 misaligned read from stack OK #36 invalid map_fd for function call OK #37 don't check return value before access OK #38 access memory with incorrect alignment OK #39 sometimes access memory with incorrect alignment OK #40 jump test 1 OK #41 jump test 2 OK #42 jump test 3 OK #43 jump test 4 OK Signed-off-by: Pranith Kumar [mpe: test using samples/bpf] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 7d8a60068805..ce9577d693be 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -365,3 +365,4 @@ SYSCALL_SPU(renameat2) SYSCALL_SPU(seccomp) SYSCALL_SPU(getrandom) SYSCALL_SPU(memfd_create) +SYSCALL_SPU(bpf) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 4e9af3fd43e7..e0da021caa00 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 361 +#define __NR_syscalls 362 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 0688fc06e183..f55351f2e66e 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -383,5 +383,6 @@ #define __NR_seccomp 358 #define __NR_getrandom 359 #define __NR_memfd_create 360 +#define __NR_bpf 361 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From b47dcbdc5161d3d5756f430191e2840d9b855492 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 15 Oct 2014 10:12:07 -0700 Subject: x86, apic: Handle a bad TSC more gracefully If the TSC is unusable or disabled, then this patch fixes: - Confusion while trying to clear old APIC interrupts. - Division by zero and incorrect programming of the TSC deadline timer. This fixes boot if the CPU has a TSC deadline timer but a missing or broken TSC. The failure to boot can be observed with qemu using -cpu qemu64,-tsc,+tsc-deadline This also happens to me in nested KVM for unknown reasons. With this patch, I can boot cleanly (although without a TSC). Signed-off-by: Andy Lutomirski Cc: Bandan Das Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/e2fa274e498c33988efac0ba8b7e3120f7f92d78.1413393027.git.luto@amacapital.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 4 ++-- arch/x86/kernel/tsc.c | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 67760275544b..24b5894396a0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1297,7 +1297,7 @@ void setup_local_APIC(void) unsigned int value, queued; int i, j, acked = 0; unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz; + long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) rdtscll(tsc); @@ -1383,7 +1383,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc) { + if (cpu_has_tsc && cpu_khz) { rdtscll(ntsc); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b6025f9e36c6..b7e50bba3bbb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1166,14 +1166,17 @@ void __init tsc_init(void) x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) + if (!cpu_has_tsc) { + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; + } tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } -- cgit v1.2.3 From 19d36c2152aaf093c97431b5b4d13d29305a63a3 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 22 Oct 2014 11:32:52 +0800 Subject: powernv: Use _GLOBAL_TOC for opal wrappers Currently, we can't call opal wrappers from modules when using the LE ABIv2, which requires a TOC init. If we do we'll try and load the opal entry point using the wrong toc and probably explode or worse jump to the wrong address. Nothing in upstream is making opal calls from a module, but we do export one of the wrappers so we should fix this anyway. This change uses the _GLOBAL_TOC() macro (rather than _GLOBAL) for the opal wrappers, so that we can do non-local calls to them. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index e9e2450c1fdd..feb549aa3eea 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -58,7 +58,7 @@ END_FTR_SECTION(0, 1); \ */ #define OPAL_CALL(name, token) \ - _GLOBAL(name); \ + _GLOBAL_TOC(name); \ mflr r0; \ std r0,16(r1); \ li r0,token; \ -- cgit v1.2.3 From 26c2d2b39128adba276d140eefa2745591b88536 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 23 Oct 2014 00:04:03 -0400 Subject: i386/audit: stop scribbling on the stack frame git commit b4f0d3755c5e9cc86292d5fd78261903b4f23d4a was very very dumb. It was writing over %esp/pt_regs semi-randomly on i686 with the expected "system can't boot" results. As noted in: https://bugs.freedesktop.org/show_bug.cgi?id=85277 This patch stops fscking with pt_regs. Instead it sets up the registers for the call to __audit_syscall_entry in the most obvious conceivable way. It then does just a tiny tiny touch of magic. We need to get what started in PT_EDX into 0(%esp) and PT_ESI into 4(%esp). This is as easy as a pair of pushes. After the call to __audit_syscall_entry all we need to do is get that now useless junk off the stack (pair of pops) and reload %eax with the original syscall so other stuff can keep going about it's business. Reported-by: Paulo Zanoni Signed-off-by: Eric Paris Link: http://lkml.kernel.org/r/1414037043-30647-1-git-send-email-eparis@redhat.com Cc: Richard Guy Briggs Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b553ed89e5f5..344b63f18d14 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -447,15 +447,14 @@ sysenter_exit: sysenter_audit: testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry - addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 - movl %esi,4(%esp) /* 5th arg: 4th syscall arg */ - movl %edx,(%esp) /* 4th arg: 3rd syscall arg */ - /* %ecx already in %ecx 3rd arg: 2nd syscall arg */ - movl %ebx,%edx /* 2nd arg: 1st syscall arg */ - /* %eax already in %eax 1st arg: syscall number */ + /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */ + movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */ + /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */ + pushl_cfi PT_ESI(%esp) /* a3: 5th arg */ + pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */ call __audit_syscall_entry - pushl_cfi %ebx + popl_cfi %ecx /* get that remapped edx off the stack */ + popl_cfi %ecx /* get that remapped esi off the stack */ movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call -- cgit v1.2.3 From 4cbbbb43d666468d87f99676cf05fb5154d1228b Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 27 Oct 2014 08:28:16 +0100 Subject: microblaze: Fix missing NR_CPUS in menuconfig The time Kconfig expects that NR_CPUS is defined. This patch remove this config warning: "kernel/time/Kconfig:163:warning: range is invalid" Signed-off-by: Michal Simek --- arch/microblaze/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 6feded3b0c4c..a7736fa0580c 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -129,6 +129,10 @@ endmenu menu "Kernel features" +config NR_CPUS + int + default "1" + config ADVANCED_OPTIONS bool "Prompt for advanced kernel configuration options" help -- cgit v1.2.3 From 70dcd942dc4af3cc6c3dcc2ba499cd841c7f65a7 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 27 Oct 2014 08:15:25 +0100 Subject: microblaze: Fix IO space breakage after of_pci_range_to_resource() change Commit 0b0b0893d49b "of/pci: Fix the conversion of IO ranges into IO resources" changed the behaviour of of_pci_range_to_resource(). The issue is described here: "powerpc/pci: Fix IO space breakage after of_pci_range_to_resource() change" (sha1: aeba3731b150188685225b510886f1370d8814de) Signed-off-by: Michal Simek --- arch/microblaze/pci/pci-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 9037914f6985..b30e41c0c033 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -660,8 +660,13 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, res = &hose->mem_resources[memno++]; break; } - if (res != NULL) - of_pci_range_to_resource(&range, dev, res); + if (res != NULL) { + res->name = dev->full_name; + res->flags = range.flags; + res->start = range.cpu_addr; + res->end = range.cpu_addr + range.size - 1; + res->parent = res->child = res->sibling = NULL; + } } /* If there's an ISA hole and the pci_mem_offset is -not- matching -- cgit v1.2.3 From a4f174dee4ae842e07cab7eeec194a3e60925c8d Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 27 Oct 2014 08:35:11 +0100 Subject: microblaze: Wire up bpf syscall Add new bpf syscall. Signed-off-by: Michal Simek --- arch/microblaze/include/asm/unistd.h | 2 +- arch/microblaze/include/uapi/asm/unistd.h | 1 + arch/microblaze/kernel/syscall_table.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index ea4b233647c1..0a53362d5548 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,6 +38,6 @@ #endif /* __ASSEMBLY__ */ -#define __NR_syscalls 387 +#define __NR_syscalls 388 #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h index 1c2380bf8fe6..c712677f8a2a 100644 --- a/arch/microblaze/include/uapi/asm/unistd.h +++ b/arch/microblaze/include/uapi/asm/unistd.h @@ -402,5 +402,6 @@ #define __NR_seccomp 384 #define __NR_getrandom 385 #define __NR_memfd_create 386 +#define __NR_bpf 387 #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index de59ee1d7010..0166e890486c 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -387,3 +387,4 @@ ENTRY(sys_call_table) .long sys_seccomp .long sys_getrandom /* 385 */ .long sys_memfd_create + .long sys_bpf -- cgit v1.2.3 From bf19edd290d5d2d53bba53f7f4cb2f7492997009 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Oct 2014 12:13:32 +1100 Subject: Revert "powerpc/powernv: Fix endian bug in LPC bus debugfs accessors" This reverts commit bf7588a0859580a45c63cb082825d77c13eca357. Ben says although the code is not correct "[this] fix was completely wrong and does more damages than it fixes things." Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-lpc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index dd2c285ad170..ad4b31df779a 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -191,7 +191,6 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, { struct lpc_debugfs_entry *lpc = filp->private_data; u32 data, pos, len, todo; - __be32 bedata; int rc; if (!access_ok(VERIFY_WRITE, ubuf, count)) @@ -214,10 +213,9 @@ static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, len = 2; } rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, - &bedata, len); + &data, len); if (rc) return -ENXIO; - data = be32_to_cpu(bedata); switch(len) { case 4: rc = __put_user((u32)data, (u32 __user *)ubuf); -- cgit v1.2.3 From 3c325f8233c35fb35dec3744ba01634aab4ea36a Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Fri, 24 Oct 2014 17:00:34 +0800 Subject: x86, cma: Reserve DMA contiguous area after initmem_init() Fengguang Wu reported a boot crash on the x86 platform via the 0-day Linux Kernel Performance Test: cma: dma_contiguous_reserve: reserving 31 MiB for global area BUG: Int 6: CR2 (null) [<41850786>] dump_stack+0x16/0x18 [<41d2b1db>] early_idt_handler+0x6b/0x6b [<41072227>] ? __phys_addr+0x2e/0xca [<41d4ee4d>] cma_declare_contiguous+0x3c/0x2d7 [<41d6d359>] dma_contiguous_reserve_area+0x27/0x47 [<41d6d4d1>] dma_contiguous_reserve+0x158/0x163 [<41d33e0f>] setup_arch+0x79b/0xc68 [<41d2b7cf>] start_kernel+0x9c/0x456 [<41d2b2ca>] i386_start_kernel+0x79/0x7d (See details at: https://lkml.org/lkml/2014/10/8/708) It is because dma_contiguous_reserve() is called before initmem_init() in x86, the variable high_memory is not initialized but accessed by __pa(high_memory) in dma_contiguous_reserve(). This patch moves dma_contiguous_reserve() after initmem_init() so that high_memory is initialized before accessed. Reported-by: Fengguang Wu Signed-off-by: Weijie Yang Acked-by: Andrew Morton Acked-by: Marek Szyprowski Acked-by: Michal Nazarewicz Cc: iamjoonsoo.kim@lge.com Cc: 'Linux-MM' Cc: 'Weijie Yang' Link: http://lkml.kernel.org/r/000101cfef69%2431e528a0%2495af79e0%24%25yang@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 235cfd39e0d7..ab08aa2276fb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1128,7 +1128,6 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); memblock_set_current_limit(get_max_mapped()); - dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. @@ -1159,6 +1158,7 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); initmem_init(); + dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); /* * Reserve memory for crash kernel after SRAT is parsed so that it -- cgit v1.2.3 From 03f54397976581e71a3294ac0e6dfcf4aa36e539 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 28 Oct 2014 14:25:29 +1100 Subject: powerpc/mm: Use appropriate ESID mask in copro_calculate_slb() This patch makes copro_calculate_slb() mask the ESID by the correct mask for 1T vs 256M segments. This has no effect by itself as the extra bits were ignored, but it makes debugging the segment table entries easier and means that we can directly compare the ESID values for duplicates without needing to worry about masking in the comparison. This will be used to simplify a comparison in the following patch. Signed-off-by: Ian Munsie Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 0f9939e693df..5a236f082c78 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -99,8 +99,6 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) u64 vsid; int psize, ssize; - slb->esid = (ea & ESID_MASK) | SLB_ESID_V; - switch (REGION_ID(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); @@ -133,6 +131,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) vsid |= mmu_psize_defs[psize].sllp | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); + slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V; slb->vsid = vsid; return 0; -- cgit v1.2.3 From 009f60e2763568cdcd75bd1cf360c7c7165e2e60 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 5 Oct 2014 22:23:22 +0200 Subject: sched: stop the unbound recursion in preempt_schedule_context() preempt_schedule_context() does preempt_enable_notrace() at the end and this can call the same function again; exception_exit() is heavy and it is quite possible that need-resched is true again. 1. Change this code to dec preempt_count() and check need_resched() by hand. 2. As Linus suggested, we can use the PREEMPT_ACTIVE bit and avoid the enable/disable dance around __schedule(). But in this case we need to move into sched/core.c. 3. Cosmetic, but x86 forgets to declare this function. This doesn't really matter because it is only called by asm helpers, still it make sense to add the declaration into asm/preempt.h to match preempt_schedule(). Reported-by: Sasha Levin Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Andrew Morton Cc: Christoph Lameter Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Steven Rostedt Cc: Peter Anvin Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Chuck Ebbert Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20141005202322.GB27962@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 7024c12f7bfe..400873450e33 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -105,6 +105,7 @@ static __always_inline bool should_resched(void) # ifdef CONFIG_CONTEXT_TRACKING extern asmlinkage void ___preempt_schedule_context(void); # define __preempt_schedule_context() asm ("call ___preempt_schedule_context") + extern asmlinkage void preempt_schedule_context(void); # endif #endif -- cgit v1.2.3 From 65d71fe1375b973083733294795bf2b09d45b3c2 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 7 Oct 2014 19:07:33 +0200 Subject: perf: Fix bogus kernel printk Andy spotted the fail in what was intended as a conditional printk level. Reported-by: Andy Lutomirski Fixes: cc6cd47e7395 ("perf/x86: Tone down kernel messages when the PMU check fails in a virtual environment") Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141007124757.GH19379@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1b8299dd3d91..66451a6b9485 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -243,8 +243,9 @@ static bool check_hw_exists(void) msr_fail: printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR - "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, + reg, val_new); return false; } -- cgit v1.2.3 From c719f56092add9b3d4192f57c64ce7af11105130 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Oct 2014 11:10:21 +0200 Subject: perf: Fix and clean up initialization of pmu::event_idx Andy reported that the current state of event_idx is rather confused. So remove all but the x86_pmu implementation and change the default to return 0 (the safe option). Reported-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Benjamin Herrenschmidt Cc: Christoph Lameter Cc: Cody P Schafer Cc: Cody P Schafer Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Himangi Saraogi Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Gortmaker Cc: Paul Mackerras Cc: sukadev@linux.vnet.ibm.com Cc: Thomas Huth Cc: Vince Weaver Cc: linux390@de.ibm.com Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/perf/hv-24x7.c | 6 ------ arch/powerpc/perf/hv-gpci.c | 6 ------ arch/s390/kernel/perf_cpum_sf.c | 6 ------ 3 files changed, 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 6c8710dd90c9..dba34088da28 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -417,11 +417,6 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } -static int h_24x7_event_idx(struct perf_event *event) -{ - return 0; -} - static struct pmu h_24x7_pmu = { .task_ctx_nr = perf_invalid_context, @@ -433,7 +428,6 @@ static struct pmu h_24x7_pmu = { .start = h_24x7_event_start, .stop = h_24x7_event_stop, .read = h_24x7_event_update, - .event_idx = h_24x7_event_idx, }; static int hv_24x7_init(void) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 15fc76c93022..a051fe946c63 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -246,11 +246,6 @@ static int h_gpci_event_init(struct perf_event *event) return 0; } -static int h_gpci_event_idx(struct perf_event *event) -{ - return 0; -} - static struct pmu h_gpci_pmu = { .task_ctx_nr = perf_invalid_context, @@ -262,7 +257,6 @@ static struct pmu h_gpci_pmu = { .start = h_gpci_event_start, .stop = h_gpci_event_stop, .read = h_gpci_event_update, - .event_idx = h_gpci_event_idx, }; static int hv_gpci_init(void) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 08e761318c17..b878f12a9597 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1411,11 +1411,6 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) perf_pmu_enable(event->pmu); } -static int cpumsf_pmu_event_idx(struct perf_event *event) -{ - return event->hw.idx; -} - CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); @@ -1458,7 +1453,6 @@ static struct pmu cpumf_sampling = { .stop = cpumsf_pmu_stop, .read = cpumsf_pmu_read, - .event_idx = cpumsf_pmu_event_idx, .attr_groups = cpumsf_pmu_attr_groups, }; -- cgit v1.2.3 From 7fb0f1de49fc75a0dcec22531f2d0a79fc2fb625 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Oct 2014 09:12:35 +0200 Subject: perf/x86: Fix compile warnings for intel_uncore The uncore drivers require PCI and generate compile time warnings when !CONFIG_PCI. Reported-by: Andrew Morton Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: Andi Kleen Cc: Borislav Petkov Cc: Josh Triplett Cc: Stephane Eranian Cc: Yan, Zheng Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ arch/x86/kernel/cpu/Makefile | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f2327e88e07c..ded8a6774ac9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -142,6 +142,10 @@ config INSTRUCTION_DECODER def_bool y depends on KPROBES || PERF_EVENTS || UPROBES +config PERF_EVENTS_INTEL_UNCORE + def_bool y + depends on PERF_EVENTS && SUP_SUP_INTEL && PCI + config OUTPUT_FORMAT string default "elf32-i386" if X86_32 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 01d5453b5502..e27b49d7c922 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -39,9 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_uncore_snb.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o + +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ + perf_event_intel_uncore_snb.o \ + perf_event_intel_uncore_snbep.o \ + perf_event_intel_uncore_nhmex.o endif -- cgit v1.2.3 From 60e684f0d66369add7aa49fc39785d2f26fe9169 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 26 Oct 2014 16:06:28 +0000 Subject: x86/irq: Fix XT-PIC-XT-PIC in /proc/interrupts Fix duplicate XT-PIC seen in /proc/interrupts on x86 systems that make use of 8259A Programmable Interrupt Controllers. Specifically convert output like this: CPU0 0: 76573 XT-PIC-XT-PIC timer 1: 11 XT-PIC-XT-PIC i8042 2: 0 XT-PIC-XT-PIC cascade 4: 8 XT-PIC-XT-PIC serial 6: 3 XT-PIC-XT-PIC floppy 7: 0 XT-PIC-XT-PIC parport0 8: 1 XT-PIC-XT-PIC rtc0 10: 448 XT-PIC-XT-PIC fddi0 12: 23 XT-PIC-XT-PIC eth0 14: 2464 XT-PIC-XT-PIC ide0 NMI: 0 Non-maskable interrupts ERR: 0 to one like this: CPU0 0: 122033 XT-PIC timer 1: 11 XT-PIC i8042 2: 0 XT-PIC cascade 4: 8 XT-PIC serial 6: 3 XT-PIC floppy 7: 0 XT-PIC parport0 8: 1 XT-PIC rtc0 10: 145 XT-PIC fddi0 12: 31 XT-PIC eth0 14: 2245 XT-PIC ide0 NMI: 0 Non-maskable interrupts ERR: 0 that is one like we used to have from ~2.2 till it was changed sometime. The rationale is there is no value in this duplicate information, it merely clutters output and looks ugly. We only have one handler for 8259A interrupts so there is no need to give it a name separate from the name already given to irq_chip. We could define meaningful names for handlers based on bits in the ELCR register on systems that have it or the value of the LTIM bit we use in ICW1 otherwise (hardcoded to 0 though with MCA support gone), to tell edge-triggered and level-triggered inputs apart. While that information does not affect 8259A interrupt handlers it could help people determine which lines are shareable and which are not. That is material for a separate change though. Any tools that parse /proc/interrupts are supposed not to be affected since it was many years we used the format this change converts back to. Signed-off-by: Maciej W. Rozycki Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LFD.2.11.1410260147190.21390@eddie.linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8259.c | 3 +-- arch/x86/kernel/irqinit.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 8af817105e29..e7cc5370cd2f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<chip; - const char *name = chip->name; int i; #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) @@ -79,7 +78,7 @@ void __init init_ISA_irqs(void) legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) - irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); + irq_set_chip_and_handler(i, chip, handle_level_irq); } void __init init_IRQ(void) -- cgit v1.2.3 From c20ce79303c73855d7a2dd0964f153e902ebdbda Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 28 Oct 2014 11:30:43 -0700 Subject: sparc: Hook up bpf system call. Signed-off-by: David S. Miller --- arch/sparc/include/uapi/asm/unistd.h | 3 ++- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index c842a89b1190..46d83842eddc 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -414,8 +414,9 @@ #define __NR_seccomp 346 #define __NR_getrandom 347 #define __NR_memfd_create 348 +#define __NR_bpf 349 -#define NR_syscalls 349 +#define NR_syscalls 350 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 6a873c344bc0..ad0cdf497b78 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -86,4 +86,4 @@ sys_call_table: /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr -/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create +/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index d9151b6490d8..580cde9370c9 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -87,7 +87,7 @@ sys_call_table32: /*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr - .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create + .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf #endif /* CONFIG_COMPAT */ @@ -166,4 +166,4 @@ sys_call_table: /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr - .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create + .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf -- cgit v1.2.3 From 49f8d8c04368d2e29cd26752715367ccafec5b1d Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Fri, 17 Oct 2014 17:49:44 -0700 Subject: powerpc/numa: use cached value of update->cpu in update_cpu_topology There isn't any need to keep referring to update->cpu, as we've already checked cpu == update->cpu at this point. Signed-off-by: Nishanth Aravamudan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index e5236c24dc07..ef45abf7319e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1512,8 +1512,8 @@ static int update_cpu_topology(void *data) if (cpu != update->cpu) continue; - unmap_cpu_from_node(update->cpu); - map_cpu_to_node(update->cpu, update->new_nid); + unmap_cpu_from_node(cpu); + map_cpu_to_node(cpu, update->new_nid); vdso_getcpu_init(); } -- cgit v1.2.3 From 2c0a33f9861d38631245f7ef434ecad3413324fb Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Fri, 17 Oct 2014 17:50:40 -0700 Subject: powerpc/numa: ensure per-cpu NUMA mappings are correct on topology update We received a report of warning in kernel/sched/core.c where the sched group was NULL on an LPAR after a topology update. This seems to occur because after the topology update has moved the CPUs, cpu_to_node is returning the old value still, which ends up breaking the consistency of the NUMA topology in the per-cpu maps. Ensure that we update the per-cpu fields when we re-map CPUs. Signed-off-by: Nishanth Aravamudan Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index ef45abf7319e..b9d1dfdbe5bb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1509,11 +1509,14 @@ static int update_cpu_topology(void *data) cpu = smp_processor_id(); for (update = data; update; update = update->next) { + int new_nid = update->new_nid; if (cpu != update->cpu) continue; unmap_cpu_from_node(cpu); - map_cpu_to_node(cpu, update->new_nid); + map_cpu_to_node(cpu, new_nid); + set_cpu_numa_node(cpu, new_nid); + set_cpu_numa_mem(cpu, local_memory_node(new_nid)); vdso_getcpu_init(); } -- cgit v1.2.3 From 2d605a3029726b881fc9df7fae75e6bf08d8a526 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 28 Oct 2014 15:27:07 +0000 Subject: ARM: enable bpf syscall Signed-off-by: Russell King --- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm/kernel/calls.S | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 3aaa75cae90c..705bb7620673 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -412,6 +412,7 @@ #define __NR_seccomp (__NR_SYSCALL_BASE+383) #define __NR_getrandom (__NR_SYSCALL_BASE+384) #define __NR_memfd_create (__NR_SYSCALL_BASE+385) +#define __NR_bpf (__NR_SYSCALL_BASE+386) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 9f899d8fdcca..e51833f8cc38 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -395,6 +395,7 @@ CALL(sys_seccomp) CALL(sys_getrandom) /* 385 */ CALL(sys_memfd_create) + CALL(sys_bpf) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted -- cgit v1.2.3 From d4e1a0af1d3a88cdfc8c954d3005eb8745ec518d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Oct 2014 13:57:53 -0400 Subject: x86: Don't enable F00F workaround on Intel Quark processors The Intel Quark processor is a part of family 5, but does not have the F00F bug present in Pentiums of the same family. Pentiums were models 0 through 8, Quark is model 9. Signed-off-by: Dave Jones Cc: Bryan O'Donoghue Link: http://lkml.kernel.org/r/20141028175753.GA12743@redhat.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1ef456273172..9cc6b6f25f42 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -213,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_F00F_BUG /* - * All current models of Pentium and Pentium with MMX technology CPUs + * All models of Pentium and Pentium with MMX technology CPUs * have the F0 0F bug, which lets nonprivileged users lock up the * system. Announce that the fault handler will be checking for it. + * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); - if (!paravirt_enabled() && c->x86 == 5) { + if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); -- cgit v1.2.3 From f18298595aefa2c836a128ec6e0f75f39965dd81 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 27 Oct 2014 13:21:32 +0800 Subject: x86, intel-mid: Create IRQs for APB timers and RTC timers Intel MID platforms has no legacy interrupts, so no IRQ descriptors preallocated. We need to call mp_map_gsi_to_irq() to create IRQ descriptors for APB timers and RTC timers, otherwise it may cause invalid memory access as: [ 0.116839] BUG: unable to handle kernel NULL pointer dereference at 0000003a [ 0.123803] IP: [] setup_irq+0xf/0x4d Tested-by: Andy Shevchenko Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: David Cohen Cc: # 3.17 Link: http://lkml.kernel.org/r/1414387308-27148-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/apb_timer.c | 2 -- arch/x86/platform/intel-mid/sfi.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 5972b108f15a..b708738d016e 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev *adev) irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); - /* APB timer irqs are set up as mp_irqs, timer is edge type */ - __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge"); } /* Should be called with per cpu */ diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 3c53a90fdb18..c14ad34776c4 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -106,6 +106,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); } return 0; @@ -176,6 +177,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) mp_irq.dstapic = MP_APIC_ALL; mp_irq.dstirq = pentry->irq; mp_save_irq(&mp_irq); + mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); } return 0; } -- cgit v1.2.3 From b77e8f435337baa1cd15852fb9db3f6d26cd8eb7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 27 Oct 2014 13:21:33 +0800 Subject: ACPI, irq, x86: Return IRQ instead of GSI in mp_register_gsi() Function mp_register_gsi() returns blindly the GSI number for the ACPI SCI interrupt. That causes a regression when the GSI for ACPI SCI is shared with other devices. The regression was caused by commit 84245af7297ced9e8fe "x86, irq, ACPI: Change __acpi_register_gsi to return IRQ number instead of GSI" and exposed on a SuperMicro system, which shares one GSI between ACPI SCI and PCI device, with following failure: http://sourceforge.net/p/linux1394/mailman/linux1394-user/?viewmonth=201410 [ 0.000000] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) [ 2.699224] firewire_ohci 0000:06:00.0: failed to allocate interrupt 20 Return mp_map_gsi_to_irq(gsi, 0) instead of the GSI number. Reported-and-Tested-by: Daniel Robbins Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Joerg Roedel Cc: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Rafael J. Wysocki Cc: Bjorn Helgaas Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Len Brown Cc: Pavel Machek Cc: # 3.17 Link: http://lkml.kernel.org/r/1414387308-27148-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d5c887216fb3..a142e77693e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -397,7 +397,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger, /* Don't set up the ACPI SCI because it's already set up */ if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; + return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC); trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; -- cgit v1.2.3 From d1cd1210834649ce1ca6bafe5ac25d2f40331343 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 29 Oct 2014 03:53:37 -0700 Subject: x86, pageattr: Prevent overflow in slow_virt_to_phys() for X86_PAE pte_pfn() returns a PFN of long (32 bits in 32-PAE), so "long << PAGE_SHIFT" will overflow for PFNs above 4GB. Due to this issue, some Linux 32-PAE distros, running as guests on Hyper-V, with 5GB memory assigned, can't load the netvsc driver successfully and hence the synthetic network device can't work (we can use the kernel parameter mem=3000M to work around the issue). Cast pte_pfn() to phys_addr_t before shifting. Fixes: "commit d76565344512: x86, mm: Create slow_virt_to_phys()" Signed-off-by: Dexuan Cui Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: gregkh@linuxfoundation.org Cc: linux-mm@kvack.org Cc: olaf@aepfle.de Cc: apw@canonical.com Cc: jasowang@redhat.com Cc: dave.hansen@intel.com Cc: riel@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1414580017-27444-1-git-send-email-decui@microsoft.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index ae242a7c11c7..36de293caf25 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -409,7 +409,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; - phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; return (phys_addr | offset); } EXPORT_SYMBOL_GPL(slow_virt_to_phys); -- cgit v1.2.3 From 1776b10627e486dd431fe72d8d47e5a865cf65d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Oct 2014 10:18:17 +0100 Subject: perf/x86/intel: Revert incomplete and undocumented Broadwell client support These patches: 86a349a28b24 ("perf/x86/intel: Add Broadwell core support") c46e665f0377 ("perf/x86: Add INST_RETIRED.ALL workarounds") fdda3c4aacec ("perf/x86/intel: Use Broadwell cache event list for Haswell") introduced magic constants and unexplained changes: https://lkml.org/lkml/2014/10/28/1128 https://lkml.org/lkml/2014/10/27/325 https://lkml.org/lkml/2014/8/27/546 https://lkml.org/lkml/2014/10/28/546 Peter Zijlstra has attempted to help out, to clean up the mess: https://lkml.org/lkml/2014/10/28/543 But has not received helpful and constructive replies which makes me doubt wether it can all be finished in time until v3.18 is released. Despite various review feedback the author (Andi Kleen) has answered only few of the review questions and has generally been uncooperative, only giving replies when prompted repeatedly, and only giving minimal answers instead of constructively explaining and helping along the effort. That kind of behavior is not acceptable. There's also a boot crash on Intel E5-1630 v3 CPUs reported for another commit from Andi Kleen: e735b9db12d7 ("perf/x86/intel/uncore: Add Haswell-EP uncore support") https://lkml.org/lkml/2014/10/22/730 Which is not yet resolved. The uncore driver is independent in theory, but the crash makes me worry about how well all these patches were tested and makes me uneasy about the level of interminging that the Broadwell and Haswell code has received by the commits above. As a first step to resolve the mess revert the Broadwell client commits back to the v3.17 version, before we run out of time and problematic code hits a stable upstream kernel. ( If the Haswell-EP crash is not resolved via a simple fix then we'll have to revert the Haswell-EP uncore driver as well. ) The Broadwell client series has to be submitted in a clean fashion, with single, well documented changes per patch. If they are submitted in time and are accepted during review then they can possibly go into v3.19 but will need additional scrutiny due to the rocky history of this patch set. Cc: Andi Kleen Cc: Peter Zijlstra (Intel) Cc: eranian@google.com Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1409683455-29168-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 -- arch/x86/kernel/cpu/perf_event.h | 1 - arch/x86/kernel/cpu/perf_event_intel.c | 173 +-------------------------------- 3 files changed, 2 insertions(+), 181 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66451a6b9485..143e5f5dc855 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -445,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; - if (event->attr.sample_period && x86_pmu.limit_period) { - if (x86_pmu.limit_period(event, event->attr.sample_period) > - event->attr.sample_period) - return -EINVAL; - } - return x86_setup_perfctr(event); } @@ -988,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event) if (left > x86_pmu.max_period) left = x86_pmu.max_period; - if (x86_pmu.limit_period) - left = x86_pmu.limit_period(event, left); - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; /* diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index d98a34d435d7..fc5eb390b368 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -445,7 +445,6 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; - unsigned (*limit_period)(struct perf_event *event, unsigned l); /* * sysfs attrs diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a73947c53b65..944bf019b74f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct event_constraint intel_bdw_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ - FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ - INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ - EVENT_CONSTRAINT_END -}; - static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids }; -static __initconst const u64 hsw_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */ - [ C(RESULT_ACCESS) ] = 0x1b7, - /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE| - L3_MISS|ANY_SNOOP */ - [ C(RESULT_MISS) ] = 0x1b7, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */ - /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */ - [ C(RESULT_MISS) ] = 0x1b7, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ - [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static __initconst const u64 hsw_hw_cache_extra_regs - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(LL ) ] = { - [ C(OP_READ) ] = { - /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */ - [ C(RESULT_ACCESS) ] = 0x2d5, - /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE| - L3_MISS|ANY_SNOOP */ - [ C(RESULT_MISS) ] = 0x3fbc0202d5ull, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */ - /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */ - [ C(RESULT_MISS) ] = 0x3fbc020122ull, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, -}; - static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) return c; } -/* - * Broadwell: - * The INST_RETIRED.ALL period always needs to have lowest - * 6bits cleared (BDM57). It shall not use a period smaller - * than 100 (BDM11). We combine the two to enforce - * a min-period of 128. - */ -static unsigned bdw_limit_period(struct perf_event *event, unsigned left) -{ - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { - if (left < 128) - left = 128; - left &= ~0x3fu; - } - return left; -} - PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void) case 69: /* 22nm Haswell ULT */ case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_snb(); @@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - intel_pmu_lbr_init_snb(); - - x86_pmu.event_constraints = intel_bdw_event_constraints; - x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; - x86_pmu.extra_regs = intel_snbep_extra_regs; - x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - /* all extra regs are per-cpu when HT is on */ - x86_pmu.er_flags |= ERF_HAS_RSP_1; - x86_pmu.er_flags |= ERF_NO_HT_SHARING; - - x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; - x86_pmu.limit_period = bdw_limit_period; - pr_cont("Broadwell events, "); - break; - default: switch (x86_pmu.version) { case 1: -- cgit v1.2.3 From d0b92845e54590f0b59fccc0e10159f3b84825bd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 29 Oct 2014 12:06:31 +0100 Subject: ARM: 8182/1: l2c: Make l2x0_cache_size_of_parse() return 'int' Since commit f3354ab67476dc80 ("ARM: 8169/1: l2c: parse cache properties from ePAPR definitions") the following error is seen on imx6q: [ 0.000000] PL310 OF: cache setting yield illegal associativity [ 0.000000] PL310 OF: -2147097556 calculated, only 8 and 16 legal As imx6q does not pass the "cache-size" and "cache-sets" properties in DT, the function l2x0_cache_size_of_parse() returns early and keep the 'associativity' pointer uninitialized. To fix this problem, return error codes inside l2x0_cache_size_of_parse() and only use the 'associativity' pointer result if l2x0_cache_size_of_parse() succeeds. Signed-off-by: Fabio Estevam Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 55f9d6e0cc88..4a785fc27629 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -956,7 +956,7 @@ static u32 cache_id_part_number_from_dt; * @associativity: variable to return the calculated associativity in * @max_way_size: the maximum size in bytes for the cache ways */ -static void __init l2x0_cache_size_of_parse(const struct device_node *np, +static int __init l2x0_cache_size_of_parse(const struct device_node *np, u32 *aux_val, u32 *aux_mask, u32 *associativity, u32 max_way_size) @@ -974,7 +974,7 @@ static void __init l2x0_cache_size_of_parse(const struct device_node *np, of_property_read_u32(np, "cache-line-size", &line_size); if (!cache_size || !sets) - return; + return -ENODEV; /* All these l2 caches have the same line = block size actually */ if (!line_size) { @@ -1009,7 +1009,7 @@ static void __init l2x0_cache_size_of_parse(const struct device_node *np, if (way_size > max_way_size) { pr_err("L2C OF: set size %dKB is too large\n", way_size); - return; + return -EINVAL; } pr_info("L2C OF: override cache size: %d bytes (%dKB)\n", @@ -1027,7 +1027,7 @@ static void __init l2x0_cache_size_of_parse(const struct device_node *np, if (way_size_bits < 1 || way_size_bits > 6) { pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n", way_size); - return; + return -EINVAL; } mask |= L2C_AUX_CTRL_WAY_SIZE_MASK; @@ -1036,6 +1036,8 @@ static void __init l2x0_cache_size_of_parse(const struct device_node *np, *aux_val &= ~mask; *aux_val |= val; *aux_mask &= ~mask; + + return 0; } static void __init l2x0_of_parse(const struct device_node *np, @@ -1046,6 +1048,7 @@ static void __init l2x0_of_parse(const struct device_node *np, u32 dirty = 0; u32 val = 0, mask = 0; u32 assoc; + int ret; of_property_read_u32(np, "arm,tag-latency", &tag); if (tag) { @@ -1068,7 +1071,10 @@ static void __init l2x0_of_parse(const struct device_node *np, val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; } - l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K); + ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K); + if (ret) + return; + if (assoc > 8) { pr_err("l2x0 of: cache setting yield too high associativity\n"); pr_err("l2x0 of: %d calculated, max 8\n", assoc); @@ -1125,6 +1131,7 @@ static void __init l2c310_of_parse(const struct device_node *np, u32 tag[3] = { 0, 0, 0 }; u32 filter[2] = { 0, 0 }; u32 assoc; + int ret; of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); if (tag[0] && tag[1] && tag[2]) @@ -1152,7 +1159,10 @@ static void __init l2c310_of_parse(const struct device_node *np, l2x0_base + L310_ADDR_FILTER_START); } - l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); + ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); + if (ret) + return; + switch (assoc) { case 16: *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; -- cgit v1.2.3 From 005757298fdd3c181a11f463fea2646ca816a9a5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 24 Oct 2014 19:49:01 +0100 Subject: ARM: 8181/1: Drop extra return statement Commit 513510ddba9650fc7da456eefeb0ead7632324f6 (common: dma-mapping: introduce common remapping functions) managed to end up with an extra return statement from the original patch. Drop it. Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c245d903927f..e8907117861e 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1198,7 +1198,6 @@ __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, { return dma_common_pages_remap(pages, size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); - return NULL; } /* -- cgit v1.2.3 From 6d0ec1dd90afa0166a5fdadb1228bb026b09b925 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 29 Oct 2014 12:42:08 +0100 Subject: ARM: 8183/1: l2c: Improve l2c310_of_parse() error message Russell King suggested [1]: "I'd ask for one change. Please make all these messages start with "L2C-310 OF" not "PL310 OF:". The device is described in ARM documentation as a L2C-310 not PL310. (Also note the : is dropped too - most of the other messages don't have the : either.) The: "PL310 OF: cache setting yield illegal associativity PL310 OF: -1073346556 calculated, only 8 and 16 legal" message could also be changed to something like: "L2C-310 OF cache associativity %d invalid, only 8 or 16 permittedn" [1] http://www.spinics.net/lists/arm-kernel/msg372776.html Signed-off-by: Fabio Estevam Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 4a785fc27629..5e65ca8dea62 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1174,8 +1174,8 @@ static void __init l2c310_of_parse(const struct device_node *np, *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; break; default: - pr_err("PL310 OF: cache setting yield illegal associativity\n"); - pr_err("PL310 OF: %d calculated, only 8 and 16 legal\n", assoc); + pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", + assoc); break; } } -- cgit v1.2.3 From 9ff0bb5ba60638a688a46e93df8c5009896672eb Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 20 Oct 2014 19:42:18 +0100 Subject: ARM: 8180/1: mm: implement no-highmem fast path in kmap_atomic_pfn() Since CONFIG_HIGHMEM got enabled on ARMv5 Kirkwood, we have noticed a very significant drop in networking performance. The test were conducted on an OpenBlocks A7 board. Without this patch, the outgoing performance measured with iperf are: - highmem OFF, TSO OFF 544 Mbit/s - highmem OFF, TSO ON 942 Mbit/s - highmem ON, TSO OFF 306 Mbit/s - highmem ON, TSO ON 246 Mbit/s On this Kirkwood platform, the L2 cache is a Feroceon cache, and with this cache, all the range operations have to be done on virtual addresses and not physical addresses. Therefore, whenever CONFIG_HIGHMEM is enabled, the cache maintenance operations call kmap_atomic_pfn() and kunmap_atomic(). However, kmap_atomic_pfn() does not implement the same fast path for non-highmem pages as the one implemented in kmap_atomic(), and this is one of the reason for the performance drop. While this patch does not fully restore the performances, it clearly improves them a lot: without patch with patch - highmem ON, TSO OFF 306 Mbit/s 387 Mbit/s - highmem ON, TSO ON 246 Mbit/s 434 Mbit/s We're still far from the !CONFIG_HIGHMEM performances, but it does improve a bit the situation. Thanks a lot to Ezequiel Garcia and Gregory Clement for all the testing work around this topic. Signed-off-by: Thomas Petazzoni Signed-off-by: Russell King --- arch/arm/mm/highmem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 45aeaaca9052..e17ed00828d7 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -127,8 +127,11 @@ void *kmap_atomic_pfn(unsigned long pfn) { unsigned long vaddr; int idx, type; + struct page *page = pfn_to_page(pfn); pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); -- cgit v1.2.3 From 5417421b270229bfce0795ccc99a4b481e4954ca Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Wed, 29 Oct 2014 14:50:59 -0700 Subject: sh: fix sh770x SCIF memory regions Resources scif1_resources & scif2_resources overlap. Actual SCIF region size is 0x10. This is regression from commit d850acf975be ("sh: Declare SCIF register base and IRQ as resources") Signed-off-by: Andriy Skulysh Acked-by: Laurent Pinchart Cc: Geert Uytterhoeven Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/kernel/cpu/sh3/setup-sh770x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c index 9139d14b9c53..538c10db3537 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c @@ -118,7 +118,7 @@ static struct plat_sci_port scif0_platform_data = { }; static struct resource scif0_resources[] = { - DEFINE_RES_MEM(0xfffffe80, 0x100), + DEFINE_RES_MEM(0xfffffe80, 0x10), DEFINE_RES_IRQ(evt2irq(0x4e0)), }; @@ -143,7 +143,7 @@ static struct plat_sci_port scif1_platform_data = { }; static struct resource scif1_resources[] = { - DEFINE_RES_MEM(0xa4000150, 0x100), + DEFINE_RES_MEM(0xa4000150, 0x10), DEFINE_RES_IRQ(evt2irq(0x900)), }; @@ -169,7 +169,7 @@ static struct plat_sci_port scif2_platform_data = { }; static struct resource scif2_resources[] = { - DEFINE_RES_MEM(0xa4000140, 0x100), + DEFINE_RES_MEM(0xa4000140, 0x10), DEFINE_RES_IRQ(evt2irq(0x880)), }; -- cgit v1.2.3 From 7d2911c4381555b31ef0bcae42a0dbf9ade7426e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 30 Oct 2014 09:59:27 -0700 Subject: net: smc91x: Fix gpios for device tree based booting With legacy booting, the platform init code was taking care of the configuring of GPIOs. With device tree based booting, things may or may not work depending what bootloader has configured or if the legacy platform code gets called. Let's add support for the pwrdn and reset GPIOs to the smc91x driver to fix the issues of smc91x not working properly when booted in device tree mode. And let's change n900 to use these settings as some versions of the bootloader do not configure things properly causing errors. Reported-by: Kevin Hilman Signed-off-by: Tony Lindgren Signed-off-by: David S. Miller --- arch/arm/boot/dts/omap3-n900.dts | 2 ++ arch/arm/mach-omap2/pdata-quirks.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 9b0494a8ab45..286bbf620c77 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -665,6 +665,8 @@ bank-width = <2>; pinctrl-names = "default"; pinctrl-0 = <ðernet_pins>; + power-gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>; /* gpio86 */ + reset-gpios = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* gpio164 */ gpmc,device-width = <2>; gpmc,sync-clk-ps = <0>; gpmc,cs-on-ns = <0>; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index c95346c94829..cec9d6c6442c 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -252,9 +252,6 @@ static void __init nokia_n900_legacy_init(void) platform_device_register(&omap3_rom_rng_device); } - - /* Only on some development boards */ - gpio_request_one(164, GPIOF_OUT_INIT_LOW, "smc91x reset"); } static void __init omap3_tao3530_legacy_init(void) -- cgit v1.2.3 From 653bc77af60911ead1f423e588f54fc2547c4957 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 31 Oct 2014 18:08:45 -0700 Subject: x86_64, entry: Fix out of bounds read on sysenter Rusty noticed a Really Bad Bug (tm) in my NT fix. The entry code reads out of bounds, causing the NT fix to be unreliable. But, and this is much, much worse, if your stack is somehow just below the top of the direct map (or a hole), you read out of bounds and crash. Excerpt from the crash: [ 1.129513] RSP: 0018:ffff88001da4bf88 EFLAGS: 00010296 2b:* f7 84 24 90 00 00 00 testl $0x4000,0x90(%rsp) That read is deterministically above the top of the stack. I thought I even single-stepped through this code when I wrote it to check the offset, but I clearly screwed it up. Fixes: 8c7aa698baca ("x86_64, entry: Filter RFLAGS.NT on entry from userspace") Reported-by: Rusty Russell Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 8ffba18395c8..ffe71228fc10 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target) * ourselves. To save a few cycles, we can check whether * NT was set instead of doing an unconditional popfq. */ - testl $X86_EFLAGS_NT,EFLAGS(%rsp) /* saved EFLAGS match cpu */ + testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) jnz sysenter_fix_flags sysenter_flags_fixed: -- cgit v1.2.3 From 7e46dddd6f6cd5dbf3c7bd04a7e75d19475ac9f2 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 28 Oct 2014 00:03:43 +0200 Subject: KVM: x86: Fix far-jump to non-canonical check Commit d1442d85cc30 ("KVM: x86: Handle errors when RIP is set during far jumps") introduced a bug that caused the fix to be incomplete. Due to incorrect evaluation, far jump to segment with L bit cleared (i.e., 32-bit segment) and RIP with any of the high bits set (i.e, RIP[63:32] != 0) set may not trigger #GP. As we know, this imposes a security problem. In addition, the condition for two warnings was incorrect. Fixes: d1442d85cc30ea75f7d399474ca738e0bc96f715 Reported-by: Dan Carpenter Signed-off-by: Nadav Amit [Add #ifdef CONFIG_X86_64 to avoid complaints of undefined behavior. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 52a96270b560..5edf088ca51e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -574,12 +574,14 @@ static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, case 4: ctxt->_eip = (u32)dst; break; +#ifdef CONFIG_X86_64 case 8: if ((cs_l && is_noncanonical_address(dst)) || - (!cs_l && (dst & ~(u32)-1))) + (!cs_l && (dst >> 32) != 0)) return emulate_gp(ctxt, 0); ctxt->_eip = dst; break; +#endif default: WARN(1, "unsupported eip assignment size\n"); } @@ -2035,7 +2037,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); if (rc != X86EMUL_CONTINUE) { - WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); + WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); /* assigning eip failed; restore the old cs */ ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); return rc; @@ -2132,7 +2134,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) return rc; rc = assign_eip_far(ctxt, eip, new_desc.l); if (rc != X86EMUL_CONTINUE) { - WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); + WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); } return rc; -- cgit v1.2.3 From 282da870f4b5868efadcd4dc2a5a738d6fdf65d4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 8 Oct 2014 18:05:39 +0200 Subject: KVM: nVMX: Disable preemption while reading from shadow VMCS In order to access the shadow VMCS, we need to load it. At this point, vmx->loaded_vmcs->vmcs and the actually loaded one start to differ. If we now get preempted by Linux, vmx_vcpu_put and, on return, the vmx_vcpu_load will work against the wrong vmcs. That can cause copy_shadow_to_vmcs12 to corrupt the vmcs12 state. Fix the issue by disabling preemption during the copy operation. copy_vmcs12_to_shadow is safe from this issue as it is executed by vmx_vcpu_run when preemption is already disabled before vmentry. This bug is exposed by running Jailhouse within KVM on CPUs with shadow VMCS support. Jailhouse never expects an interrupt pending vmexit, but the bug can cause it if, after copy_shadow_to_vmcs12 is preempted, the active VMCS happens to have the virtual interrupt pending flag set in the CPU-based execution controls. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a8b76c4c95e2..a0f78dbaabee 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6426,6 +6426,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) const unsigned long *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; + preempt_disable(); + vmcs_load(shadow_vmcs); for (i = 0; i < num_fields; i++) { @@ -6449,6 +6451,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) vmcs_clear(shadow_vmcs); vmcs_load(vmx->loaded_vmcs->vmcs); + + preempt_enable(); } static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) -- cgit v1.2.3 From a73896cb5bbdce672945745db8224352a689f580 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 2 Nov 2014 07:54:30 +0100 Subject: KVM: vmx: defer load of APIC access page address during reset Most call paths to vmx_vcpu_reset do not hold the SRCU lock. Defer loading the APIC access page to the next vmentry. This avoids the following lockdep splat: [ INFO: suspicious RCU usage. ] 3.18.0-rc2-test2+ #70 Not tainted ------------------------------- include/linux/kvm_host.h:474 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 1 lock held by qemu-system-x86/2371: #0: (&vcpu->mutex){+.+...}, at: [] vcpu_load+0x20/0xd0 [kvm] stack backtrace: CPU: 4 PID: 2371 Comm: qemu-system-x86 Not tainted 3.18.0-rc2-test2+ #70 Hardware name: Dell Inc. OptiPlex 9010/0M9KCM, BIOS A12 01/10/2013 0000000000000001 ffff880209983ca8 ffffffff816f514f 0000000000000000 ffff8802099b8990 ffff880209983cd8 ffffffff810bd687 00000000000fee00 ffff880208a2c000 ffff880208a10000 ffff88020ef50040 ffff880209983d08 Call Trace: [] dump_stack+0x4e/0x71 [] lockdep_rcu_suspicious+0xe7/0x120 [] gfn_to_memslot+0xd5/0xe0 [kvm] [] __gfn_to_pfn+0x33/0x60 [kvm] [] gfn_to_page+0x25/0x90 [kvm] [] kvm_vcpu_reload_apic_access_page+0x3c/0x80 [kvm] [] vmx_vcpu_reset+0x20c/0x460 [kvm_intel] [] kvm_vcpu_reset+0x15e/0x1b0 [kvm] [] kvm_arch_vcpu_setup+0x2c/0x50 [kvm] [] kvm_vm_ioctl+0x1d0/0x780 [kvm] [] ? __lock_is_held+0x54/0x80 [] do_vfs_ioctl+0x300/0x520 [] ? __fget+0x5/0x250 [] ? __fget_light+0x2a/0xe0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b Reported-by: Takashi Iwai Reported-by: Alexei Starovoitov Reviewed-by: Wanpeng Li Tested-by: Wanpeng Li Fixes: 38b9917350cb2946e368ba684cfc33d1672f104e Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a0f78dbaabee..3e556c68351b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4579,7 +4579,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, 0); } - kvm_vcpu_reload_apic_access_page(vcpu); + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); if (vmx_vm_has_apicv(vcpu->kvm)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); -- cgit v1.2.3