summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-04-14 16:44:42 +0200
committerIngo Molnar <mingo@kernel.org>2014-04-14 16:44:42 +0200
commit740c699a8d316c8bf8593f19e2ca47795e690622 (patch)
treea78886955770a477945c5d84e06b2e7678733b54 /arch/x86
parente69af4657e7764d03ad555f0b583d9c4217bcefa (diff)
parentc9eaa447e77efe77b7fa4c953bd62de8297fd6c5 (diff)
downloadlinux-stable-740c699a8d316c8bf8593f19e2ca47795e690622.tar.gz
linux-stable-740c699a8d316c8bf8593f19e2ca47795e690622.tar.bz2
linux-stable-740c699a8d316c8bf8593f19e2ca47795e690622.zip
Merge tag 'v3.15-rc1' into perf/urgent
Pick up the latest fixes. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig118
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/boot.h13
-rw-r--r--arch/x86/boot/compressed/eboot.c19
-rw-r--r--arch/x86/boot/compressed/head_32.S8
-rw-r--r--arch/x86/boot/compressed/head_64.S9
-rw-r--r--arch/x86/boot/compressed/misc.c51
-rw-r--r--arch/x86/boot/compressed/string.c46
-rw-r--r--arch/x86/boot/cpucheck.c1
-rw-r--r--arch/x86/boot/edd.c1
-rw-r--r--arch/x86/boot/main.c1
-rw-r--r--arch/x86/boot/regs.c1
-rw-r--r--arch/x86/boot/string.c14
-rw-r--r--arch/x86/boot/string.h21
-rw-r--r--arch/x86/boot/video-vesa.c1
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/blowfish_glue.c3
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c3
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S29
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c14
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S708
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c53
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/archrandom.h42
-rw-r--r--arch/x86/include/asm/bug.h3
-rw-r--r--arch/x86/include/asm/clocksource.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/include/asm/fixmap.h20
-rw-r--r--arch/x86/include/asm/hw_irq.h1
-rw-r--r--arch/x86/include/asm/io.h14
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/mmzone_32.h3
-rw-r--r--arch/x86/include/asm/mpspec.h6
-rw-r--r--arch/x86/include/asm/numaq.h171
-rw-r--r--arch/x86/include/asm/pci.h7
-rw-r--r--arch/x86/include/asm/percpu.h98
-rw-r--r--arch/x86/include/asm/pgtable_types.h7
-rw-r--r--arch/x86/include/asm/preempt.h16
-rw-r--r--arch/x86/include/asm/setup.h6
-rw-r--r--arch/x86/include/asm/syscall.h10
-rw-r--r--arch/x86/include/asm/topology.h14
-rw-r--r--arch/x86/include/asm/vdso.h52
-rw-r--r--arch/x86/include/asm/vdso32.h11
-rw-r--r--arch/x86/include/asm/vgtod.h71
-rw-r--r--arch/x86/include/asm/visws/cobalt.h127
-rw-r--r--arch/x86/include/asm/visws/lithium.h53
-rw-r--r--arch/x86/include/asm/visws/piix4.h107
-rw-r--r--arch/x86/include/asm/visws/sgivw.h5
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/asm/vvar.h29
-rw-r--r--arch/x86/include/asm/xen/page.h11
-rw-r--r--arch/x86/include/asm/xsave.h2
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/cstate.c4
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/kernel/apic/Makefile3
-rw-r--r--arch/x86/kernel/apic/apic.c4
-rw-r--r--arch/x86/kernel/apic/es7000_32.c738
-rw-r--r--arch/x86/kernel/apic/numaq_32.c524
-rw-r--r--arch/x86/kernel/apic/summit_32.c550
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c13
-rw-r--r--arch/x86/kernel/cpu/match.c42
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_uncore.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c9
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c6
-rw-r--r--arch/x86/kernel/cpuid.c15
-rw-r--r--arch/x86/kernel/dumpstack_64.c25
-rw-r--r--arch/x86/kernel/early-quirks.c211
-rw-r--r--arch/x86/kernel/ftrace.c55
-rw-r--r--arch/x86/kernel/hpet.c6
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/kvm.c1
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/ldt.c11
-rw-r--r--arch/x86/kernel/msr.c16
-rw-r--r--arch/x86/kernel/pci-calgary_64.c31
-rw-r--r--arch/x86/kernel/setup.c1
-rw-r--r--arch/x86/kernel/tsc.c5
-rw-r--r--arch/x86/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86/kernel/vsyscall_64.c51
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c69
-rw-r--r--arch/x86/kvm/cpuid.c37
-rw-r--r--arch/x86/kvm/emulate.c8
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c84
-rw-r--r--arch/x86/kvm/vmx.c334
-rw-r--r--arch/x86/kvm/x86.c152
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/mm/ioremap.c224
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c8
-rw-r--r--arch/x86/mm/numa.c4
-rw-r--r--arch/x86/mm/pgtable_32.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c11
-rw-r--r--arch/x86/oprofile/nmi_int.c15
-rw-r--r--arch/x86/pci/Makefile3
-rw-r--r--arch/x86/pci/acpi.c59
-rw-r--r--arch/x86/pci/amd_bus.c15
-rw-r--r--arch/x86/pci/bus_numa.c13
-rw-r--r--arch/x86/pci/common.c133
-rw-r--r--arch/x86/pci/fixup.c24
-rw-r--r--arch/x86/pci/irq.c6
-rw-r--r--arch/x86/pci/legacy.c4
-rw-r--r--arch/x86/pci/numaq_32.c165
-rw-r--r--arch/x86/pci/visws.c87
-rw-r--r--arch/x86/pci/xen.c29
-rw-r--r--arch/x86/platform/Makefile1
-rw-r--r--arch/x86/platform/visws/Makefile1
-rw-r--r--arch/x86/platform/visws/visws_quirks.c608
-rw-r--r--arch/x86/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/tools/relocs.c2
-rw-r--r--arch/x86/vdso/Makefile16
-rw-r--r--arch/x86/vdso/vclock_gettime.c256
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S29
-rw-r--r--arch/x86/vdso/vdso.S22
-rw-r--r--arch/x86/vdso/vdso32-setup.c301
-rw-r--r--arch/x86/vdso/vdso32.S21
-rw-r--r--arch/x86/vdso/vdso32/vclock_gettime.c30
-rw-r--r--arch/x86/vdso/vdso32/vdso32.lds.S15
-rw-r--r--arch/x86/vdso/vdsox32.S22
-rw-r--r--arch/x86/vdso/vma.c20
-rw-r--r--arch/x86/xen/Kconfig7
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/p2m.c121
135 files changed, 2643 insertions, 4812 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8453fe1342ea..25d2c6f7325e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -43,6 +43,7 @@ config X86
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
+ select GENERIC_EARLY_IOREMAP
select HAVE_OPTPROBES
select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
@@ -107,9 +108,9 @@ config X86
select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
- select ARCH_CLOCKSOURCE_DATA if X86_64
+ select ARCH_CLOCKSOURCE_DATA
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
- select GENERIC_TIME_VSYSCALL if X86_64
+ select GENERIC_TIME_VSYSCALL
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
@@ -127,6 +128,8 @@ config X86
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_CC_STACKPROTECTOR
+ select GENERIC_CPU_AUTOPROBE
+ select HAVE_ARCH_AUDITSYSCALL
config INSTRUCTION_DECODER
def_bool y
@@ -195,9 +198,6 @@ config ARCH_HAS_CPU_RELAX
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
-config ARCH_HAS_CPU_AUTOPROBE
- def_bool y
-
config HAVE_SETUP_PER_CPU_AREA
def_bool y
@@ -346,12 +346,9 @@ config X86_EXTENDED_PLATFORM
for the following (non-PC) 32 bit x86 platforms:
Goldfish (Android emulator)
AMD Elan
- NUMAQ (IBM/Sequent)
RDC R-321x SoC
SGI 320/540 (Visual Workstation)
STA2X11-based (e.g. Northville)
- Summit/EXA (IBM x440)
- Unisys ES7000 IA32 series
Moorestown MID devices
If you have one of these systems, or if you want to build a
@@ -489,49 +486,22 @@ config X86_32_NON_STANDARD
depends on X86_32 && SMP
depends on X86_EXTENDED_PLATFORM
---help---
- This option compiles in the NUMAQ, Summit, bigsmp, ES7000,
- STA2X11, default subarchitectures. It is intended for a generic
- binary kernel. If you select them all, kernel will probe it
- one by one and will fallback to default.
+ This option compiles in the bigsmp and STA2X11 default
+ subarchitectures. It is intended for a generic binary
+ kernel. If you select them all, kernel will probe it one by
+ one and will fallback to default.
# Alphabetically sorted list of Non standard 32 bit platforms
-config X86_NUMAQ
- bool "NUMAQ (IBM/Sequent)"
- depends on X86_32_NON_STANDARD
- depends on PCI
- select NUMA
- select X86_MPPARSE
- ---help---
- This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
- NUMA multiquad box. This changes the way that processors are
- bootstrapped, and uses Clustered Logical APIC addressing mode instead
- of Flat Logical. You will need a new lynxer.elf file to flash your
- firmware with - send email to <Martin.Bligh@us.ibm.com>.
-
config X86_SUPPORTS_MEMORY_FAILURE
def_bool y
# MCE code calls memory_failure():
depends on X86_MCE
# On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
- depends on !X86_NUMAQ
# On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
depends on X86_64 || !SPARSEMEM
select ARCH_SUPPORTS_MEMORY_FAILURE
-config X86_VISWS
- bool "SGI 320/540 (Visual Workstation)"
- depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
- depends on X86_32_NON_STANDARD
- ---help---
- The SGI Visual Workstation series is an IA32-based workstation
- based on SGI systems chips with some legacy PC hardware attached.
-
- Say Y here to create a kernel to run on the SGI 320 or 540.
-
- A kernel compiled for the Visual Workstation will run on general
- PCs as well. See <file:Documentation/sgi-visws.txt> for details.
-
config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
@@ -548,20 +518,6 @@ config STA2X11
option is selected the kernel will still be able to boot on
standard PC machines.
-config X86_SUMMIT
- bool "Summit/EXA (IBM x440)"
- depends on X86_32_NON_STANDARD
- ---help---
- This option is needed for IBM systems that use the Summit/EXA chipset.
- In particular, it is needed for the x440.
-
-config X86_ES7000
- bool "Unisys ES7000 IA32 series"
- depends on X86_32_NON_STANDARD && X86_BIGSMP
- ---help---
- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
- supposed to run on an IA32-based Unisys ES7000 system.
-
config X86_32_IRIS
tristate "Eurobraille/Iris poweroff module"
depends on X86_32
@@ -684,14 +640,6 @@ config MEMTEST
memtest=4, mean do 4 test patterns.
If you are unsure how to answer this question, answer N.
-config X86_SUMMIT_NUMA
- def_bool y
- depends on X86_32 && NUMA && X86_32_NON_STANDARD
-
-config X86_CYCLONE_TIMER
- def_bool y
- depends on X86_SUMMIT
-
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
@@ -820,7 +768,7 @@ config NR_CPUS
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
default "1" if !SMP
default "8192" if MAXSMP
- default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+ default "32" if SMP && X86_BIGSMP
default "8" if SMP
---help---
This allows you to specify the maximum number of CPUs which this
@@ -884,10 +832,6 @@ config X86_IO_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
-config X86_VISWS_APIC
- def_bool y
- depends on X86_32 && X86_VISWS
-
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"
depends on X86_IO_APIC
@@ -1105,13 +1049,11 @@ config X86_CPUID
choice
prompt "High Memory Support"
- default HIGHMEM64G if X86_NUMAQ
default HIGHMEM4G
depends on X86_32
config NOHIGHMEM
bool "off"
- depends on !X86_NUMAQ
---help---
Linux can use up to 64 Gigabytes of physical memory on x86 systems.
However, the address space of 32-bit x86 processors is only 4
@@ -1148,7 +1090,6 @@ config NOHIGHMEM
config HIGHMEM4G
bool "4GB"
- depends on !X86_NUMAQ
---help---
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
@@ -1240,8 +1181,8 @@ config DIRECT_GBPAGES
config NUMA
bool "Numa Memory Allocation and Scheduler Support"
depends on SMP
- depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI))
- default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
+ depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
+ default y if X86_BIGSMP
---help---
Enable NUMA (Non Uniform Memory Access) support.
@@ -1252,15 +1193,11 @@ config NUMA
For 64-bit this is recommended if the system is Intel Core i7
(or later), AMD Opteron, or EM64T NUMA.
- For 32-bit this is only needed on (rare) 32-bit-only platforms
- that support NUMA topologies, such as NUMAQ / Summit, or if you
- boot a 32-bit kernel on a 64-bit NUMA platform.
+ For 32-bit this is only needed if you boot a 32-bit
+ kernel on a 64-bit NUMA platform.
Otherwise, you should say N.
-comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
- depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
-
config AMD_NUMA
def_bool y
prompt "Old style AMD Opteron NUMA detection"
@@ -1302,7 +1239,6 @@ config NODES_SHIFT
range 1 10
default "10" if MAXSMP
default "6" if X86_64
- default "4" if X86_NUMAQ
default "3"
depends on NEED_MULTIPLE_NODES
---help---
@@ -1850,17 +1786,29 @@ config DEBUG_HOTPLUG_CPU0
If unsure, say N.
config COMPAT_VDSO
- def_bool y
- prompt "Compat VDSO support"
+ def_bool n
+ prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
depends on X86_32 || IA32_EMULATION
---help---
- Map the 32-bit VDSO to the predictable old-style address too.
+ Certain buggy versions of glibc will crash if they are
+ presented with a 32-bit vDSO that is not mapped at the address
+ indicated in its segment table.
- Say N here if you are running a sufficiently recent glibc
- version (2.3.3 or later), to remove the high-mapped
- VDSO mapping and to exclusively use the randomized VDSO.
+ The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
+ and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
+ 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is
+ the only released version with the bug, but OpenSUSE 9
+ contains a buggy "glibc 2.3.2".
- If unsure, say Y.
+ The symptom of the bug is that everything crashes on startup, saying:
+ dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
+
+ Saying Y here changes the default value of the vdso32 boot
+ option from 1 to 0, which turns off the 32-bit vDSO entirely.
+ This works around the glibc bug but hurts performance.
+
+ If unsure, say N: if you are compiling your own kernel, you
+ are unlikely to be using a buggy version of glibc.
config CMDLINE_BOOL
bool "Built-in kernel command line"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f3aaf231b4e5..6983314c8b37 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -359,7 +359,7 @@ config X86_P6_NOP
config X86_TSC
def_bool y
- depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
config X86_CMPXCHG64
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3b9348a0c1a4..602f57e590b5 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -108,7 +108,7 @@ else
# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
- KBUILD_CFLAGS += -maccumulate-outgoing-args
+ KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
endif
# Make sure compiler does not have buggy stack-protector support.
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 50f8c5e0f37e..bd49ec61255c 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -177,14 +177,6 @@ static inline void wrgs32(u32 v, addr_t addr)
}
/* Note: these only return true/false, not a signed return value! */
-static inline int memcmp(const void *s1, const void *s2, size_t len)
-{
- u8 diff;
- asm("repe; cmpsb; setnz %0"
- : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
- return diff;
-}
-
static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
{
u8 diff;
@@ -228,11 +220,6 @@ void copy_to_fs(addr_t dst, void *src, size_t len);
void *copy_from_fs(void *dst, addr_t src, size_t len);
void copy_to_gs(addr_t dst, void *src, size_t len);
void *copy_from_gs(void *dst, addr_t src, size_t len);
-void *memcpy(void *dst, void *src, size_t len);
-void *memset(void *dst, int c, size_t len);
-
-#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
-#define memset(d,c,l) __builtin_memset(d,c,l)
/* a20.c */
int enable_a20(void);
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 1e6146137f8e..4703a6c4b8e3 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -112,7 +112,7 @@ __file_size64(void *__fh, efi_char16_t *filename_16,
efi_file_info_t *info;
efi_status_t status;
efi_guid_t info_guid = EFI_FILE_INFO_ID;
- u32 info_sz;
+ u64 info_sz;
status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16,
EFI_FILE_MODE_READ, (u64)0);
@@ -167,31 +167,31 @@ efi_file_size(efi_system_table_t *sys_table, void *__fh,
}
static inline efi_status_t
-efi_file_read(void *__fh, void *handle, unsigned long *size, void *addr)
+efi_file_read(void *handle, unsigned long *size, void *addr)
{
unsigned long func;
if (efi_early->is64) {
- efi_file_handle_64_t *fh = __fh;
+ efi_file_handle_64_t *fh = handle;
func = (unsigned long)fh->read;
return efi_early->call(func, handle, size, addr);
} else {
- efi_file_handle_32_t *fh = __fh;
+ efi_file_handle_32_t *fh = handle;
func = (unsigned long)fh->read;
return efi_early->call(func, handle, size, addr);
}
}
-static inline efi_status_t efi_file_close(void *__fh, void *handle)
+static inline efi_status_t efi_file_close(void *handle)
{
if (efi_early->is64) {
- efi_file_handle_64_t *fh = __fh;
+ efi_file_handle_64_t *fh = handle;
return efi_early->call((unsigned long)fh->close, handle);
} else {
- efi_file_handle_32_t *fh = __fh;
+ efi_file_handle_32_t *fh = handle;
return efi_early->call((unsigned long)fh->close, handle);
}
@@ -1016,6 +1016,9 @@ void setup_graphics(struct boot_params *boot_params)
* Because the x86 boot code expects to be passed a boot_params we
* need to create one ourselves (usually the bootloader would create
* one for us).
+ *
+ * The caller is responsible for filling out ->code32_start in the
+ * returned boot_params.
*/
struct boot_params *make_boot_params(struct efi_config *c)
{
@@ -1081,8 +1084,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
hdr->vid_mode = 0xffff;
hdr->boot_flag = 0xAA55;
- hdr->code32_start = (__u64)(unsigned long)image->image_base;
-
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index de9d4200d305..cbed1407a5cd 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -59,6 +59,7 @@ ENTRY(efi_pe_entry)
call make_boot_params
cmpl $0, %eax
je fail
+ movl %esi, BP_code32_start(%eax)
popl %ecx
pushl %eax
pushl %ecx
@@ -90,12 +91,7 @@ fail:
hlt
jmp fail
2:
- call 3f
-3:
- popl %eax
- subl $3b, %eax
- subl BP_pref_address(%esi), %eax
- add BP_code32_start(%esi), %eax
+ movl BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
jmp *%eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 57e58a5fa210..0d558ee899ae 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -261,6 +261,8 @@ ENTRY(efi_pe_entry)
cmpq $0,%rax
je fail
mov %rax, %rsi
+ leaq startup_32(%rip), %rax
+ movl %eax, BP_code32_start(%rsi)
jmp 2f /* Skip the relocation */
handover_entry:
@@ -284,12 +286,7 @@ fail:
hlt
jmp fail
2:
- call 3f
-3:
- popq %rax
- subq $3b, %rax
- subq BP_pref_address(%rsi), %rax
- add BP_code32_start(%esi), %eax
+ movl BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
jmp *%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 196eaf373a06..17684615374b 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -10,6 +10,7 @@
*/
#include "misc.h"
+#include "../string.h"
/* WARNING!!
* This code is compiled with -fPIC and it is relocated dynamically
@@ -97,8 +98,14 @@
*/
#define STATIC static
-#undef memset
#undef memcpy
+
+/*
+ * Use a normal definition of memset() from string.c. There are already
+ * included header files which expect a definition of memset() and by
+ * the time we define memset macro, it is too late.
+ */
+#undef memset
#define memzero(s, n) memset((s), 0, (n))
@@ -109,9 +116,6 @@ static void error(char *m);
*/
struct boot_params *real_mode; /* Pointer to real-mode data */
-void *memset(void *s, int c, size_t n);
-void *memcpy(void *dest, const void *src, size_t n);
-
memptr free_mem_ptr;
memptr free_mem_end_ptr;
@@ -216,45 +220,6 @@ void __putstr(const char *s)
outb(0xff & (pos >> 1), vidport+1);
}
-void *memset(void *s, int c, size_t n)
-{
- int i;
- char *ss = s;
-
- for (i = 0; i < n; i++)
- ss[i] = c;
- return s;
-}
-#ifdef CONFIG_X86_32
-void *memcpy(void *dest, const void *src, size_t n)
-{
- int d0, d1, d2;
- asm volatile(
- "rep ; movsl\n\t"
- "movl %4,%%ecx\n\t"
- "rep ; movsb\n\t"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
- : "memory");
-
- return dest;
-}
-#else
-void *memcpy(void *dest, const void *src, size_t n)
-{
- long d0, d1, d2;
- asm volatile(
- "rep ; movsq\n\t"
- "movq %4,%%rcx\n\t"
- "rep ; movsb\n\t"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
- : "memory");
-
- return dest;
-}
-#endif
-
static void error(char *x)
{
error_putstr("\n\n");
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index ffb9c5c9d748..f3c57e341402 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,11 +1,45 @@
#include "misc.h"
+#include "../string.c"
+
+/* misc.h might pull in string_32.h which has a macro for memcpy. undef that */
+#undef memcpy
-int memcmp(const void *s1, const void *s2, size_t len)
+#ifdef CONFIG_X86_32
+void *memcpy(void *dest, const void *src, size_t n)
{
- u8 diff;
- asm("repe; cmpsb; setnz %0"
- : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
- return diff;
+ int d0, d1, d2;
+ asm volatile(
+ "rep ; movsl\n\t"
+ "movl %4,%%ecx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src)
+ : "memory");
+
+ return dest;
}
+#else
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ long d0, d1, d2;
+ asm volatile(
+ "rep ; movsq\n\t"
+ "movq %4,%%rcx\n\t"
+ "rep ; movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src)
+ : "memory");
-#include "../string.c"
+ return dest;
+}
+#endif
+
+void *memset(void *s, int c, size_t n)
+{
+ int i;
+ char *ss = s;
+
+ for (i = 0; i < n; i++)
+ ss[i] = c;
+ return s;
+}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index f0d0b20fe149..1fd7d575092e 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -27,6 +27,7 @@
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
+#include "string.h"
static u32 err_flags[NCAPINTS];
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index c501a5b466f8..223e42527077 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -15,6 +15,7 @@
#include "boot.h"
#include <linux/edd.h>
+#include "string.h"
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index cf6083d444f4..fd6c9f236996 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -14,6 +14,7 @@
*/
#include "boot.h"
+#include "string.h"
struct boot_params boot_params __attribute__((aligned(16)));
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
index 958019b1cfa5..c0fb356a3092 100644
--- a/arch/x86/boot/regs.c
+++ b/arch/x86/boot/regs.c
@@ -17,6 +17,7 @@
*/
#include "boot.h"
+#include "string.h"
void initregs(struct biosregs *reg)
{
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 574dedfe2890..5339040ef86e 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,20 @@
#include "boot.h"
+/*
+ * This file gets included in compressed/string.c which might pull in
+ * string_32.h and which in turn maps memcmp to __builtin_memcmp(). Undo
+ * that first.
+ */
+#undef memcmp
+int memcmp(const void *s1, const void *s2, size_t len)
+{
+ u8 diff;
+ asm("repe; cmpsb; setnz %0"
+ : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ return diff;
+}
+
int strcmp(const char *str1, const char *str2)
{
const unsigned char *s1 = (const unsigned char *)str1;
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
new file mode 100644
index 000000000000..725e820602b1
--- /dev/null
+++ b/arch/x86/boot/string.h
@@ -0,0 +1,21 @@
+#ifndef BOOT_STRING_H
+#define BOOT_STRING_H
+
+/* Undef any of these macros coming from string_32.h. */
+#undef memcpy
+#undef memset
+#undef memcmp
+
+void *memcpy(void *dst, const void *src, size_t len);
+void *memset(void *dst, int c, size_t len);
+int memcmp(const void *s1, const void *s2, size_t len);
+
+/*
+ * Access builtin version by default. If one needs to use optimized version,
+ * do "undef memcpy" in .c file and link against right string.c
+ */
+#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
+#define memset(d,c,l) __builtin_memset(d,c,l)
+#define memcmp __builtin_memcmp
+
+#endif /* BOOT_STRING_H */
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 11e8c6eb80a1..ba3e100654db 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -16,6 +16,7 @@
#include "boot.h"
#include "video.h"
#include "vesa.h"
+#include "string.h"
/* VESA information */
static struct vesa_general_info vginfo;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index a7fef2621cc9..619e7f7426c6 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -60,7 +60,6 @@ CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
-CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index c1119d4c1281..6181c69b786b 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -58,7 +58,6 @@ CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
-CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
# CONFIG_CPU_FREQ_STAT is not set
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 6ba54d640383..61d6e281898b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+ifeq ($(avx2_supported),yes)
+sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+endif
crc32c-intel-y := crc32c-intel_glue.o
crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 50ec333b70e6..8af519ed73d1 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -223,9 +223,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= 1;
dst -= 1;
} while (nbytes >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
}
/* Handle leftovers */
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index e6a3700489b9..e57e20ab5e0b 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -203,9 +203,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= 1;
dst -= 1;
} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
}
/* Handle leftovers */
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 586f41aac361..185fad49d86f 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -24,10 +24,6 @@
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
- .octa 0xc2000000000000000000000000000001
-.Ltwo_one:
- .octa 0x00000001000000000000000000000001
#define DATA %xmm0
#define SHASH %xmm1
@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update)
.Lupdate_just_ret:
ret
ENDPROC(clmul_ghash_update)
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
- movaps .Lbswap_mask, BSWAP
- movups (%rsi), %xmm0
- PSHUFB_XMM BSWAP %xmm0
- movaps %xmm0, %xmm1
- psllq $1, %xmm0
- psrlq $63, %xmm1
- movaps %xmm1, %xmm2
- pslldq $8, %xmm1
- psrldq $8, %xmm2
- por %xmm1, %xmm0
- # reduction
- pshufd $0b00100100, %xmm2, %xmm1
- pcmpeqd .Ltwo_one, %xmm1
- pand .Lpoly, %xmm1
- pxor %xmm1, %xmm0
- movups %xmm0, (%rdi)
- ret
-ENDPROC(clmul_ghash_setkey)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 6759dd1135be..d785cf2c529c 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash);
void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
const be128 *shash);
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
struct ghash_async_ctx {
struct cryptd_ahash *cryptd_tfm;
};
@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *key, unsigned int keylen)
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *x = (be128 *)key;
+ u64 a, b;
if (keylen != GHASH_BLOCK_SIZE) {
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
- clmul_ghash_setkey(&ctx->shash, key);
+ /* perform multiplication by 'x' in GF(2^128) */
+ a = be64_to_cpu(x->a);
+ b = be64_to_cpu(x->b);
+
+ ctx->shash.a = (__be64)((b << 1) | (a >> 63));
+ ctx->shash.b = (__be64)((a << 1) | (b >> 63));
+
+ if (a >> 63)
+ ctx->shash.b ^= cpu_to_be64(0xc2);
return 0;
}
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
new file mode 100644
index 000000000000..1cd792db15ef
--- /dev/null
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -0,0 +1,708 @@
+/*
+ * Implement fast SHA-1 with AVX2 instructions. (x86_64)
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Ilya Albrekht <ilya.albrekht@intel.com>
+ * Maxim Locktyukhin <maxim.locktyukhin@intel.com>
+ * Ronen Zohar <ronen.zohar@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
+ *
+ *This implementation is based on the previous SSSE3 release:
+ *Visit http://software.intel.com/en-us/articles/
+ *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
+ *
+ *Updates 20-byte SHA-1 record in 'hash' for even number of
+ *'num_blocks' consecutive 64-byte blocks
+ *
+ *extern "C" void sha1_transform_avx2(
+ * int *hash, const char* input, size_t num_blocks );
+ */
+
+#include <linux/linkage.h>
+
+#define CTX %rdi /* arg1 */
+#define BUF %rsi /* arg2 */
+#define CNT %rdx /* arg3 */
+
+#define REG_A %ecx
+#define REG_B %esi
+#define REG_C %edi
+#define REG_D %eax
+#define REG_E %edx
+#define REG_TB %ebx
+#define REG_TA %r12d
+#define REG_RA %rcx
+#define REG_RB %rsi
+#define REG_RC %rdi
+#define REG_RD %rax
+#define REG_RE %rdx
+#define REG_RTA %r12
+#define REG_RTB %rbx
+#define REG_T1 %ebp
+#define xmm_mov vmovups
+#define avx2_zeroupper vzeroupper
+#define RND_F1 1
+#define RND_F2 2
+#define RND_F3 3
+
+.macro REGALLOC
+ .set A, REG_A
+ .set B, REG_B
+ .set C, REG_C
+ .set D, REG_D
+ .set E, REG_E
+ .set TB, REG_TB
+ .set TA, REG_TA
+
+ .set RA, REG_RA
+ .set RB, REG_RB
+ .set RC, REG_RC
+ .set RD, REG_RD
+ .set RE, REG_RE
+
+ .set RTA, REG_RTA
+ .set RTB, REG_RTB
+
+ .set T1, REG_T1
+.endm
+
+#define K_BASE %r8
+#define HASH_PTR %r9
+#define BUFFER_PTR %r10
+#define BUFFER_PTR2 %r13
+#define BUFFER_END %r11
+
+#define PRECALC_BUF %r14
+#define WK_BUF %r15
+
+#define W_TMP %xmm0
+#define WY_TMP %ymm0
+#define WY_TMP2 %ymm9
+
+# AVX2 variables
+#define WY0 %ymm3
+#define WY4 %ymm5
+#define WY08 %ymm7
+#define WY12 %ymm8
+#define WY16 %ymm12
+#define WY20 %ymm13
+#define WY24 %ymm14
+#define WY28 %ymm15
+
+#define YMM_SHUFB_BSWAP %ymm10
+
+/*
+ * Keep 2 iterations precalculated at a time:
+ * - 80 DWORDs per iteration * 2
+ */
+#define W_SIZE (80*2*2 +16)
+
+#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF)
+#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF)
+
+
+.macro UPDATE_HASH hash, val
+ add \hash, \val
+ mov \val, \hash
+.endm
+
+.macro PRECALC_RESET_WY
+ .set WY_00, WY0
+ .set WY_04, WY4
+ .set WY_08, WY08
+ .set WY_12, WY12
+ .set WY_16, WY16
+ .set WY_20, WY20
+ .set WY_24, WY24
+ .set WY_28, WY28
+ .set WY_32, WY_00
+.endm
+
+.macro PRECALC_ROTATE_WY
+ /* Rotate macros */
+ .set WY_32, WY_28
+ .set WY_28, WY_24
+ .set WY_24, WY_20
+ .set WY_20, WY_16
+ .set WY_16, WY_12
+ .set WY_12, WY_08
+ .set WY_08, WY_04
+ .set WY_04, WY_00
+ .set WY_00, WY_32
+
+ /* Define register aliases */
+ .set WY, WY_00
+ .set WY_minus_04, WY_04
+ .set WY_minus_08, WY_08
+ .set WY_minus_12, WY_12
+ .set WY_minus_16, WY_16
+ .set WY_minus_20, WY_20
+ .set WY_minus_24, WY_24
+ .set WY_minus_28, WY_28
+ .set WY_minus_32, WY
+.endm
+
+.macro PRECALC_00_15
+ .if (i == 0) # Initialize and rotate registers
+ PRECALC_RESET_WY
+ PRECALC_ROTATE_WY
+ .endif
+
+ /* message scheduling pre-compute for rounds 0-15 */
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ .elseif ((i & 7) == 1)
+ vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ WY_TMP, WY_TMP
+ .elseif ((i & 7) == 2)
+ vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
+ .elseif ((i & 7) == 4)
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ .elseif ((i & 7) == 7)
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC_16_31
+ /*
+ * message scheduling pre-compute for rounds 16-31
+ * calculating last 32 w[i] values in 8 XMM registers
+ * pre-calculate K+w[i] values and store to mem
+ * for later load by ALU add instruction
+ *
+ * "brute force" vectorization for rounds 16-31 only
+ * due to w[i]->w[i-3] dependency
+ */
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ /* w[i-14] */
+ vpalignr $8, WY_minus_16, WY_minus_12, WY
+ vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */
+ .elseif ((i & 7) == 1)
+ vpxor WY_minus_08, WY, WY
+ vpxor WY_minus_16, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 2)
+ vpxor WY_TMP, WY, WY
+ vpslldq $12, WY, WY_TMP2
+ .elseif ((i & 7) == 3)
+ vpslld $1, WY, WY_TMP
+ vpsrld $31, WY, WY
+ .elseif ((i & 7) == 4)
+ vpor WY, WY_TMP, WY_TMP
+ vpslld $2, WY_TMP2, WY
+ .elseif ((i & 7) == 5)
+ vpsrld $30, WY_TMP2, WY_TMP2
+ vpxor WY, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 7)
+ vpxor WY_TMP2, WY_TMP, WY
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC_32_79
+ /*
+ * in SHA-1 specification:
+ * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
+ * instead we do equal:
+ * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
+ * allows more efficient vectorization
+ * since w[i]=>w[i-3] dependency is broken
+ */
+
+ .if ((i & 7) == 0)
+ /*
+ * blended AVX2 and ALU instruction scheduling
+ * 1 vector iteration per 8 rounds
+ */
+ vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP
+ .elseif ((i & 7) == 1)
+ /* W is W_minus_32 before xor */
+ vpxor WY_minus_28, WY, WY
+ .elseif ((i & 7) == 2)
+ vpxor WY_minus_16, WY_TMP, WY_TMP
+ .elseif ((i & 7) == 3)
+ vpxor WY_TMP, WY, WY
+ .elseif ((i & 7) == 4)
+ vpslld $2, WY, WY_TMP
+ .elseif ((i & 7) == 5)
+ vpsrld $30, WY, WY
+ vpor WY, WY_TMP, WY
+ .elseif ((i & 7) == 7)
+ vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vmovdqu WY_TMP, PRECALC_WK(i&~7)
+
+ PRECALC_ROTATE_WY
+ .endif
+.endm
+
+.macro PRECALC r, s
+ .set i, \r
+
+ .if (i < 40)
+ .set K_XMM, 32*0
+ .elseif (i < 80)
+ .set K_XMM, 32*1
+ .elseif (i < 120)
+ .set K_XMM, 32*2
+ .else
+ .set K_XMM, 32*3
+ .endif
+
+ .if (i<32)
+ PRECALC_00_15 \s
+ .elseif (i<64)
+ PRECALC_16_31 \s
+ .elseif (i < 160)
+ PRECALC_32_79 \s
+ .endif
+.endm
+
+.macro ROTATE_STATE
+ .set T_REG, E
+ .set E, D
+ .set D, C
+ .set C, B
+ .set B, TB
+ .set TB, A
+ .set A, T_REG
+
+ .set T_REG, RE
+ .set RE, RD
+ .set RD, RC
+ .set RC, RB
+ .set RB, RTB
+ .set RTB, RA
+ .set RA, T_REG
+.endm
+
+/* Macro relies on saved ROUND_Fx */
+
+.macro RND_FUN f, r
+ .if (\f == RND_F1)
+ ROUND_F1 \r
+ .elseif (\f == RND_F2)
+ ROUND_F2 \r
+ .elseif (\f == RND_F3)
+ ROUND_F3 \r
+ .endif
+.endm
+
+.macro RR r
+ .set round_id, (\r % 80)
+
+ .if (round_id == 0) /* Precalculate F for first round */
+ .set ROUND_FUNC, RND_F1
+ mov B, TB
+
+ rorx $(32-30), B, B /* b>>>2 */
+ andn D, TB, T1
+ and C, TB
+ xor T1, TB
+ .endif
+
+ RND_FUN ROUND_FUNC, \r
+ ROTATE_STATE
+
+ .if (round_id == 18)
+ .set ROUND_FUNC, RND_F2
+ .elseif (round_id == 38)
+ .set ROUND_FUNC, RND_F3
+ .elseif (round_id == 58)
+ .set ROUND_FUNC, RND_F2
+ .endif
+
+ .set round_id, ( (\r+1) % 80)
+
+ RND_FUN ROUND_FUNC, (\r+1)
+ ROTATE_STATE
+.endm
+
+.macro ROUND_F1 r
+ add WK(\r), E
+
+ andn C, A, T1 /* ~b&d */
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ rorx $(32-30),A, TB /* b>>>2 for next round */
+
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ /*
+ * Calculate F for the next round
+ * (b & c) ^ andn[b, d]
+ */
+ and B, A /* b&c */
+ xor T1, A /* F1 = (b&c) ^ (~b&d) */
+
+ lea (RE,RTA), E /* E += A >>> 5 */
+.endm
+
+.macro ROUND_F2 r
+ add WK(\r), E
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ /* Calculate F for the next round */
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ .if ((round_id) < 79)
+ rorx $(32-30), A, TB /* b>>>2 for next round */
+ .endif
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ .if ((round_id) < 79)
+ xor B, A
+ .endif
+
+ add TA, E /* E += A >>> 5 */
+
+ .if ((round_id) < 79)
+ xor C, A
+ .endif
+.endm
+
+.macro ROUND_F3 r
+ add WK(\r), E
+ PRECALC (\r) /* msg scheduling for next 2 blocks */
+
+ lea (RE,RTB), E /* Add F from the previous round */
+
+ mov B, T1
+ or A, T1
+
+ rorx $(32-5), A, TA /* T2 = A >>> 5 */
+ rorx $(32-30), A, TB /* b>>>2 for next round */
+
+ /* Calculate F for the next round
+ * (b and c) or (d and (b or c))
+ */
+ and C, T1
+ and B, A
+ or T1, A
+
+ add TA, E /* E += A >>> 5 */
+
+.endm
+
+/*
+ * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
+ */
+.macro SHA1_PIPELINED_MAIN_BODY
+
+ REGALLOC
+
+ mov (HASH_PTR), A
+ mov 4(HASH_PTR), B
+ mov 8(HASH_PTR), C
+ mov 12(HASH_PTR), D
+ mov 16(HASH_PTR), E
+
+ mov %rsp, PRECALC_BUF
+ lea (2*4*80+32)(%rsp), WK_BUF
+
+ # Precalc WK for first 2 blocks
+ PRECALC_OFFSET = 0
+ .set i, 0
+ .rept 160
+ PRECALC i
+ .set i, i + 1
+ .endr
+ PRECALC_OFFSET = 128
+ xchg WK_BUF, PRECALC_BUF
+
+ .align 32
+_loop:
+ /*
+ * code loops through more than one block
+ * we use K_BASE value as a signal of a last block,
+ * it is set below by: cmovae BUFFER_PTR, K_BASE
+ */
+ cmp K_BASE, BUFFER_PTR
+ jne _begin
+ .align 32
+ jmp _end
+ .align 32
+_begin:
+
+ /*
+ * Do first block
+ * rounds: 0,2,4,6,8
+ */
+ .set j, 0
+ .rept 5
+ RR j
+ .set j, j+2
+ .endr
+
+ jmp _loop0
+_loop0:
+
+ /*
+ * rounds:
+ * 10,12,14,16,18
+ * 20,22,24,26,28
+ * 30,32,34,36,38
+ * 40,42,44,46,48
+ * 50,52,54,56,58
+ */
+ .rept 25
+ RR j
+ .set j, j+2
+ .endr
+
+ add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
+ cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
+ cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+
+ /*
+ * rounds
+ * 60,62,64,66,68
+ * 70,72,74,76,78
+ */
+ .rept 10
+ RR j
+ .set j, j+2
+ .endr
+
+ UPDATE_HASH (HASH_PTR), A
+ UPDATE_HASH 4(HASH_PTR), TB
+ UPDATE_HASH 8(HASH_PTR), C
+ UPDATE_HASH 12(HASH_PTR), D
+ UPDATE_HASH 16(HASH_PTR), E
+
+ cmp K_BASE, BUFFER_PTR /* is current block the last one? */
+ je _loop
+
+ mov TB, B
+
+ /* Process second block */
+ /*
+ * rounds
+ * 0+80, 2+80, 4+80, 6+80, 8+80
+ * 10+80,12+80,14+80,16+80,18+80
+ */
+
+ .set j, 0
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ jmp _loop1
+_loop1:
+ /*
+ * rounds
+ * 20+80,22+80,24+80,26+80,28+80
+ * 30+80,32+80,34+80,36+80,38+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ jmp _loop2
+_loop2:
+
+ /*
+ * rounds
+ * 40+80,42+80,44+80,46+80,48+80
+ * 50+80,52+80,54+80,56+80,58+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
+
+ cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
+ cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+
+ jmp _loop3
+_loop3:
+
+ /*
+ * rounds
+ * 60+80,62+80,64+80,66+80,68+80
+ * 70+80,72+80,74+80,76+80,78+80
+ */
+ .rept 10
+ RR j+80
+ .set j, j+2
+ .endr
+
+ UPDATE_HASH (HASH_PTR), A
+ UPDATE_HASH 4(HASH_PTR), TB
+ UPDATE_HASH 8(HASH_PTR), C
+ UPDATE_HASH 12(HASH_PTR), D
+ UPDATE_HASH 16(HASH_PTR), E
+
+ /* Reset state for AVX2 reg permutation */
+ mov A, TA
+ mov TB, A
+ mov C, TB
+ mov E, C
+ mov D, B
+ mov TA, D
+
+ REGALLOC
+
+ xchg WK_BUF, PRECALC_BUF
+
+ jmp _loop
+
+ .align 32
+ _end:
+
+.endm
+/*
+ * macro implements SHA-1 function's body for several 64-byte blocks
+ * param: function's name
+ */
+.macro SHA1_VECTOR_ASM name
+ ENTRY(\name)
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ RESERVE_STACK = (W_SIZE*4 + 8+24)
+
+ /* Align stack */
+ mov %rsp, %rbx
+ and $~(0x20-1), %rsp
+ push %rbx
+ sub $RESERVE_STACK, %rsp
+
+ avx2_zeroupper
+
+ lea K_XMM_AR(%rip), K_BASE
+
+ mov CTX, HASH_PTR
+ mov BUF, BUFFER_PTR
+ lea 64(BUF), BUFFER_PTR2
+
+ shl $6, CNT /* mul by 64 */
+ add BUF, CNT
+ add $64, CNT
+ mov CNT, BUFFER_END
+
+ cmp BUFFER_END, BUFFER_PTR2
+ cmovae K_BASE, BUFFER_PTR2
+
+ xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
+
+ SHA1_PIPELINED_MAIN_BODY
+
+ avx2_zeroupper
+
+ add $RESERVE_STACK, %rsp
+ pop %rsp
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+
+ ENDPROC(\name)
+.endm
+
+.section .rodata
+
+#define K1 0x5a827999
+#define K2 0x6ed9eba1
+#define K3 0x8f1bbcdc
+#define K4 0xca62c1d6
+
+.align 128
+K_XMM_AR:
+ .long K1, K1, K1, K1
+ .long K1, K1, K1, K1
+ .long K2, K2, K2, K2
+ .long K2, K2, K2, K2
+ .long K3, K3, K3, K3
+ .long K3, K3, K3, K3
+ .long K4, K4, K4, K4
+ .long K4, K4, K4, K4
+
+BSWAP_SHUFB_CTL:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+.text
+
+SHA1_VECTOR_ASM sha1_transform_avx2
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4a11a9d72451..74d16ef707c7 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -10,6 +10,7 @@
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
#endif
+#ifdef CONFIG_AS_AVX2
+#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
+
+asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
+ unsigned int rounds);
+#endif
static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
@@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
+#ifdef CONFIG_AS_AVX2
+static void sha1_apply_transform_avx2(u32 *digest, const char *data,
+ unsigned int rounds)
+{
+ /* Select the optimal transform based on data block size */
+ if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
+ sha1_transform_avx2(digest, data, rounds);
+ else
+ sha1_transform_avx(digest, data, rounds);
+}
+#endif
+
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_ssse3_init,
@@ -201,27 +220,49 @@ static bool __init avx_usable(void)
return true;
}
+
+#ifdef CONFIG_AS_AVX2
+static bool __init avx2_usable(void)
+{
+ if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) &&
+ boot_cpu_has(X86_FEATURE_BMI2))
+ return true;
+
+ return false;
+}
+#endif
#endif
static int __init sha1_ssse3_mod_init(void)
{
+ char *algo_name;
+
/* test for SSSE3 first */
- if (cpu_has_ssse3)
+ if (cpu_has_ssse3) {
sha1_transform_asm = sha1_transform_ssse3;
+ algo_name = "SSSE3";
+ }
#ifdef CONFIG_AS_AVX
/* allow AVX to override SSSE3, it's a little faster */
- if (avx_usable())
+ if (avx_usable()) {
sha1_transform_asm = sha1_transform_avx;
+ algo_name = "AVX";
+#ifdef CONFIG_AS_AVX2
+ /* allow AVX2 to override AVX, it's a little faster */
+ if (avx2_usable()) {
+ sha1_transform_asm = sha1_apply_transform_avx2;
+ algo_name = "AVX2";
+ }
+#endif
+ }
#endif
if (sha1_transform_asm) {
- pr_info("Using %s optimized SHA-1 implementation\n",
- sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
- : "AVX");
+ pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
return crypto_register_shash(&alg);
}
- pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+ pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
return -ENODEV;
}
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4acddc43ee0c..3ca9762e1649 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -5,5 +5,6 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
+generic-y += early_ioremap.h
generic-y += cputime.h
generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index e6a92455740e..69f1366f1aa3 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -1,7 +1,7 @@
/*
* This file is part of the Linux kernel.
*
- * Copyright (c) 2011, Intel Corporation
+ * Copyright (c) 2011-2014, Intel Corporation
* Authors: Fenghua Yu <fenghua.yu@intel.com>,
* H. Peter Anvin <hpa@linux.intel.com>
*
@@ -31,10 +31,13 @@
#define RDRAND_RETRY_LOOPS 10
#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
+#define RDSEED_INT ".byte 0x0f,0xc7,0xf8"
#ifdef CONFIG_X86_64
# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
+# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8"
#else
# define RDRAND_LONG RDRAND_INT
+# define RDSEED_LONG RDSEED_INT
#endif
#ifdef CONFIG_ARCH_RANDOM
@@ -53,6 +56,16 @@ static inline int rdrand_long(unsigned long *v)
return ok;
}
+/* A single attempt at RDSEED */
+static inline bool rdseed_long(unsigned long *v)
+{
+ unsigned char ok;
+ asm volatile(RDSEED_LONG "\n\t"
+ "setc %0"
+ : "=qm" (ok), "=a" (*v));
+ return ok;
+}
+
#define GET_RANDOM(name, type, rdrand, nop) \
static inline int name(type *v) \
{ \
@@ -70,18 +83,40 @@ static inline int name(type *v) \
return ok; \
}
+#define GET_SEED(name, type, rdseed, nop) \
+static inline int name(type *v) \
+{ \
+ unsigned char ok; \
+ alternative_io("movb $0, %0\n\t" \
+ nop, \
+ rdseed "\n\t" \
+ "setc %0", \
+ X86_FEATURE_RDSEED, \
+ ASM_OUTPUT2("=q" (ok), "=a" (*v))); \
+ return ok; \
+}
+
#ifdef CONFIG_X86_64
GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5);
+GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
+
#else
GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4);
+GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4);
+
#endif /* CONFIG_X86_64 */
+#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND)
+#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED)
+
#else
static inline int rdrand_long(unsigned long *v)
@@ -89,6 +124,11 @@ static inline int rdrand_long(unsigned long *v)
return 0;
}
+static inline bool rdseed_long(unsigned long *v)
+{
+ return 0;
+}
+
#endif /* CONFIG_ARCH_RANDOM */
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 2f03ff018d36..ba38ebbaced3 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_BUG_H
#define _ASM_X86_BUG_H
-#ifdef CONFIG_BUG
#define HAVE_ARCH_BUG
#ifdef CONFIG_DEBUG_BUGVERBOSE
@@ -33,8 +32,6 @@ do { \
} while (0)
#endif
-#endif /* !CONFIG_BUG */
-
#include <asm-generic/bug.h>
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index 16a57f4ed64d..eda81dc0f4ae 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#ifdef CONFIG_X86_64
-
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
int vclock_mode;
};
-#endif /* CONFIG_X86_64 */
-
#endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 63211ef5046a..e265ff95d16d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -546,6 +546,13 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define static_cpu_has_bug(bit) static_cpu_has((bit))
#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
+#define MAX_CPU_FEATURES (NCAPINTS * 32)
+#define cpu_have_feature boot_cpu_has
+
+#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
+#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
+ boot_cpu_data.x86_model
+
#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9c999c1674fa..2c71182d30ef 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -281,16 +281,12 @@ do { \
#define STACK_RND_MASK (0x7ff)
-#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
-
#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
#else /* CONFIG_X86_32 */
-#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
-
/* 1GB for 64bit, 8MB for 32bit */
#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd339175..43f482a0db37 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -40,15 +40,8 @@
*/
extern unsigned long __FIXADDR_TOP;
#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
-
-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
#else
#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
-
-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
-#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
#endif
@@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP;
enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_HOLE,
- FIX_VDSO,
#else
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
@@ -98,12 +90,6 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
-#ifdef CONFIG_X86_VISWS_APIC
- FIX_CO_CPU, /* Cobalt timer */
- FIX_CO_APIC, /* Cobalt APIC Redirection Table */
- FIX_LI_PCIA, /* Lithium PCI Bridge A */
- FIX_LI_PCIB, /* Lithium PCI Bridge B */
-#endif
FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
@@ -177,5 +163,11 @@ static inline void __set_fixmap(enum fixed_addresses idx,
#include <asm-generic/fixmap.h>
+#define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags)
+#define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0))
+
+void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 67d69b8e2d20..a307b7530e54 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -98,7 +98,6 @@ extern void trace_call_function_single_interrupt(void);
#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
extern unsigned long io_apic_irqs;
-extern void init_VISWS_APIC_irqs(void);
extern void setup_IO_APIC(void);
extern void disable_IO_APIC(void);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 91d9c69a629e..b8237d8a1e0c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -39,6 +39,7 @@
#include <linux/string.h>
#include <linux/compiler.h>
#include <asm/page.h>
+#include <asm/early_ioremap.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -316,19 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
unsigned long prot_val);
extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
-/*
- * early_ioremap() and early_iounmap() are for temporary early boot-time
- * mappings, before the real ioremap() is functional.
- * A boot-time mapping is currently limited to at most 16 pages.
- */
-extern void early_ioremap_init(void);
-extern void early_ioremap_reset(void);
-extern void __iomem *early_ioremap(resource_size_t phys_addr,
- unsigned long size);
-extern void __iomem *early_memremap(resource_size_t phys_addr,
- unsigned long size);
-extern void early_iounmap(void __iomem *addr, unsigned long size);
-extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
#ifdef CONFIG_XEN
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83afbb7d9..fcaf9c961265 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -337,6 +337,11 @@ struct kvm_pmu {
u64 reprogram_pmi;
};
+enum {
+ KVM_DEBUGREG_BP_ENABLED = 1,
+ KVM_DEBUGREG_WONT_EXIT = 2,
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -444,7 +449,6 @@ struct kvm_vcpu_arch {
} st;
u64 last_guest_tsc;
- u64 last_kernel_ns;
u64 last_host_tsc;
u64 tsc_offset_adjustment;
u64 this_tsc_nsec;
@@ -464,7 +468,7 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
- int switch_db_regs;
+ unsigned switch_db_regs;
unsigned long db[KVM_NR_DB_REGS];
unsigned long dr6;
unsigned long dr7;
@@ -599,6 +603,8 @@ struct kvm_arch {
bool use_master_clock;
u64 master_kernel_ns;
cycle_t master_cycle_now;
+ struct delayed_work kvmclock_update_work;
+ struct delayed_work kvmclock_sync_work;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -702,6 +708,7 @@ struct kvm_x86_ops {
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
u64 (*get_dr6)(struct kvm_vcpu *vcpu);
void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
+ void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -728,8 +735,8 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
- int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
- int (*enable_irq_window)(struct kvm_vcpu *vcpu);
+ void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+ void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
@@ -765,6 +772,9 @@ struct kvm_x86_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+ bool (*mpx_supported)(void);
+
+ int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 8a9b3e288cb4..1ec990bd7dc0 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -11,9 +11,6 @@
#ifdef CONFIG_NUMA
extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
-
-#include <asm/numaq.h>
-
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 3e6b4920ef5d..f5a617956735 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,12 +25,6 @@ extern int pic_mode;
extern unsigned int def_to_bigsmp;
-#ifdef CONFIG_X86_NUMAQ
-extern int mp_bus_id_to_node[MAX_MP_BUSSES];
-extern int mp_bus_id_to_local[MAX_MP_BUSSES];
-extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-#endif
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
deleted file mode 100644
index c3b3c322fd87..000000000000
--- a/arch/x86/include/asm/numaq.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Written by: Patricia Gaughen, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
- */
-
-#ifndef _ASM_X86_NUMAQ_H
-#define _ASM_X86_NUMAQ_H
-
-#ifdef CONFIG_X86_NUMAQ
-
-extern int found_numaq;
-extern int numaq_numa_init(void);
-extern int pci_numaq_init(void);
-
-extern void *xquad_portio;
-
-#define XQUAD_PORTIO_BASE 0xfe400000
-#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
-#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
-
-/*
- * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the
- */
-#define SYS_CFG_DATA_PRIV_ADDR 0x0009d000 /* place for scd in private
- quad space */
-
-/*
- * Communication area for each processor on lynxer-processor tests.
- *
- * NOTE: If you change the size of this eachproc structure you need
- * to change the definition for EACH_QUAD_SIZE.
- */
-struct eachquadmem {
- unsigned int priv_mem_start; /* Starting address of this */
- /* quad's private memory. */
- /* This is always 0. */
- /* In MB. */
- unsigned int priv_mem_size; /* Size of this quad's */
- /* private memory. */
- /* In MB. */
- unsigned int low_shrd_mem_strp_start;/* Starting address of this */
- /* quad's low shared block */
- /* (untranslated). */
- /* In MB. */
- unsigned int low_shrd_mem_start; /* Starting address of this */
- /* quad's low shared memory */
- /* (untranslated). */
- /* In MB. */
- unsigned int low_shrd_mem_size; /* Size of this quad's low */
- /* shared memory. */
- /* In MB. */
- unsigned int lmmio_copb_start; /* Starting address of this */
- /* quad's local memory */
- /* mapped I/O in the */
- /* compatibility OPB. */
- /* In MB. */
- unsigned int lmmio_copb_size; /* Size of this quad's local */
- /* memory mapped I/O in the */
- /* compatibility OPB. */
- /* In MB. */
- unsigned int lmmio_nopb_start; /* Starting address of this */
- /* quad's local memory */
- /* mapped I/O in the */
- /* non-compatibility OPB. */
- /* In MB. */
- unsigned int lmmio_nopb_size; /* Size of this quad's local */
- /* memory mapped I/O in the */
- /* non-compatibility OPB. */
- /* In MB. */
- unsigned int io_apic_0_start; /* Starting address of I/O */
- /* APIC 0. */
- unsigned int io_apic_0_sz; /* Size I/O APIC 0. */
- unsigned int io_apic_1_start; /* Starting address of I/O */
- /* APIC 1. */
- unsigned int io_apic_1_sz; /* Size I/O APIC 1. */
- unsigned int hi_shrd_mem_start; /* Starting address of this */
- /* quad's high shared memory.*/
- /* In MB. */
- unsigned int hi_shrd_mem_size; /* Size of this quad's high */
- /* shared memory. */
- /* In MB. */
- unsigned int mps_table_addr; /* Address of this quad's */
- /* MPS tables from BIOS, */
- /* in system space.*/
- unsigned int lcl_MDC_pio_addr; /* Port-I/O address for */
- /* local access of MDC. */
- unsigned int rmt_MDC_mmpio_addr; /* MM-Port-I/O address for */
- /* remote access of MDC. */
- unsigned int mm_port_io_start; /* Starting address of this */
- /* quad's memory mapped Port */
- /* I/O space. */
- unsigned int mm_port_io_size; /* Size of this quad's memory*/
- /* mapped Port I/O space. */
- unsigned int mm_rmt_io_apic_start; /* Starting address of this */
- /* quad's memory mapped */
- /* remote I/O APIC space. */
- unsigned int mm_rmt_io_apic_size; /* Size of this quad's memory*/
- /* mapped remote I/O APIC */
- /* space. */
- unsigned int mm_isa_start; /* Starting address of this */
- /* quad's memory mapped ISA */
- /* space (contains MDC */
- /* memory space). */
- unsigned int mm_isa_size; /* Size of this quad's memory*/
- /* mapped ISA space (contains*/
- /* MDC memory space). */
- unsigned int rmt_qmi_addr; /* Remote addr to access QMI.*/
- unsigned int lcl_qmi_addr; /* Local addr to access QMI. */
-};
-
-/*
- * Note: This structure must be NOT be changed unless the multiproc and
- * OS are changed to reflect the new structure.
- */
-struct sys_cfg_data {
- unsigned int quad_id;
- unsigned int bsp_proc_id; /* Boot Strap Processor in this quad. */
- unsigned int scd_version; /* Version number of this table. */
- unsigned int first_quad_id;
- unsigned int quads_present31_0; /* 1 bit for each quad */
- unsigned int quads_present63_32; /* 1 bit for each quad */
- unsigned int config_flags;
- unsigned int boot_flags;
- unsigned int csr_start_addr; /* Absolute value (not in MB) */
- unsigned int csr_size; /* Absolute value (not in MB) */
- unsigned int lcl_apic_start_addr; /* Absolute value (not in MB) */
- unsigned int lcl_apic_size; /* Absolute value (not in MB) */
- unsigned int low_shrd_mem_base; /* 0 or 512MB or 1GB */
- unsigned int low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */
- /* may not be totally populated */
- unsigned int split_mem_enbl; /* 0 for no low shared memory */
- unsigned int mmio_sz; /* Size of total system memory mapped I/O */
- /* (in MB). */
- unsigned int quad_spin_lock; /* Spare location used for quad */
- /* bringup. */
- unsigned int nonzero55; /* For checksumming. */
- unsigned int nonzeroaa; /* For checksumming. */
- unsigned int scd_magic_number;
- unsigned int system_type;
- unsigned int checksum;
- /*
- * memory configuration area for each quad
- */
- struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */
-};
-
-void numaq_tsc_disable(void);
-
-#endif /* CONFIG_X86_NUMAQ */
-#endif /* _ASM_X86_NUMAQ_H */
-
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 1ac6114c9ea5..96ae4f4040bb 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -26,11 +26,6 @@ extern int pci_routeirq;
extern int noioapicquirk;
extern int noioapicreroute;
-/* scan a bus after allocating a pci_sysdata for it */
-extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
- int node);
-extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
-
#ifdef CONFIG_PCI
#ifdef CONFIG_PCI_DOMAINS
@@ -70,7 +65,7 @@ extern unsigned long pci_mem_start;
extern int pcibios_enabled;
void pcibios_config_init(void);
-struct pci_bus *pcibios_scan_root(int bus);
+void pcibios_scan_root(int bus);
void pcibios_set_master(struct pci_dev *dev);
void pcibios_penalize_isa_irq(int irq, int active);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 94220d14d5cc..851bcdc5db04 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -52,7 +52,7 @@
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
*/
-#define __this_cpu_ptr(ptr) \
+#define raw_cpu_ptr(ptr) \
({ \
unsigned long tcp_ptr__; \
__verify_pcpu_ptr(ptr); \
@@ -362,25 +362,25 @@ do { \
*/
#define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var)))
-#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-
-#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
-#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
-#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+
+#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -401,16 +401,16 @@ do { \
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
@@ -427,7 +427,7 @@ do { \
__ret; \
})
-#define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
+#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
#endif /* CONFIG_X86_CMPXCHG64 */
@@ -436,22 +436,22 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
-#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
-#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
+#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
/*
@@ -474,7 +474,7 @@ do { \
__ret; \
})
-#define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
+#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
#endif
@@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
#ifdef CONFIG_X86_64
- return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0;
+ return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
#else
- return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0;
+ return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
#endif
}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 708f19fb4fc7..eb3d44945133 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -214,13 +214,8 @@
#ifdef CONFIG_X86_64
#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
#else
-/*
- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
- * bits are combined, this will alow user to access the high address mapped
- * VDSO in the presence of CONFIG_COMPAT_VDSO
- */
#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
-#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */
#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
#endif
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index c8b051933b1b..7024c12f7bfe 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count);
*/
static __always_inline int preempt_count(void)
{
- return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
{
- __this_cpu_write_4(__preempt_count, pc);
+ raw_cpu_write_4(__preempt_count, pc);
}
/*
@@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
- __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+ raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
- __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+ raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
- __this_cpu_add_4(__preempt_count, val);
+ raw_cpu_add_4(__preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
- __this_cpu_add_4(__preempt_count, -val);
+ raw_cpu_add_4(__preempt_count, -val);
}
/*
@@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
*/
static __always_inline bool should_resched(void)
{
- return unlikely(!__this_cpu_read_4(__preempt_count));
+ return unlikely(!raw_cpu_read_4(__preempt_count));
}
#ifdef CONFIG_PREEMPT
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d62c9f809bc5..9264f04a4c55 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -39,12 +39,6 @@ static inline void vsmp_init(void) { }
void setup_bios_corruption_check(void);
-#ifdef CONFIG_X86_VISWS
-extern void visws_early_detect(void);
-#else
-static inline void visws_early_detect(void) { }
-#endif
-
extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index aea284b41312..d6a756ae04c8 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -13,7 +13,7 @@
#ifndef _ASM_X86_SYSCALL_H
#define _ASM_X86_SYSCALL_H
-#include <linux/audit.h>
+#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <asm/asm-offsets.h> /* For NR_syscalls */
@@ -91,8 +91,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
return AUDIT_ARCH_I386;
}
@@ -221,8 +220,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
-static inline int syscall_get_arch(struct task_struct *task,
- struct pt_regs *regs)
+static inline int syscall_get_arch(void)
{
#ifdef CONFIG_IA32_EMULATION
/*
@@ -234,7 +232,7 @@ static inline int syscall_get_arch(struct task_struct *task,
*
* x32 tasks should be considered AUDIT_ARCH_X86_64.
*/
- if (task_thread_info(task)->status & TS_COMPAT)
+ if (task_thread_info(current)->status & TS_COMPAT)
return AUDIT_ARCH_I386;
#endif
/* Both x32 and x86_64 are considered "64-bit". */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index b28097e4c8c3..0e8f04f2c26f 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -132,19 +132,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot)
}
struct pci_bus;
+int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
-#ifdef CONFIG_NUMA
-extern int get_mp_bus_to_node(int busnum);
-extern void set_mp_bus_to_node(int busnum, int node);
-#else
-static inline int get_mp_bus_to_node(int busnum)
-{
- return 0;
-}
-static inline void set_mp_bus_to_node(int busnum, int node)
-{
-}
-#endif
-
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d63915..d1dc55404ff1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,8 +1,45 @@
#ifndef _ASM_X86_VDSO_H
#define _ASM_X86_VDSO_H
+#include <asm/page_types.h>
+#include <linux/linkage.h>
+
+#ifdef __ASSEMBLER__
+
+#define DEFINE_VDSO_IMAGE(symname, filename) \
+__PAGE_ALIGNED_DATA ; \
+ .globl symname##_start, symname##_end ; \
+ .align PAGE_SIZE ; \
+ symname##_start: ; \
+ .incbin filename ; \
+ symname##_end: ; \
+ .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \
+ \
+.previous ; \
+ \
+ .globl symname##_pages ; \
+ .bss ; \
+ .align 8 ; \
+ .type symname##_pages, @object ; \
+ symname##_pages: ; \
+ .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
+ .size symname##_pages, .-symname##_pages
+
+#else
+
+#define DECLARE_VDSO_IMAGE(symname) \
+ extern char symname##_start[], symname##_end[]; \
+ extern struct page *symname##_pages[]
+
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const char VDSO32_PRELINK[];
+
+#include <asm/vdso32.h>
+
+DECLARE_VDSO_IMAGE(vdso32_int80);
+#ifdef CONFIG_COMPAT
+DECLARE_VDSO_IMAGE(vdso32_syscall);
+#endif
+DECLARE_VDSO_IMAGE(vdso32_sysenter);
/*
* Given a pointer to the vDSO image, find the pointer to VDSO32_name
@@ -11,8 +48,7 @@ extern const char VDSO32_PRELINK[];
#define VDSO32_SYMBOL(base, name) \
({ \
extern const char VDSO32_##name[]; \
- (void __user *)(VDSO32_##name - VDSO32_PRELINK + \
- (unsigned long)(base)); \
+ (void __user *)(VDSO32_##name + (unsigned long)(base)); \
})
#endif
@@ -23,12 +59,8 @@ extern const char VDSO32_PRELINK[];
extern void __user __kernel_sigreturn;
extern void __user __kernel_rt_sigreturn;
-/*
- * These symbols are defined by vdso32.S to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vdso32_int80_start, vdso32_int80_end;
-extern const char vdso32_syscall_start, vdso32_syscall_end;
-extern const char vdso32_sysenter_start, vdso32_sysenter_end;
+void __init patch_vdso32(void *vdso, size_t len);
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000000000000..7efb7018406e
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE 0
+#define VDSO_VVAR_PAGE 1
+#define VDSO_HPET_PAGE 2
+#define VDSO_PAGES 3
+#define VDSO_PREV_PAGES 2
+#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
+
+#endif
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d36b7da..3c3366c2e37f 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
#ifndef _ASM_X86_VGTOD_H
#define _ASM_X86_VGTOD_H
-#include <asm/vsyscall.h>
+#include <linux/compiler.h>
#include <linux/clocksource.h>
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
struct vsyscall_gtod_data {
- seqcount_t seq;
+ unsigned seq;
- struct { /* extract of a clocksource struct */
- int vclock_mode;
- cycle_t cycle_last;
- cycle_t mask;
- u32 mult;
- u32 shift;
- } clock;
+ int vclock_mode;
+ cycle_t cycle_last;
+ cycle_t mask;
+ u32 mult;
+ u32 shift;
/* open coded 'struct timespec' */
- time_t wall_time_sec;
u64 wall_time_snsec;
+ gtod_long_t wall_time_sec;
+ gtod_long_t monotonic_time_sec;
u64 monotonic_time_snsec;
- time_t monotonic_time_sec;
+ gtod_long_t wall_time_coarse_sec;
+ gtod_long_t wall_time_coarse_nsec;
+ gtod_long_t monotonic_time_coarse_sec;
+ gtod_long_t monotonic_time_coarse_nsec;
- struct timezone sys_tz;
- struct timespec wall_time_coarse;
- struct timespec monotonic_time_coarse;
+ int tz_minuteswest;
+ int tz_dsttime;
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+ unsigned ret;
+
+repeat:
+ ret = ACCESS_ONCE(s->seq);
+ if (unlikely(ret & 1)) {
+ cpu_relax();
+ goto repeat;
+ }
+ smp_rmb();
+ return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+ unsigned start)
+{
+ smp_rmb();
+ return unlikely(s->seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+ ++s->seq;
+ smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+ smp_wmb();
+ ++s->seq;
+}
+
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h
deleted file mode 100644
index 2edb37637ead..000000000000
--- a/arch/x86/include/asm/visws/cobalt.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef _ASM_X86_VISWS_COBALT_H
-#define _ASM_X86_VISWS_COBALT_H
-
-#include <asm/fixmap.h>
-
-/*
- * Cobalt SGI Visual Workstation system ASIC
- */
-
-#define CO_CPU_NUM_PHYS 0x1e00
-#define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS + 2)
-
-#define CO_CPU_MAX 4
-
-#define CO_CPU_PHYS 0xc2000000
-#define CO_APIC_PHYS 0xc4000000
-
-/* see set_fixmap() and asm/fixmap.h */
-#define CO_CPU_VADDR (fix_to_virt(FIX_CO_CPU))
-#define CO_APIC_VADDR (fix_to_virt(FIX_CO_APIC))
-
-/* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */
-#define CO_CPU_REV 0x08
-#define CO_CPU_CTRL 0x10
-#define CO_CPU_STAT 0x20
-#define CO_CPU_TIMEVAL 0x30
-
-/* CO_CPU_CTRL bits */
-#define CO_CTRL_TIMERUN 0x04 /* 0 == disabled */
-#define CO_CTRL_TIMEMASK 0x08 /* 0 == unmasked */
-
-/* CO_CPU_STATUS bits */
-#define CO_STAT_TIMEINTR 0x02 /* (r) 1 == int pend, (w) 0 == clear */
-
-/* CO_CPU_TIMEVAL value */
-#define CO_TIME_HZ 100000000 /* Cobalt core rate */
-
-/* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */
-#define CO_APIC_HI(n) (((n) * 0x10) + 4)
-#define CO_APIC_LO(n) ((n) * 0x10)
-#define CO_APIC_ID 0x0ffc
-
-/* CO_APIC_ID bits */
-#define CO_APIC_ENABLE 0x00000100
-
-/* CO_APIC_LO bits */
-#define CO_APIC_MASK 0x00010000 /* 0 = enabled */
-#define CO_APIC_LEVEL 0x00008000 /* 0 = edge */
-
-/*
- * Where things are physically wired to Cobalt
- * #defines with no board _<type>_<rev>_ are common to all (thus far)
- */
-#define CO_APIC_IDE0 4
-#define CO_APIC_IDE1 2 /* Only on 320 */
-
-#define CO_APIC_8259 12 /* serial, floppy, par-l-l */
-
-/* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */
-#define CO_APIC_PCIA_BASE0 0 /* and 1 */ /* slot 0, line 0 */
-#define CO_APIC_PCIA_BASE123 5 /* and 6 */ /* slot 0, line 1 */
-
-#define CO_APIC_PIIX4_USB 7 /* this one is weird */
-
-/* Lithium PCI Bridge B -- "the one with PIIX4" */
-#define CO_APIC_PCIB_BASE0 8 /* and 9-12 *//* slot 0, line 0 */
-#define CO_APIC_PCIB_BASE123 13 /* 14.15 */ /* slot 0, line 1 */
-
-#define CO_APIC_VIDOUT0 16
-#define CO_APIC_VIDOUT1 17
-#define CO_APIC_VIDIN0 18
-#define CO_APIC_VIDIN1 19
-
-#define CO_APIC_LI_AUDIO 22
-
-#define CO_APIC_AS 24
-#define CO_APIC_RE 25
-
-#define CO_APIC_CPU 28 /* Timer and Cache interrupt */
-#define CO_APIC_NMI 29
-#define CO_APIC_LAST CO_APIC_NMI
-
-/*
- * This is how irqs are assigned on the Visual Workstation.
- * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU).
- * All other devices (including PCI) go to Cobalt and are irq's 16 on up.
- */
-#define CO_IRQ_APIC0 16 /* irq of apic entry 0 */
-#define IS_CO_APIC(irq) ((irq) >= CO_IRQ_APIC0)
-#define CO_IRQ(apic) (CO_IRQ_APIC0 + (apic)) /* apic ent to irq */
-#define CO_APIC(irq) ((irq) - CO_IRQ_APIC0) /* irq to apic ent */
-#define CO_IRQ_IDE0 14 /* knowledge of... */
-#define CO_IRQ_IDE1 15 /* ... ide driver defaults! */
-#define CO_IRQ_8259 CO_IRQ(CO_APIC_8259)
-
-#ifdef CONFIG_X86_VISWS_APIC
-static inline void co_cpu_write(unsigned long reg, unsigned long v)
-{
- *((volatile unsigned long *)(CO_CPU_VADDR+reg))=v;
-}
-
-static inline unsigned long co_cpu_read(unsigned long reg)
-{
- return *((volatile unsigned long *)(CO_CPU_VADDR+reg));
-}
-
-static inline void co_apic_write(unsigned long reg, unsigned long v)
-{
- *((volatile unsigned long *)(CO_APIC_VADDR+reg))=v;
-}
-
-static inline unsigned long co_apic_read(unsigned long reg)
-{
- return *((volatile unsigned long *)(CO_APIC_VADDR+reg));
-}
-#endif
-
-extern char visws_board_type;
-
-#define VISWS_320 0
-#define VISWS_540 1
-
-extern char visws_board_rev;
-
-extern int pci_visws_init(void);
-
-#endif /* _ASM_X86_VISWS_COBALT_H */
diff --git a/arch/x86/include/asm/visws/lithium.h b/arch/x86/include/asm/visws/lithium.h
deleted file mode 100644
index a10d89bc1270..000000000000
--- a/arch/x86/include/asm/visws/lithium.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _ASM_X86_VISWS_LITHIUM_H
-#define _ASM_X86_VISWS_LITHIUM_H
-
-#include <asm/fixmap.h>
-
-/*
- * Lithium is the SGI Visual Workstation I/O ASIC
- */
-
-#define LI_PCI_A_PHYS 0xfc000000 /* Enet is dev 3 */
-#define LI_PCI_B_PHYS 0xfd000000 /* PIIX4 is here */
-
-/* see set_fixmap() and asm/fixmap.h */
-#define LI_PCIA_VADDR (fix_to_virt(FIX_LI_PCIA))
-#define LI_PCIB_VADDR (fix_to_virt(FIX_LI_PCIB))
-
-/* Not a standard PCI? (not in linux/pci.h) */
-#define LI_PCI_BUSNUM 0x44 /* lo8: primary, hi8: sub */
-#define LI_PCI_INTEN 0x46
-
-/* LI_PCI_INTENT bits */
-#define LI_INTA_0 0x0001
-#define LI_INTA_1 0x0002
-#define LI_INTA_2 0x0004
-#define LI_INTA_3 0x0008
-#define LI_INTA_4 0x0010
-#define LI_INTB 0x0020
-#define LI_INTC 0x0040
-#define LI_INTD 0x0080
-
-/* More special purpose macros... */
-static inline void li_pcia_write16(unsigned long reg, unsigned short v)
-{
- *((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v;
-}
-
-static inline unsigned short li_pcia_read16(unsigned long reg)
-{
- return *((volatile unsigned short *)(LI_PCIA_VADDR+reg));
-}
-
-static inline void li_pcib_write16(unsigned long reg, unsigned short v)
-{
- *((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v;
-}
-
-static inline unsigned short li_pcib_read16(unsigned long reg)
-{
- return *((volatile unsigned short *)(LI_PCIB_VADDR+reg));
-}
-
-#endif /* _ASM_X86_VISWS_LITHIUM_H */
-
diff --git a/arch/x86/include/asm/visws/piix4.h b/arch/x86/include/asm/visws/piix4.h
deleted file mode 100644
index d0af4d338e7f..000000000000
--- a/arch/x86/include/asm/visws/piix4.h
+++ /dev/null
@@ -1,107 +0,0 @@
-#ifndef _ASM_X86_VISWS_PIIX4_H
-#define _ASM_X86_VISWS_PIIX4_H
-
-/*
- * PIIX4 as used on SGI Visual Workstations
- */
-
-#define PIIX_PM_START 0x0F80
-
-#define SIO_GPIO_START 0x0FC0
-
-#define SIO_PM_START 0x0FC8
-
-#define PMBASE PIIX_PM_START
-#define GPIREG0 (PMBASE+0x30)
-#define GPIREG(x) (GPIREG0+((x)/8))
-#define GPIBIT(x) (1 << ((x)%8))
-
-#define PIIX_GPI_BD_ID1 18
-#define PIIX_GPI_BD_ID2 19
-#define PIIX_GPI_BD_ID3 20
-#define PIIX_GPI_BD_ID4 21
-#define PIIX_GPI_BD_REG GPIREG(PIIX_GPI_BD_ID1)
-#define PIIX_GPI_BD_MASK (GPIBIT(PIIX_GPI_BD_ID1) | \
- GPIBIT(PIIX_GPI_BD_ID2) | \
- GPIBIT(PIIX_GPI_BD_ID3) | \
- GPIBIT(PIIX_GPI_BD_ID4) )
-
-#define PIIX_GPI_BD_SHIFT (PIIX_GPI_BD_ID1 % 8)
-
-#define SIO_INDEX 0x2e
-#define SIO_DATA 0x2f
-
-#define SIO_DEV_SEL 0x7
-#define SIO_DEV_ENB 0x30
-#define SIO_DEV_MSB 0x60
-#define SIO_DEV_LSB 0x61
-
-#define SIO_GP_DEV 0x7
-
-#define SIO_GP_BASE SIO_GPIO_START
-#define SIO_GP_MSB (SIO_GP_BASE>>8)
-#define SIO_GP_LSB (SIO_GP_BASE&0xff)
-
-#define SIO_GP_DATA1 (SIO_GP_BASE+0)
-
-#define SIO_PM_DEV 0x8
-
-#define SIO_PM_BASE SIO_PM_START
-#define SIO_PM_MSB (SIO_PM_BASE>>8)
-#define SIO_PM_LSB (SIO_PM_BASE&0xff)
-#define SIO_PM_INDEX (SIO_PM_BASE+0)
-#define SIO_PM_DATA (SIO_PM_BASE+1)
-
-#define SIO_PM_FER2 0x1
-
-#define SIO_PM_GP_EN 0x80
-
-
-
-/*
- * This is the dev/reg where generating a config cycle will
- * result in a PCI special cycle.
- */
-#define SPECIAL_DEV 0xff
-#define SPECIAL_REG 0x00
-
-/*
- * PIIX4 needs to see a special cycle with the following data
- * to be convinced the processor has gone into the stop grant
- * state. PIIX4 insists on seeing this before it will power
- * down a system.
- */
-#define PIIX_SPECIAL_STOP 0x00120002
-
-#define PIIX4_RESET_PORT 0xcf9
-#define PIIX4_RESET_VAL 0x6
-
-#define PMSTS_PORT 0xf80 // 2 bytes PM Status
-#define PMEN_PORT 0xf82 // 2 bytes PM Enable
-#define PMCNTRL_PORT 0xf84 // 2 bytes PM Control
-
-#define PM_SUSPEND_ENABLE 0x2000 // start sequence to suspend state
-
-/*
- * PMSTS and PMEN I/O bit definitions.
- * (Bits are the same in both registers)
- */
-#define PM_STS_RSM (1<<15) // Resume Status
-#define PM_STS_PWRBTNOR (1<<11) // Power Button Override
-#define PM_STS_RTC (1<<10) // RTC status
-#define PM_STS_PWRBTN (1<<8) // Power Button Pressed?
-#define PM_STS_GBL (1<<5) // Global Status
-#define PM_STS_BM (1<<4) // Bus Master Status
-#define PM_STS_TMROF (1<<0) // Timer Overflow Status.
-
-/*
- * Stop clock GPI register
- */
-#define PIIX_GPIREG0 (0xf80 + 0x30)
-
-/*
- * Stop clock GPI bit in GPIREG0
- */
-#define PIIX_GPI_STPCLK 0x4 // STPCLK signal routed back in
-
-#endif /* _ASM_X86_VISWS_PIIX4_H */
diff --git a/arch/x86/include/asm/visws/sgivw.h b/arch/x86/include/asm/visws/sgivw.h
deleted file mode 100644
index 5fbf63e1003c..000000000000
--- a/arch/x86/include/asm/visws/sgivw.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/*
- * Frame buffer position and size:
- */
-extern unsigned long sgivwfb_mem_phys;
-extern unsigned long sgivwfb_mem_size;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2067264fb7f5..7004d21e6219 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -85,6 +85,7 @@
#define VM_EXIT_SAVE_IA32_EFER 0x00100000
#define VM_EXIT_LOAD_IA32_EFER 0x00200000
#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
@@ -95,6 +96,7 @@
#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
@@ -174,6 +176,8 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
HOST_IA32_PAT = 0x00002c00,
HOST_IA32_PAT_HIGH = 0x00002c01,
HOST_IA32_EFER = 0x00002c02,
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40da206..081d909bc495 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,8 +16,8 @@
* you mess up, the linker will catch it.)
*/
-/* Base address of vvars. This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
#if defined(__VVAR_KERNEL_LDS)
@@ -29,16 +29,35 @@
#else
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name) \
+ extern type vvar_ ## name __attribute__((visibility("hidden")));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
+extern char __vvar_page;
+
+/* Base address of vvars. This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (&__vvar_page)
+#endif
+
#define DECLARE_VVAR(offset, type, name) \
static type const * const vvaraddr_ ## name = \
(void *)(VVAR_ADDRESS + (offset));
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
#define DEFINE_VVAR(type, name) \
type name \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
-#define VVAR(name) (*vvaraddr_ ## name)
-
#endif
/* DECLARE_VVAR(offset, type, name) */
@@ -48,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
#undef DECLARE_VVAR
+
+#endif
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 3e276eb23d1b..c949923a5668 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -49,10 +49,17 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern unsigned long set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e);
+extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count);
extern int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op);
+extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count);
extern int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op);
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long mfn);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
@@ -121,7 +128,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
pfn = m2p_find_override_pfn(mfn, ~0);
}
- /*
+ /*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
* entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it.
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 6c1d7411eb00..d949ef28c48b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,8 @@
#define XSTATE_Hi16_ZMM 0x80
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+/* Bit 63 of XCR0 is reserved for future expansion */
+#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
#define FXSAVE_SIZE 512
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 4924f4be2b99..c827ace3121b 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -295,6 +295,7 @@
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c84b327..f4d96000d33a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-y += syscall_$(BITS).o
+obj-y += syscall_$(BITS).o vsyscall_gtod.o
obj-$(CONFIG_X86_64) += vsyscall_64.o
obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9f46f2b1cfc2..86281ffb96d6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -53,10 +53,6 @@ EXPORT_SYMBOL(acpi_disabled);
# include <asm/proto.h>
#endif /* X86 */
-#define BAD_MADT_ENTRY(entry, end) ( \
- (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
- ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
-
#define PREFIX "ACPI: "
int acpi_noirq; /* skip ACPI IRQ initialization */
@@ -907,10 +903,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
-#ifdef CONFIG_X86_ES7000
-extern int es7000_plat;
-#endif
-
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
@@ -960,14 +952,6 @@ void __init mp_config_acpi_legacy_irqs(void)
set_bit(MP_ISA_BUS, mp_bus_not_pci);
pr_debug("Bus #%d is ISA\n", MP_ISA_BUS);
-#ifdef CONFIG_X86_ES7000
- /*
- * Older generations of ES7000 have no legacy identity mappings
- */
- if (es7000_plat == 1)
- return;
-#endif
-
/*
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index e69182fd01cf..4b28159e0421 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -87,7 +87,9 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
retval = 0;
- if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) {
+ /* If the HW does not support any sub-states in this C-state */
+ if (num_cstate_subtype == 0) {
+ pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part);
retval = -1;
goto out;
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index dec8de4e1663..f04dbb3069b8 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -22,6 +22,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{}
};
EXPORT_SYMBOL(amd_nb_misc_ids);
@@ -30,6 +31,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 0ae0323b1f9c..dcb5b15401ce 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -18,10 +18,7 @@ obj-y += apic_flat_64.o
endif
# APIC probe will depend on the listing order here
-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
-obj-$(CONFIG_X86_SUMMIT) += summit_32.o
obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
-obj-$(CONFIG_X86_ES7000) += es7000_32.o
# For 32bit, probe_32 need to be listed last
obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 53e20531470e..ad28db7e6bde 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1996,7 +1996,8 @@ static inline void __smp_error_interrupt(struct pt_regs *regs)
};
/* First tickle the hardware, only then report what went on. -- REW */
- apic_write(APIC_ESR, 0);
+ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
v = apic_read(APIC_ESR);
ack_APIC_irq();
atomic_inc(&irq_err_count);
@@ -2136,7 +2137,6 @@ int generic_processor_info(int apicid, int version)
*
* - arch/x86/kernel/mpparse.c: MP_processor_info()
* - arch/x86/mm/amdtopology.c: amd_numa_init()
- * - arch/x86/platform/visws/visws_quirks.c: MP_processor_info()
*
* This function is executed with the modified
* boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
deleted file mode 100644
index 6f8f8b348a39..000000000000
--- a/arch/x86/kernel/apic/es7000_32.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Written by: Garry Forsgren, Unisys Corporation
- * Natalie Protasevich, Unisys Corporation
- *
- * This file contains the code to configure and interface
- * with Unisys ES7000 series hardware system manager.
- *
- * Copyright (c) 2003 Unisys Corporation.
- * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
- *
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Unisys Corporation, Township Line & Union Meeting
- * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
- *
- * http://www.unisys.com
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/notifier.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/gfp.h>
-#include <linux/nmi.h>
-#include <linux/smp.h>
-#include <linux/io.h>
-
-#include <asm/apicdef.h>
-#include <linux/atomic.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-
-/*
- * ES7000 chipsets
- */
-
-#define NON_UNISYS 0
-#define ES7000_CLASSIC 1
-#define ES7000_ZORRO 2
-
-#define MIP_REG 1
-#define MIP_PSAI_REG 4
-
-#define MIP_BUSY 1
-#define MIP_SPIN 0xf0000
-#define MIP_VALID 0x0100000000000000ULL
-#define MIP_SW_APIC 0x1020b
-
-#define MIP_PORT(val) ((val >> 32) & 0xffff)
-
-#define MIP_RD_LO(val) (val & 0xffffffff)
-
-struct mip_reg {
- unsigned long long off_0x00;
- unsigned long long off_0x08;
- unsigned long long off_0x10;
- unsigned long long off_0x18;
- unsigned long long off_0x20;
- unsigned long long off_0x28;
- unsigned long long off_0x30;
- unsigned long long off_0x38;
-};
-
-struct mip_reg_info {
- unsigned long long mip_info;
- unsigned long long delivery_info;
- unsigned long long host_reg;
- unsigned long long mip_reg;
-};
-
-struct psai {
- unsigned long long entry_type;
- unsigned long long addr;
- unsigned long long bep_addr;
-};
-
-#ifdef CONFIG_ACPI
-
-struct es7000_oem_table {
- struct acpi_table_header Header;
- u32 OEMTableAddr;
- u32 OEMTableSize;
-};
-
-static unsigned long oem_addrX;
-static unsigned long oem_size;
-
-#endif
-
-/*
- * ES7000 Globals
- */
-
-static volatile unsigned long *psai;
-static struct mip_reg *mip_reg;
-static struct mip_reg *host_reg;
-static int mip_port;
-static unsigned long mip_addr;
-static unsigned long host_addr;
-
-int es7000_plat;
-
-/*
- * GSI override for ES7000 platforms.
- */
-
-
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-}
-
-static int es7000_apic_is_cluster(void)
-{
- /* MPENTIUMIII */
- if (boot_cpu_data.x86 == 6 &&
- (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
- return 1;
-
- return 0;
-}
-
-static void setup_unisys(void)
-{
- /*
- * Determine the generation of the ES7000 currently running.
- *
- * es7000_plat = 1 if the machine is a 5xx ES7000 box
- * es7000_plat = 2 if the machine is a x86_64 ES7000 box
- *
- */
- if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
- es7000_plat = ES7000_ZORRO;
- else
- es7000_plat = ES7000_CLASSIC;
-}
-
-/*
- * Parse the OEM Table:
- */
-static int parse_unisys_oem(char *oemptr)
-{
- int i;
- int success = 0;
- unsigned char type, size;
- unsigned long val;
- char *tp = NULL;
- struct psai *psaip = NULL;
- struct mip_reg_info *mi;
- struct mip_reg *host, *mip;
-
- tp = oemptr;
-
- tp += 8;
-
- for (i = 0; i <= 6; i++) {
- type = *tp++;
- size = *tp++;
- tp -= 2;
- switch (type) {
- case MIP_REG:
- mi = (struct mip_reg_info *)tp;
- val = MIP_RD_LO(mi->host_reg);
- host_addr = val;
- host = (struct mip_reg *)val;
- host_reg = __va(host);
- val = MIP_RD_LO(mi->mip_reg);
- mip_port = MIP_PORT(mi->mip_info);
- mip_addr = val;
- mip = (struct mip_reg *)val;
- mip_reg = __va(mip);
- pr_debug("host_reg = 0x%lx\n",
- (unsigned long)host_reg);
- pr_debug("mip_reg = 0x%lx\n",
- (unsigned long)mip_reg);
- success++;
- break;
- case MIP_PSAI_REG:
- psaip = (struct psai *)tp;
- if (tp != NULL) {
- if (psaip->addr)
- psai = __va(psaip->addr);
- else
- psai = NULL;
- success++;
- }
- break;
- default:
- break;
- }
- tp += size;
- }
-
- if (success < 2)
- es7000_plat = NON_UNISYS;
- else
- setup_unisys();
-
- return es7000_plat;
-}
-
-#ifdef CONFIG_ACPI
-static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
-{
- struct acpi_table_header *header = NULL;
- struct es7000_oem_table *table;
- acpi_size tbl_size;
- acpi_status ret;
- int i = 0;
-
- for (;;) {
- ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size);
- if (!ACPI_SUCCESS(ret))
- return -1;
-
- if (!memcmp((char *) &header->oem_id, "UNISYS", 6))
- break;
-
- early_acpi_os_unmap_memory(header, tbl_size);
- }
-
- table = (void *)header;
-
- oem_addrX = table->OEMTableAddr;
- oem_size = table->OEMTableSize;
-
- early_acpi_os_unmap_memory(header, tbl_size);
-
- *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);
-
- return 0;
-}
-
-static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
-{
- if (!oem_addr)
- return;
-
- __acpi_unmap_table((char *)oem_addr, oem_size);
-}
-
-static int es7000_check_dsdt(void)
-{
- struct acpi_table_header header;
-
- if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
- !strncmp(header.oem_id, "UNISYS", 6))
- return 1;
- return 0;
-}
-
-static int es7000_acpi_ret;
-
-/* Hook from generic ACPI tables.c */
-static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- unsigned long oem_addr = 0;
- int check_dsdt;
- int ret = 0;
-
- /* check dsdt at first to avoid clear fix_map for oem_addr */
- check_dsdt = es7000_check_dsdt();
-
- if (!find_unisys_acpi_oem_table(&oem_addr)) {
- if (check_dsdt) {
- ret = parse_unisys_oem((char *)oem_addr);
- } else {
- setup_unisys();
- ret = 1;
- }
- /*
- * we need to unmap it
- */
- unmap_unisys_acpi_oem_table(oem_addr);
- }
-
- es7000_acpi_ret = ret;
-
- return ret && !es7000_apic_is_cluster();
-}
-
-static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
-{
- int ret = es7000_acpi_ret;
-
- return ret && es7000_apic_is_cluster();
-}
-
-#else /* !CONFIG_ACPI: */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-
-static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
-{
- return 0;
-}
-#endif /* !CONFIG_ACPI */
-
-static void es7000_spin(int n)
-{
- int i = 0;
-
- while (i++ < n)
- rep_nop();
-}
-
-static int es7000_mip_write(struct mip_reg *mip_reg)
-{
- int status = 0;
- int spin;
-
- spin = MIP_SPIN;
- while ((host_reg->off_0x38 & MIP_VALID) != 0) {
- if (--spin <= 0) {
- WARN(1, "Timeout waiting for Host Valid Flag\n");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
- outb(1, mip_port);
-
- spin = MIP_SPIN;
-
- while ((mip_reg->off_0x38 & MIP_VALID) == 0) {
- if (--spin <= 0) {
- WARN(1, "Timeout waiting for MIP Valid Flag\n");
- return -1;
- }
- es7000_spin(MIP_SPIN);
- }
-
- status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
- mip_reg->off_0x38 &= ~MIP_VALID;
-
- return status;
-}
-
-static void es7000_enable_apic_mode(void)
-{
- struct mip_reg es7000_mip_reg;
- int mip_status;
-
- if (!es7000_plat)
- return;
-
- pr_info("Enabling APIC mode.\n");
- memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
- es7000_mip_reg.off_0x00 = MIP_SW_APIC;
- es7000_mip_reg.off_0x38 = MIP_VALID;
-
- while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
- WARN(1, "Command failed, status = %x\n", mip_status);
-}
-
-static unsigned int es7000_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_phys(mask, vector);
-}
-
-static void es7000_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void es7000_send_IPI_all(int vector)
-{
- es7000_send_IPI_mask(cpu_online_mask, vector);
-}
-
-static int es7000_apic_id_registered(void)
-{
- return 1;
-}
-
-static const struct cpumask *target_cpus_cluster(void)
-{
- return cpu_all_mask;
-}
-
-static const struct cpumask *es7000_target_cpus(void)
-{
- return cpumask_of(smp_processor_id());
-}
-
-static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return 0;
-}
-
-static unsigned long es7000_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static int es7000_early_logical_apicid(int cpu)
-{
- /* on es7000, logical apicid is the same as physical */
- return early_per_cpu(x86_bios_cpu_apicid, cpu);
-}
-
-static unsigned long calculate_ldr(int cpu)
-{
- unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
-
- return SET_APIC_LOGICAL_ID(id);
-}
-
-/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LdR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
- */
-static void es7000_init_apic_ldr_cluster(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_CLUSTER);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static void es7000_init_apic_ldr(void)
-{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
-}
-
-static void es7000_setup_apic_routing(void)
-{
- int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
-
- pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
- (apic_version[apic] == 0x14) ?
- "Physical Cluster" : "Logical Cluster",
- nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
-}
-
-static int es7000_cpu_present_to_apicid(int mps_cpu)
-{
- if (!mps_cpu)
- return boot_cpu_physical_apicid;
- else if (mps_cpu < nr_cpu_ids)
- return per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static int cpu_id;
-
-static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
-{
- physid_set_mask_of_physid(cpu_id, retmap);
- ++cpu_id;
-}
-
-static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0xFFL, retmap);
-}
-
-static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
-{
- boot_cpu_physical_apicid = read_apic_id();
- return 1;
-}
-
-static inline int
-es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
-{
- unsigned int round = 0;
- unsigned int cpu, uninitialized_var(apicid);
-
- /*
- * The cpus in the mask must all be on the apic cluster.
- */
- for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
- int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- WARN(1, "Not a valid mask!");
-
- return -EINVAL;
- }
- apicid |= new_apicid;
- round++;
- }
- if (!round)
- return -EINVAL;
- *dest_id = apicid;
- return 0;
-}
-
-static int
-es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- cpumask_var_t cpumask;
- *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return 0;
-
- cpumask_and(cpumask, inmask, andmask);
- es7000_cpu_mask_to_apicid(cpumask, apicid);
-
- free_cpumask_var(cpumask);
-
- return 0;
-}
-
-static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static int probe_es7000(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-static int es7000_mps_ret;
-static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- int ret = 0;
-
- if (mpc->oemptr) {
- struct mpc_oemtable *oem_table =
- (struct mpc_oemtable *)mpc->oemptr;
-
- if (!strncmp(oem, "UNISYS", 6))
- ret = parse_unisys_oem((char *)oem_table);
- }
-
- es7000_mps_ret = ret;
-
- return ret && !es7000_apic_is_cluster();
-}
-
-static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- int ret = es7000_mps_ret;
-
- return ret && es7000_apic_is_cluster();
-}
-
-/* We've been warned by a false positive warning.Use __refdata to keep calm. */
-static struct apic __refdata apic_es7000_cluster = {
-
- .name = "es7000",
- .probe = probe_es7000,
- .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = es7000_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all procs: */
- .irq_dest_mode = 1,
-
- .target_cpus = target_cpus_cluster,
- .disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = es7000_check_apicid_used,
- .check_apicid_present = es7000_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = es7000_init_apic_ldr_cluster,
-
- .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
- .setup_apic_routing = es7000_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
- .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = es7000_check_phys_apicid_present,
- .enable_apic_mode = es7000_enable_apic_mode,
- .phys_pkg_id = es7000_phys_pkg_id,
- .mps_oem_check = es7000_mps_oem_check_cluster,
-
- .get_apic_id = es7000_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = es7000_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = es7000_send_IPI_allbutself,
- .send_IPI_all = es7000_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
-
- .trampoline_phys_low = 0x467,
- .trampoline_phys_high = 0x469,
-
- .wait_for_init_deassert = false,
- /* Nothing to do for most platforms, since cleared by the INIT cycle: */
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = es7000_early_logical_apicid,
-};
-
-static struct apic __refdata apic_es7000 = {
-
- .name = "es7000",
- .probe = probe_es7000,
- .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = es7000_apic_id_registered,
-
- .irq_delivery_mode = dest_Fixed,
- /* phys delivery to target CPUs: */
- .irq_dest_mode = 0,
-
- .target_cpus = es7000_target_cpus,
- .disable_esr = 1,
- .dest_logical = 0,
- .check_apicid_used = es7000_check_apicid_used,
- .check_apicid_present = es7000_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = es7000_init_apic_ldr,
-
- .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
- .setup_apic_routing = es7000_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
- .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = es7000_check_phys_apicid_present,
- .enable_apic_mode = es7000_enable_apic_mode,
- .phys_pkg_id = es7000_phys_pkg_id,
- .mps_oem_check = es7000_mps_oem_check,
-
- .get_apic_id = es7000_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = es7000_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = es7000_send_IPI_allbutself,
- .send_IPI_all = es7000_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .trampoline_phys_low = 0x467,
- .trampoline_phys_high = 0x469,
-
- .wait_for_init_deassert = true,
- /* Nothing to do for most platforms, since cleared by the INIT cycle: */
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = es7000_early_logical_apicid,
-};
-
-/*
- * Need to check for es7000 followed by es7000_cluster, so this order
- * in apic_drivers is important.
- */
-apic_drivers(apic_es7000, apic_es7000_cluster);
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
deleted file mode 100644
index 030ea1c04f72..000000000000
--- a/arch/x86/kernel/apic/numaq_32.c
+++ /dev/null
@@ -1,524 +0,0 @@
-/*
- * Written by: Patricia Gaughen, IBM Corporation
- *
- * Copyright (C) 2002, IBM Corp.
- * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <gone@us.ibm.com>
- */
-#include <linux/nodemask.h>
-#include <linux/topology.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/kernel.h>
-#include <linux/mmzone.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/numa.h>
-#include <linux/smp.h>
-#include <linux/io.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/fixmap.h>
-#include <asm/mpspec.h>
-#include <asm/numaq.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/e820.h>
-#include <asm/ipi.h>
-
-int found_numaq;
-
-/*
- * Have to match translation table entries to main table entries by counter
- * hence the mpc_record variable .... can't see a less disgusting way of
- * doing this ....
- */
-struct mpc_trans {
- unsigned char mpc_type;
- unsigned char trans_len;
- unsigned char trans_type;
- unsigned char trans_quad;
- unsigned char trans_global;
- unsigned char trans_local;
- unsigned short trans_reserved;
-};
-
-static int mpc_record;
-
-static struct mpc_trans *translation_table[MAX_MPC_ENTRY];
-
-int mp_bus_id_to_node[MAX_MP_BUSSES];
-int mp_bus_id_to_local[MAX_MP_BUSSES];
-int quad_local_to_mp_bus_id[NR_CPUS/4][4];
-
-
-static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
-{
- struct eachquadmem *eq = scd->eq + node;
- u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
- u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
- int ret;
-
- node_set(node, numa_nodes_parsed);
- ret = numa_add_memblk(node, start, end);
- BUG_ON(ret < 0);
-}
-
-/*
- * Function: smp_dump_qct()
- *
- * Description: gets memory layout from the quad config table. This
- * function also updates numa_nodes_parsed with the nodes (quads) present.
- */
-static void __init smp_dump_qct(void)
-{
- struct sys_cfg_data *scd;
- int node;
-
- scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
-
- for_each_node(node) {
- if (scd->quads_present31_0 & (1 << node))
- numaq_register_node(node, scd);
- }
-}
-
-void numaq_tsc_disable(void)
-{
- if (!found_numaq)
- return;
-
- if (num_online_nodes() > 1) {
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- setup_clear_cpu_cap(X86_FEATURE_TSC);
- }
-}
-
-static void __init numaq_tsc_init(void)
-{
- numaq_tsc_disable();
-}
-
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
- return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
-/* x86_quirks member */
-static int mpc_apic_id(struct mpc_cpu *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int logical_apicid = generate_logical_apicid(quad, m->apicid);
-
- printk(KERN_DEBUG
- "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->cpufeature & CPU_MODEL_MASK) >> 4,
- m->apicver, quad, logical_apicid);
-
- return logical_apicid;
-}
-
-/* x86_quirks member */
-static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- mp_bus_id_to_node[m->busid] = quad;
- mp_bus_id_to_local[m->busid] = local;
-
- printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
-}
-
-/* x86_quirks member */
-static void mpc_oem_pci_bus(struct mpc_bus *m)
-{
- int quad = translation_table[mpc_record]->trans_quad;
- int local = translation_table[mpc_record]->trans_local;
-
- quad_local_to_mp_bus_id[quad][local] = m->busid;
-}
-
-/*
- * Called from mpparse code.
- * mode = 0: prescan
- * mode = 1: one mpc entry scanned
- */
-static void numaq_mpc_record(unsigned int mode)
-{
- if (!mode)
- mpc_record = 0;
- else
- mpc_record++;
-}
-
-static void __init MP_translation_info(struct mpc_trans *m)
-{
- printk(KERN_INFO
- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
- m->trans_local);
-
- if (mpc_record >= MAX_MPC_ENTRY)
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
- else
- translation_table[mpc_record] = m; /* stash this for later */
-
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
- node_set_online(m->trans_quad);
-}
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-/*
- * Read/parse the MPC oem tables
- */
-static void __init smp_read_mpc_oem(struct mpc_table *mpc)
-{
- struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
- int count = sizeof(*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-
- mpc_record = 0;
- printk(KERN_INFO
- "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
-
- if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
- printk(KERN_WARNING
- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->signature[0], oemtable->signature[1],
- oemtable->signature[2], oemtable->signature[3]);
- return;
- }
-
- if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
- return;
- }
-
- while (count < oemtable->length) {
- switch (*oemptr) {
- case MP_TRANSLATION:
- {
- struct mpc_trans *m = (void *)oemptr;
-
- MP_translation_info(m);
- oemptr += sizeof(*m);
- count += sizeof(*m);
- ++mpc_record;
- break;
- }
- default:
- printk(KERN_WARNING
- "Unrecognised OEM table entry type! - %d\n",
- (int)*oemptr);
- return;
- }
- }
-}
-
-static __init void early_check_numaq(void)
-{
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- early_get_smp_config();
-
- if (found_numaq) {
- x86_init.mpparse.mpc_record = numaq_mpc_record;
- x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
- x86_init.mpparse.mpc_apic_id = mpc_apic_id;
- x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
- x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
- x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
- x86_init.timers.tsc_pre_init = numaq_tsc_init;
- x86_init.pci.init = pci_numaq_init;
- }
-}
-
-int __init numaq_numa_init(void)
-{
- early_check_numaq();
- if (!found_numaq)
- return -ENOENT;
- smp_dump_qct();
-
- return 0;
-}
-
-#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static inline unsigned int numaq_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0x0F;
-}
-
-static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_logical(mask, vector);
-}
-
-static inline void numaq_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
-}
-
-static inline void numaq_send_IPI_all(int vector)
-{
- numaq_send_IPI_mask(cpu_online_mask, vector);
-}
-
-#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
-#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
-
-/*
- * Because we use NMIs rather than the INIT-STARTUP sequence to
- * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
- */
-static inline void numaq_smp_callin_clear_local_apic(void)
-{
- clear_local_APIC();
-}
-
-static inline const struct cpumask *numaq_target_cpus(void)
-{
- return cpu_all_mask;
-}
-
-static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return physid_isset(apicid, *map);
-}
-
-static inline unsigned long numaq_check_apicid_present(int bit)
-{
- return physid_isset(bit, phys_cpu_present_map);
-}
-
-static inline int numaq_apic_id_registered(void)
-{
- return 1;
-}
-
-static inline void numaq_init_apic_ldr(void)
-{
- /* Already done in NUMA-Q firmware */
-}
-
-static inline void numaq_setup_apic_routing(void)
-{
- printk(KERN_INFO
- "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-/*
- * Skip adding the timer int on secondary nodes, which causes
- * a small but painful rift in the time-space continuum.
- */
-static inline int numaq_multi_timer_check(int apic, int irq)
-{
- return apic != 0 && irq == 0;
-}
-
-static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
-{
- /* We don't have a good way to do this yet - hack */
- return physids_promote(0xFUL, retmap);
-}
-
-/*
- * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
- * cpu to APIC ID relation to properly interact with the intelligent
- * mode of the cluster controller.
- */
-static inline int numaq_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < 60)
- return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
- else
- return BAD_APICID;
-}
-
-static inline int numaq_apicid_to_node(int logical_apicid)
-{
- return logical_apicid >> 4;
-}
-
-static int numaq_numa_cpu_node(int cpu)
-{
- int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (logical_apicid != BAD_APICID)
- return numaq_apicid_to_node(logical_apicid);
- return NUMA_NO_NODE;
-}
-
-static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
-{
- int node = numaq_apicid_to_node(logical_apicid);
- int cpu = __ffs(logical_apicid & 0xf);
-
- physid_set_mask_of_physid(cpu + 4*node, retmap);
-}
-
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-
-static inline int numaq_check_phys_apicid_present(int phys_apicid)
-{
- return 1;
-}
-
-/*
- * We use physical apicids here, not logical, so just return the default
- * physical broadcast to stop people from breaking us
- */
-static int
-numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- *apicid = 0x0F;
- return 0;
-}
-
-/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
-static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return cpuid_apic >> index_msb;
-}
-
-static int
-numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
-{
- if (strncmp(oem, "IBM NUMA", 8))
- printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
- else
- found_numaq = 1;
-
- return found_numaq;
-}
-
-static int probe_numaq(void)
-{
- /* already know from get_memcfg_numaq() */
- return found_numaq;
-}
-
-static void numaq_setup_portio_remap(void)
-{
- int num_quads = num_online_nodes();
-
- if (num_quads <= 1)
- return;
-
- printk(KERN_INFO
- "Remapping cross-quad port I/O for %d quads\n", num_quads);
-
- xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
-
- printk(KERN_INFO
- "xquad_portio vaddr 0x%08lx, len %08lx\n",
- (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
-}
-
-/* Use __refdata to keep false positive warning calm. */
-static struct apic __refdata apic_numaq = {
-
- .name = "NUMAQ",
- .probe = probe_numaq,
- .acpi_madt_oem_check = NULL,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = numaq_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* physical delivery on LOCAL quad: */
- .irq_dest_mode = 0,
-
- .target_cpus = numaq_target_cpus,
- .disable_esr = 1,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = numaq_check_apicid_used,
- .check_apicid_present = numaq_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = numaq_init_apic_ldr,
-
- .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
- .setup_apic_routing = numaq_setup_apic_routing,
- .multi_timer_check = numaq_multi_timer_check,
- .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
- .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
- .setup_portio_remap = numaq_setup_portio_remap,
- .check_phys_apicid_present = numaq_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = numaq_phys_pkg_id,
- .mps_oem_check = numaq_mps_oem_check,
-
- .get_apic_id = numaq_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0x0F << 24,
-
- .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = numaq_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = numaq_send_IPI_allbutself,
- .send_IPI_all = numaq_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
- .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
-
- /* We don't do anything here because we use NMI's to boot instead */
- .wait_for_init_deassert = false,
- .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
- .inquire_remote_apic = NULL,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
- .x86_32_numa_cpu_node = numaq_numa_cpu_node,
-};
-
-apic_driver(apic_numaq);
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
deleted file mode 100644
index b656128611cd..000000000000
--- a/arch/x86/kernel/apic/summit_32.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * IBM Summit-Specific Code
- *
- * Written By: Matthew Dobson, IBM Corporation
- *
- * Copyright (c) 2003 IBM Corp.
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Send feedback to <colpatch@us.ibm.com>
- *
- */
-
-#define pr_fmt(fmt) "summit: %s: " fmt, __func__
-
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/bios_ebda.h>
-
-/*
- * APIC driver for the IBM "Summit" chipset.
- */
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/apic.h>
-#include <asm/smp.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <asm/ipi.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/smp.h>
-
-static unsigned summit_get_apic_id(unsigned long x)
-{
- return (x >> 24) & 0xFF;
-}
-
-static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
-{
- default_send_IPI_mask_sequence_logical(mask, vector);
-}
-
-static void summit_send_IPI_allbutself(int vector)
-{
- default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
-}
-
-static void summit_send_IPI_all(int vector)
-{
- summit_send_IPI_mask(cpu_online_mask, vector);
-}
-
-#include <asm/tsc.h>
-
-extern int use_cyclone;
-
-#ifdef CONFIG_X86_SUMMIT_NUMA
-static void setup_summit(void);
-#else
-static inline void setup_summit(void) {}
-#endif
-
-static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
- char *productid)
-{
- if (!strncmp(oem, "IBM ENSW", 8) &&
- (!strncmp(productid, "VIGIL SMP", 9)
- || !strncmp(productid, "EXA", 3)
- || !strncmp(productid, "RUTHLESS SMP", 12))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-/* Hook from generic ACPI tables.c */
-static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
- if (!strncmp(oem_id, "IBM", 3) &&
- (!strncmp(oem_table_id, "SERVIGIL", 8)
- || !strncmp(oem_table_id, "EXA", 3))){
- mark_tsc_unstable("Summit based system");
- use_cyclone = 1; /*enable cyclone-timer*/
- setup_summit();
- return 1;
- }
- return 0;
-}
-
-struct rio_table_hdr {
- unsigned char version; /* Version number of this data structure */
- /* Version 3 adds chassis_num & WP_index */
- unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
- unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
-} __attribute__((packed));
-
-struct scal_detail {
- unsigned char node_id; /* Scalability Node ID */
- unsigned long CBAR; /* Address of 1MB register space */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF = None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port2node; /* Node ID port connected to: 0xFF = None */
- unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- unsigned char node_id; /* RIO Node ID */
- unsigned long BBAR; /* Address of 1MB register space */
- unsigned char type; /* Type of device */
- unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
- /* For CYC: Node ID of Twister that owns this CYC */
- unsigned char port0node; /* Node ID port connected to: 0xFF=None */
- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char port1node; /* Node ID port connected to: 0xFF=None */
- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
- unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
- /* For CYC: 0 */
- unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie:*/
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- /* For CYC: Bits0:7 Reserved */
- unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- /* For CYC: No meaning */
- unsigned char chassis_num; /* 1 based Chassis number */
- /* For LookOut WPEGs this field indicates the */
- /* Expansion Chassis #, enumerated from Boot */
- /* Node WPEG external port, then Boot Node CYC */
- /* external port, then Next Vigil chassis WPEG */
- /* external port, etc. */
- /* Shared Lookouts have only 1 chassis number (the */
- /* first one assigned) */
-} __attribute__((packed));
-
-
-typedef enum {
- CompatTwister = 0, /* Compatibility Twister */
- AltTwister = 1, /* Alternate Twister of internal 8-way */
- CompatCyclone = 2, /* Compatibility Cyclone */
- AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
- CompatWPEG = 4, /* Compatibility WPEG */
- AltWPEG = 5, /* Second Planar WPEG */
- LookOutAWPEG = 6, /* LookOut WPEG */
- LookOutBWPEG = 7, /* LookOut WPEG */
-} node_type;
-
-static inline int is_WPEG(struct rio_detail *rio){
- return (rio->type == CompatWPEG || rio->type == AltWPEG ||
- rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
-}
-
-#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-
-static const struct cpumask *summit_target_cpus(void)
-{
- /* CPU_MASK_ALL (0xff) has undefined behaviour with
- * dest_LowestPrio mode logical clustered apic interrupt routing
- * Just start on cpu 0. IRQ balancing will spread load
- */
- return cpumask_of(0);
-}
-
-static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid)
-{
- return 0;
-}
-
-/* we don't use the phys_cpu_present_map to indicate apicid presence */
-static unsigned long summit_check_apicid_present(int bit)
-{
- return 1;
-}
-
-static int summit_early_logical_apicid(int cpu)
-{
- int count = 0;
- u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
- u8 my_cluster = APIC_CLUSTER(my_id);
-#ifdef CONFIG_SMP
- u8 lid;
- int i;
-
- /* Create logical APIC IDs by counting CPUs already in cluster. */
- for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
- if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
- ++count;
- }
-#endif
- /* We only have a 4 wide bitmap in cluster mode. If a deranged
- * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
- BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- return my_cluster | (1UL << count);
-}
-
-static void summit_init_apic_ldr(void)
-{
- int cpu = smp_processor_id();
- unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
- unsigned long val;
-
- apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- val |= SET_APIC_LOGICAL_ID(id);
- apic_write(APIC_LDR, val);
-}
-
-static int summit_apic_id_registered(void)
-{
- return 1;
-}
-
-static void summit_setup_apic_routing(void)
-{
- pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n",
- nr_ioapics);
-}
-
-static int summit_cpu_present_to_apicid(int mps_cpu)
-{
- if (mps_cpu < nr_cpu_ids)
- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
- else
- return BAD_APICID;
-}
-
-static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap)
-{
- /* For clustered we don't have a good way to do this yet - hack */
- physids_promote(0x0FL, retmap);
-}
-
-static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap)
-{
- physid_set_mask_of_physid(0, retmap);
-}
-
-static int summit_check_phys_apicid_present(int physical_apicid)
-{
- return 1;
-}
-
-static inline int
-summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
-{
- unsigned int round = 0;
- unsigned int cpu, apicid = 0;
-
- /*
- * The cpus in the mask must all be on the apic cluster.
- */
- for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
- int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-
- if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- pr_err("Not a valid mask!\n");
- return -EINVAL;
- }
- apicid |= new_apicid;
- round++;
- }
- if (!round)
- return -EINVAL;
- *dest_id = apicid;
- return 0;
-}
-
-static int
-summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask,
- unsigned int *apicid)
-{
- cpumask_var_t cpumask;
- *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
-
- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return 0;
-
- cpumask_and(cpumask, inmask, andmask);
- summit_cpu_mask_to_apicid(cpumask, apicid);
-
- free_cpumask_var(cpumask);
-
- return 0;
-}
-
-/*
- * cpuid returns the value latched in the HW at reset, not the APIC ID
- * register's value. For any box whose BIOS changes APIC IDs, like
- * clustered APIC systems, we must use hard_smp_processor_id.
- *
- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
- */
-static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
-static int probe_summit(void)
-{
- /* probed later in mptable/ACPI hooks */
- return 0;
-}
-
-#ifdef CONFIG_X86_SUMMIT_NUMA
-static struct rio_table_hdr *rio_table_hdr;
-static struct scal_detail *scal_devs[MAX_NUMNODES];
-static struct rio_detail *rio_devs[MAX_NUMNODES*4];
-
-#ifndef CONFIG_X86_NUMAQ
-static int mp_bus_id_to_node[MAX_MP_BUSSES];
-#endif
-
-static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
-{
- int twister = 0, node = 0;
- int i, bus, num_buses;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
- twister = rio_devs[i]->owner_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_rio_dev) {
- pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
- return last_bus;
- }
-
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
- if (scal_devs[i]->node_id == twister) {
- node = scal_devs[i]->node_id;
- break;
- }
- }
- if (i == rio_table_hdr->num_scal_dev) {
- pr_err("Couldn't find owner Twister for Cyclone!\n");
- return last_bus;
- }
-
- switch (rio_devs[wpeg_num]->type) {
- case CompatWPEG:
- /*
- * The Compatibility Winnipeg controls the 2 legacy buses,
- * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
- * a PCI-PCI bridge card is used in either slot: total 5 buses.
- */
- num_buses = 5;
- break;
- case AltWPEG:
- /*
- * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
- * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
- * the "extra" buses for each of those slots: total 7 buses.
- */
- num_buses = 7;
- break;
- case LookOutAWPEG:
- case LookOutBWPEG:
- /*
- * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
- * & the "extra" buses for each of those slots: total 9 buses.
- */
- num_buses = 9;
- break;
- default:
- pr_info("Unsupported Winnipeg type!\n");
- return last_bus;
- }
-
- for (bus = last_bus; bus < last_bus + num_buses; bus++)
- mp_bus_id_to_node[bus] = node;
- return bus;
-}
-
-static int build_detail_arrays(void)
-{
- unsigned long ptr;
- int i, scal_detail_size, rio_detail_size;
-
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
- pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n",
- MAX_NUMNODES, rio_table_hdr->num_scal_dev);
- return 0;
- }
-
- switch (rio_table_hdr->version) {
- default:
- pr_warn("Invalid Rio Grande Table Version: %d\n",
- rio_table_hdr->version);
- return 0;
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- }
-
- ptr = (unsigned long)rio_table_hdr + 3;
- for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 1;
-}
-
-void setup_summit(void)
-{
- unsigned long ptr;
- unsigned short offset;
- int i, next_wpeg, next_bus = 0;
-
- /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = get_bios_ebda();
- ptr = (unsigned long)phys_to_virt(ptr);
-
- rio_table_hdr = NULL;
- offset = 0x180;
- while (offset) {
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- /* The next offset is stored in the 1st word. 0 means no more */
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr) {
- pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
- return;
- }
-
- if (!build_detail_arrays())
- return;
-
- /* The first Winnipeg we're looking for has an index of 0 */
- next_wpeg = 0;
- do {
- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
- /* It's the Winnipeg we're looking for! */
- next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
- next_wpeg++;
- break;
- }
- }
- /*
- * If we go through all Rio devices and don't find one with
- * the next index, it means we've found all the Winnipegs,
- * and thus all the PCI buses.
- */
- if (i == rio_table_hdr->num_rio_dev)
- next_wpeg = 0;
- } while (next_wpeg != 0);
-}
-#endif
-
-static struct apic apic_summit = {
-
- .name = "summit",
- .probe = probe_summit,
- .acpi_madt_oem_check = summit_acpi_madt_oem_check,
- .apic_id_valid = default_apic_id_valid,
- .apic_id_registered = summit_apic_id_registered,
-
- .irq_delivery_mode = dest_LowestPrio,
- /* logical delivery broadcast to all CPUs: */
- .irq_dest_mode = 1,
-
- .target_cpus = summit_target_cpus,
- .disable_esr = 1,
- .dest_logical = APIC_DEST_LOGICAL,
- .check_apicid_used = summit_check_apicid_used,
- .check_apicid_present = summit_check_apicid_present,
-
- .vector_allocation_domain = flat_vector_allocation_domain,
- .init_apic_ldr = summit_init_apic_ldr,
-
- .ioapic_phys_id_map = summit_ioapic_phys_id_map,
- .setup_apic_routing = summit_setup_apic_routing,
- .multi_timer_check = NULL,
- .cpu_present_to_apicid = summit_cpu_present_to_apicid,
- .apicid_to_cpu_present = summit_apicid_to_cpu_present,
- .setup_portio_remap = NULL,
- .check_phys_apicid_present = summit_check_phys_apicid_present,
- .enable_apic_mode = NULL,
- .phys_pkg_id = summit_phys_pkg_id,
- .mps_oem_check = summit_mps_oem_check,
-
- .get_apic_id = summit_get_apic_id,
- .set_apic_id = NULL,
- .apic_id_mask = 0xFF << 24,
-
- .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
-
- .send_IPI_mask = summit_send_IPI_mask,
- .send_IPI_mask_allbutself = NULL,
- .send_IPI_allbutself = summit_send_IPI_allbutself,
- .send_IPI_all = summit_send_IPI_all,
- .send_IPI_self = default_send_IPI_self,
-
- .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
- .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
-
- .wait_for_init_deassert = true,
- .smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = default_inquire_remote_apic,
-
- .read = native_apic_mem_read,
- .write = native_apic_mem_write,
- .eoi_write = native_apic_mem_write,
- .icr_read = native_apic_icr_read,
- .icr_write = native_apic_icr_write,
- .wait_icr_idle = native_apic_wait_icr_idle,
- .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
-
- .x86_32_early_logical_apicid = summit_early_logical_apicid,
-};
-
-apic_driver(apic_summit);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 897d6201ef10..a80029035bf2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -274,10 +274,6 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
}
#endif
-#ifdef CONFIG_X86_NUMAQ
- numaq_tsc_disable();
-#endif
-
intel_smp_check(c);
}
#else
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0641113e2965..a952e9c85b6f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1225,21 +1225,24 @@ static struct notifier_block cacheinfo_cpu_notifier = {
static int __init cache_sysfs_init(void)
{
- int i;
+ int i, err = 0;
if (num_cache_leaves == 0)
return 0;
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
- int err;
struct device *dev = get_cpu_device(i);
err = cache_add_dev(dev);
if (err)
- return err;
+ goto out;
}
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
- return 0;
+ __register_hotcpu_notifier(&cacheinfo_cpu_notifier);
+
+out:
+ cpu_notifier_register_done();
+ return err;
}
device_initcall(cache_sysfs_init);
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 36565373af87..afa9f0d487ea 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -47,45 +47,3 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
return NULL;
}
EXPORT_SYMBOL(x86_match_cpu);
-
-ssize_t arch_print_cpu_modalias(struct device *dev,
- struct device_attribute *attr,
- char *bufptr)
-{
- int size = PAGE_SIZE;
- int i, n;
- char *buf = bufptr;
-
- n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
- "model:%04X:feature:",
- boot_cpu_data.x86_vendor,
- boot_cpu_data.x86,
- boot_cpu_data.x86_model);
- size -= n;
- buf += n;
- size -= 1;
- for (i = 0; i < NCAPINTS*32; i++) {
- if (boot_cpu_has(i)) {
- n = snprintf(buf, size, ",%04X", i);
- if (n >= size) {
- WARN(1, "x86 features overflow page\n");
- break;
- }
- size -= n;
- buf += n;
- }
- }
- *buf++ = '\n';
- return buf - bufptr;
-}
-
-int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
- if (buf) {
- arch_print_cpu_modalias(NULL, NULL, buf);
- add_uevent_var(env, "MODALIAS=%s", buf);
- kfree(buf);
- }
- return 0;
-}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4d5419b249da..eeee23ff75ef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,6 +89,9 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+/* CMCI storm detection filter */
+static DEFINE_PER_CPU(unsigned long, mce_polled_error);
+
/*
* MCA banks polled by the period polling timer for corrected events.
* With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -595,6 +598,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce m;
int i;
+ unsigned long *v;
this_cpu_inc(mce_poll_count);
@@ -614,6 +618,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!(m.status & MCI_STATUS_VAL))
continue;
+ v = &get_cpu_var(mce_polled_error);
+ set_bit(0, v);
/*
* Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here.
@@ -1278,10 +1284,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) =
mce_adjust_timer_default;
+static int cmc_error_seen(void)
+{
+ unsigned long *v = &__get_cpu_var(mce_polled_error);
+
+ return test_and_clear_bit(0, v);
+}
+
static void mce_timer_fn(unsigned long data)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
unsigned long iv;
+ int notify;
WARN_ON(smp_processor_id() != data);
@@ -1296,7 +1310,9 @@ static void mce_timer_fn(unsigned long data)
* polling interval, otherwise increase the polling interval.
*/
iv = __this_cpu_read(mce_next_interval);
- if (mce_notify_irq()) {
+ notify = mce_notify_irq();
+ notify |= cmc_error_seen();
+ if (notify) {
iv = max(iv / 2, (unsigned long) HZ/100);
} else {
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
@@ -2434,14 +2450,18 @@ static __init int mcheck_init_device(void)
if (err)
return err;
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = mce_device_create(i);
- if (err)
+ if (err) {
+ cpu_notifier_register_done();
return err;
+ }
}
register_syscore_ops(&mce_syscore_ops);
- register_hotcpu_notifier(&mce_cpu_notifier);
+ __register_hotcpu_notifier(&mce_cpu_notifier);
+ cpu_notifier_register_done();
/* register character device /dev/mcelog */
misc_register(&mce_chrdev_device);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index fb6156fee6f7..3bdb95ae8c43 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
+#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -137,6 +138,22 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
}
}
+static void cmci_storm_disable_banks(void)
+{
+ unsigned long flags, *owned;
+ int bank;
+ u64 val;
+
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ owned = __get_cpu_var(mce_banks_owned);
+ for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ val &= ~MCI_CTL2_CMCI_EN;
+ wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ }
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
static bool cmci_storm_detect(void)
{
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -158,7 +175,7 @@ static bool cmci_storm_detect(void)
if (cnt <= CMCI_STORM_THRESHOLD)
return false;
- cmci_clear();
+ cmci_storm_disable_banks();
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
r = atomic_add_return(1, &cmci_storm_on_cpus);
mce_timer_kick(CMCI_POLL_INTERVAL);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 3eec7de76efb..d921b7ee6595 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -271,9 +271,6 @@ static void thermal_throttle_remove_dev(struct device *dev)
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}
-/* Mutex protecting device creation against CPU hotplug: */
-static DEFINE_MUTEX(therm_cpu_lock);
-
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
@@ -289,18 +286,14 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(dev, cpu);
- mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(dev);
- mutex_unlock(&therm_cpu_lock);
break;
}
return notifier_from_errno(err);
@@ -319,19 +312,16 @@ static __init int thermal_throttle_init_device(void)
if (!atomic_read(&therm_throt_en))
return 0;
- register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+ cpu_notifier_register_begin();
-#ifdef CONFIG_HOTPLUG_CPU
- mutex_lock(&therm_cpu_lock);
-#endif
/* connect live CPUs to sysfs */
for_each_online_cpu(cpu) {
err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
WARN_ON(err);
}
-#ifdef CONFIG_HOTPLUG_CPU
- mutex_unlock(&therm_cpu_lock);
-#endif
+
+ __register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 4b8e4d3cd6ea..4c36bbe3173a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -926,13 +926,13 @@ static __init int amd_ibs_init(void)
goto out;
perf_ibs_pm_init();
- get_online_cpus();
+ cpu_notifier_register_begin();
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
- perf_cpu_notifier(perf_ibs_cpu_notifier);
smp_call_function(setup_APIC_ibs, NULL, 1);
- put_online_cpus();
+ __perf_cpu_notifier(perf_ibs_cpu_notifier);
+ cpu_notifier_register_done();
ret = perf_event_ibs_init();
out:
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 754291adec33..3bbdf4cd38b9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -531,15 +531,16 @@ static int __init amd_uncore_init(void)
if (ret)
return -ENODEV;
- get_online_cpus();
+ cpu_notifier_register_begin();
+
/* init cpus already online before registering for hotplug notifier */
for_each_online_cpu(cpu) {
amd_uncore_cpu_up_prepare(cpu);
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
}
- register_cpu_notifier(&amd_uncore_cpu_notifier_block);
- put_online_cpus();
+ __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 3cec947e3b98..4b9a9e9466bd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -673,19 +673,20 @@ static int __init rapl_pmu_init(void)
/* unsupported */
return 0;
}
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
rapl_cpu_prepare(cpu);
rapl_cpu_init(cpu);
}
- perf_cpu_notifier(rapl_cpu_notifier);
+ __perf_cpu_notifier(rapl_cpu_notifier);
ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
if (WARN_ON(ret)) {
pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
- put_online_cpus();
+ cpu_notifier_register_done();
return -1;
}
@@ -699,7 +700,7 @@ static int __init rapl_pmu_init(void)
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
- put_online_cpus();
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index bd2253d40cff..65bbbea38b9c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -4244,7 +4244,7 @@ static void __init uncore_cpumask_init(void)
if (!cpumask_empty(&uncore_cpu_mask))
return;
- get_online_cpus();
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
int i, phys_id = topology_physical_package_id(cpu);
@@ -4263,9 +4263,9 @@ static void __init uncore_cpumask_init(void)
}
on_each_cpu(uncore_cpu_setup, NULL, 1);
- register_cpu_notifier(&uncore_cpu_nb);
+ __register_cpu_notifier(&uncore_cpu_nb);
- put_online_cpus();
+ cpu_notifier_register_done();
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 7d9481c743f8..3225ae6c5180 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -198,14 +198,15 @@ static int __init cpuid_init(void)
goto out_chrdev;
}
cpuid_class->devnode = cpuid_devnode;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
goto out_class;
}
- register_hotcpu_notifier(&cpuid_class_cpu_notifier);
- put_online_cpus();
+ __register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ cpu_notifier_register_done();
err = 0;
goto out;
@@ -215,7 +216,7 @@ out_class:
for_each_online_cpu(i) {
cpuid_device_destroy(i);
}
- put_online_cpus();
+ cpu_notifier_register_done();
class_destroy(cpuid_class);
out_chrdev:
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -227,13 +228,13 @@ static void __exit cpuid_exit(void)
{
int cpu = 0;
- get_online_cpus();
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
cpuid_device_destroy(cpu);
class_destroy(cpuid_class);
__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
- unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
- put_online_cpus();
+ __unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+ cpu_notifier_register_done();
}
module_init(cpuid_init);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 346b1df2412e..1abcb50b48ae 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -115,27 +115,26 @@ enum stack_type {
};
static enum stack_type
-analyze_stack(int cpu, struct task_struct *task,
- unsigned long *stack, unsigned long **stack_end, char **id)
+analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
+ unsigned long **stack_end, unsigned long *irq_stack,
+ unsigned *used, char **id)
{
- unsigned long *irq_stack;
unsigned long addr;
- unsigned used = 0;
addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
if ((unsigned long)task_stack_page(task) == addr)
return STACK_IS_NORMAL;
*stack_end = in_exception_stack(cpu, (unsigned long)stack,
- &used, id);
+ used, id);
if (*stack_end)
return STACK_IS_EXCEPTION;
- *stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
- if (!*stack_end)
- return STACK_IS_UNKNOWN;
+ if (!irq_stack)
+ return STACK_IS_NORMAL;
- irq_stack = *stack_end - irq_stack_size;
+ *stack_end = irq_stack;
+ irq_stack = irq_stack - irq_stack_size;
if (in_irq_stack(stack, irq_stack, *stack_end))
return STACK_IS_IRQ;
@@ -156,8 +155,9 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
{
const unsigned cpu = get_cpu();
struct thread_info *tinfo;
- unsigned long *irq_stack;
+ unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
unsigned long dummy;
+ unsigned used = 0;
int graph = 0;
int done = 0;
@@ -186,7 +186,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
enum stack_type stype;
char *id;
- stype = analyze_stack(cpu, task, stack, &stack_end, &id);
+ stype = analyze_stack(cpu, task, stack, &stack_end,
+ irq_stack, &used, &id);
/* Default finish unless specified to continue */
done = 1;
@@ -226,7 +227,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
* pointer (index -1 to end) in the IRQ stack:
*/
stack = (unsigned long *) (stack_end[-1]);
- irq_stack = stack_end - irq_stack_size;
+ irq_stack = NULL;
ops->stack(data, "EOI");
done = 0;
break;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6d7d5a1260a6..b0cc3809723d 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -225,7 +225,7 @@ static void __init intel_remapping_check(int num, int slot, int func)
*
* And yes, so far on current devices the base addr is always under 4G.
*/
-static u32 __init intel_stolen_base(int num, int slot, int func)
+static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_size)
{
u32 base;
@@ -244,6 +244,114 @@ static u32 __init intel_stolen_base(int num, int slot, int func)
#define MB(x) (KB (KB (x)))
#define GB(x) (MB (KB (x)))
+static size_t __init i830_tseg_size(void)
+{
+ u8 tmp = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);
+
+ if (!(tmp & TSEG_ENABLE))
+ return 0;
+
+ if (tmp & I830_TSEG_SIZE_1M)
+ return MB(1);
+ else
+ return KB(512);
+}
+
+static size_t __init i845_tseg_size(void)
+{
+ u8 tmp = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
+
+ if (!(tmp & TSEG_ENABLE))
+ return 0;
+
+ switch (tmp & I845_TSEG_SIZE_MASK) {
+ case I845_TSEG_SIZE_512K:
+ return KB(512);
+ case I845_TSEG_SIZE_1M:
+ return MB(1);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static size_t __init i85x_tseg_size(void)
+{
+ u8 tmp = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);
+
+ if (!(tmp & TSEG_ENABLE))
+ return 0;
+
+ return MB(1);
+}
+
+static size_t __init i830_mem_size(void)
+{
+ return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32);
+}
+
+static size_t __init i85x_mem_size(void)
+{
+ return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32);
+}
+
+/*
+ * On 830/845/85x the stolen memory base isn't available in any
+ * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size.
+ */
+static u32 __init i830_stolen_base(int num, int slot, int func, size_t stolen_size)
+{
+ return i830_mem_size() - i830_tseg_size() - stolen_size;
+}
+
+static u32 __init i845_stolen_base(int num, int slot, int func, size_t stolen_size)
+{
+ return i830_mem_size() - i845_tseg_size() - stolen_size;
+}
+
+static u32 __init i85x_stolen_base(int num, int slot, int func, size_t stolen_size)
+{
+ return i85x_mem_size() - i85x_tseg_size() - stolen_size;
+}
+
+static u32 __init i865_stolen_base(int num, int slot, int func, size_t stolen_size)
+{
+ /*
+ * FIXME is the graphics stolen memory region
+ * always at TOUD? Ie. is it always the last
+ * one to be allocated by the BIOS?
+ */
+ return read_pci_config_16(0, 0, 0, I865_TOUD) << 16;
+}
+
+static size_t __init i830_stolen_size(int num, int slot, int func)
+{
+ size_t stolen_size;
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
+
+ switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
+ case I830_GMCH_GMS_STOLEN_512:
+ stolen_size = KB(512);
+ break;
+ case I830_GMCH_GMS_STOLEN_1024:
+ stolen_size = MB(1);
+ break;
+ case I830_GMCH_GMS_STOLEN_8192:
+ stolen_size = MB(8);
+ break;
+ case I830_GMCH_GMS_LOCAL:
+ /* local memory isn't part of the normal address space */
+ stolen_size = 0;
+ break;
+ default:
+ return 0;
+ }
+
+ return stolen_size;
+}
+
static size_t __init gen3_stolen_size(int num, int slot, int func)
{
size_t stolen_size;
@@ -310,7 +418,7 @@ static size_t __init gen6_stolen_size(int num, int slot, int func)
return gmch_ctrl << 25; /* 32 MB units */
}
-static inline size_t gen8_stolen_size(int num, int slot, int func)
+static size_t gen8_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
@@ -320,31 +428,74 @@ static inline size_t gen8_stolen_size(int num, int slot, int func)
return gmch_ctrl << 25; /* 32 MB units */
}
-typedef size_t (*stolen_size_fn)(int num, int slot, int func);
+
+struct intel_stolen_funcs {
+ size_t (*size)(int num, int slot, int func);
+ u32 (*base)(int num, int slot, int func, size_t size);
+};
+
+static const struct intel_stolen_funcs i830_stolen_funcs = {
+ .base = i830_stolen_base,
+ .size = i830_stolen_size,
+};
+
+static const struct intel_stolen_funcs i845_stolen_funcs = {
+ .base = i845_stolen_base,
+ .size = i830_stolen_size,
+};
+
+static const struct intel_stolen_funcs i85x_stolen_funcs = {
+ .base = i85x_stolen_base,
+ .size = gen3_stolen_size,
+};
+
+static const struct intel_stolen_funcs i865_stolen_funcs = {
+ .base = i865_stolen_base,
+ .size = gen3_stolen_size,
+};
+
+static const struct intel_stolen_funcs gen3_stolen_funcs = {
+ .base = intel_stolen_base,
+ .size = gen3_stolen_size,
+};
+
+static const struct intel_stolen_funcs gen6_stolen_funcs = {
+ .base = intel_stolen_base,
+ .size = gen6_stolen_size,
+};
+
+static const struct intel_stolen_funcs gen8_stolen_funcs = {
+ .base = intel_stolen_base,
+ .size = gen8_stolen_size,
+};
static struct pci_device_id intel_stolen_ids[] __initdata = {
- INTEL_I915G_IDS(gen3_stolen_size),
- INTEL_I915GM_IDS(gen3_stolen_size),
- INTEL_I945G_IDS(gen3_stolen_size),
- INTEL_I945GM_IDS(gen3_stolen_size),
- INTEL_VLV_M_IDS(gen6_stolen_size),
- INTEL_VLV_D_IDS(gen6_stolen_size),
- INTEL_PINEVIEW_IDS(gen3_stolen_size),
- INTEL_I965G_IDS(gen3_stolen_size),
- INTEL_G33_IDS(gen3_stolen_size),
- INTEL_I965GM_IDS(gen3_stolen_size),
- INTEL_GM45_IDS(gen3_stolen_size),
- INTEL_G45_IDS(gen3_stolen_size),
- INTEL_IRONLAKE_D_IDS(gen3_stolen_size),
- INTEL_IRONLAKE_M_IDS(gen3_stolen_size),
- INTEL_SNB_D_IDS(gen6_stolen_size),
- INTEL_SNB_M_IDS(gen6_stolen_size),
- INTEL_IVB_M_IDS(gen6_stolen_size),
- INTEL_IVB_D_IDS(gen6_stolen_size),
- INTEL_HSW_D_IDS(gen6_stolen_size),
- INTEL_HSW_M_IDS(gen6_stolen_size),
- INTEL_BDW_M_IDS(gen8_stolen_size),
- INTEL_BDW_D_IDS(gen8_stolen_size)
+ INTEL_I830_IDS(&i830_stolen_funcs),
+ INTEL_I845G_IDS(&i845_stolen_funcs),
+ INTEL_I85X_IDS(&i85x_stolen_funcs),
+ INTEL_I865G_IDS(&i865_stolen_funcs),
+ INTEL_I915G_IDS(&gen3_stolen_funcs),
+ INTEL_I915GM_IDS(&gen3_stolen_funcs),
+ INTEL_I945G_IDS(&gen3_stolen_funcs),
+ INTEL_I945GM_IDS(&gen3_stolen_funcs),
+ INTEL_VLV_M_IDS(&gen6_stolen_funcs),
+ INTEL_VLV_D_IDS(&gen6_stolen_funcs),
+ INTEL_PINEVIEW_IDS(&gen3_stolen_funcs),
+ INTEL_I965G_IDS(&gen3_stolen_funcs),
+ INTEL_G33_IDS(&gen3_stolen_funcs),
+ INTEL_I965GM_IDS(&gen3_stolen_funcs),
+ INTEL_GM45_IDS(&gen3_stolen_funcs),
+ INTEL_G45_IDS(&gen3_stolen_funcs),
+ INTEL_IRONLAKE_D_IDS(&gen3_stolen_funcs),
+ INTEL_IRONLAKE_M_IDS(&gen3_stolen_funcs),
+ INTEL_SNB_D_IDS(&gen6_stolen_funcs),
+ INTEL_SNB_M_IDS(&gen6_stolen_funcs),
+ INTEL_IVB_M_IDS(&gen6_stolen_funcs),
+ INTEL_IVB_D_IDS(&gen6_stolen_funcs),
+ INTEL_HSW_D_IDS(&gen6_stolen_funcs),
+ INTEL_HSW_M_IDS(&gen6_stolen_funcs),
+ INTEL_BDW_M_IDS(&gen8_stolen_funcs),
+ INTEL_BDW_D_IDS(&gen8_stolen_funcs)
};
static void __init intel_graphics_stolen(int num, int slot, int func)
@@ -361,11 +512,13 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) {
if (intel_stolen_ids[i].device == device) {
- stolen_size_fn stolen_size =
- (stolen_size_fn)intel_stolen_ids[i].driver_data;
- size = stolen_size(num, slot, func);
- start = intel_stolen_base(num, slot, func);
+ const struct intel_stolen_funcs *stolen_funcs =
+ (const struct intel_stolen_funcs *)intel_stolen_ids[i].driver_data;
+ size = stolen_funcs->size(num, slot, func);
+ start = stolen_funcs->base(num, slot, func, size);
if (size && start) {
+ printk(KERN_INFO "Reserving Intel graphics stolen memory at 0x%x-0x%x\n",
+ start, start + (u32)size - 1);
/* Mark this space as reserved */
e820_add_region(start, size, E820_RESERVED);
sanitize_e820_map(e820.map,
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index e6253195a301..52819e816f87 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -308,7 +308,10 @@ static int ftrace_write(unsigned long ip, const char *val, int size)
if (within(ip, (unsigned long)_text, (unsigned long)_etext))
ip = (unsigned long)__va(__pa_symbol(ip));
- return probe_kernel_write((void *)ip, val, size);
+ if (probe_kernel_write((void *)ip, val, size))
+ return -EPERM;
+
+ return 0;
}
static int add_break(unsigned long ip, const char *old)
@@ -323,10 +326,7 @@ static int add_break(unsigned long ip, const char *old)
if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
return -EINVAL;
- if (ftrace_write(ip, &brk, 1))
- return -EPERM;
-
- return 0;
+ return ftrace_write(ip, &brk, 1);
}
static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -425,7 +425,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
/* If this does not have a breakpoint, we are done */
if (ins[0] != brk)
- return -1;
+ return 0;
nop = ftrace_nop_replace();
@@ -455,7 +455,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
}
update:
- return probe_kernel_write((void *)ip, &nop[0], 1);
+ return ftrace_write(ip, nop, 1);
}
static int add_update_code(unsigned long ip, unsigned const char *new)
@@ -463,9 +463,7 @@ static int add_update_code(unsigned long ip, unsigned const char *new)
/* skip breakpoint */
ip++;
new++;
- if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
- return -EPERM;
- return 0;
+ return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}
static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -520,10 +518,7 @@ static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
new = ftrace_call_replace(ip, addr);
- if (ftrace_write(ip, new, 1))
- return -EPERM;
-
- return 0;
+ return ftrace_write(ip, new, 1);
}
static int finish_update_nop(struct dyn_ftrace *rec)
@@ -533,9 +528,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
new = ftrace_nop_replace();
- if (ftrace_write(ip, new, 1))
- return -EPERM;
- return 0;
+ return ftrace_write(ip, new, 1);
}
static int finish_update(struct dyn_ftrace *rec, int enable)
@@ -632,8 +625,14 @@ void ftrace_replace_code(int enable)
printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
for_ftrace_rec_iter(iter) {
rec = ftrace_rec_iter_record(iter);
- remove_breakpoint(rec);
+ /*
+ * Breakpoints are handled only when this function is in
+ * progress. The system could not work with them.
+ */
+ if (remove_breakpoint(rec))
+ BUG();
}
+ run_sync();
}
static int
@@ -655,16 +654,19 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
run_sync();
ret = ftrace_write(ip, new_code, 1);
- if (ret) {
- ret = -EPERM;
- goto out;
- }
- run_sync();
+ /*
+ * The breakpoint is handled only when this function is in progress.
+ * The system could not work if we could not remove it.
+ */
+ BUG_ON(ret);
out:
+ run_sync();
return ret;
fail_update:
- probe_kernel_write((void *)ip, &old_code[0], 1);
+ /* Also here the system could not work with the breakpoint */
+ if (ftrace_write(ip, old_code, 1))
+ BUG();
goto out;
}
@@ -678,11 +680,8 @@ void arch_ftrace_update_code(int command)
atomic_dec(&modifying_ftrace_code);
}
-int __init ftrace_dyn_arch_init(void *data)
+int __init ftrace_dyn_arch_init(void)
{
- /* The return code is retured via data */
- *(unsigned long *)data = 0;
-
return 0;
}
#endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 014618dbaa7b..8d80ae011603 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -752,9 +752,7 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter,
-#ifdef CONFIG_X86_64
.archdata = { .vclock_mode = VCLOCK_HPET },
-#endif
};
static int hpet_clocksource_register(void)
@@ -943,12 +941,14 @@ static __init int hpet_late_init(void)
if (boot_cpu_has(X86_FEATURE_ARAT))
return 0;
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
}
/* This notifier should be called after workqueue is ready */
- hotcpu_notifier(hpet_cpuhp_notify, -20);
+ __hotcpu_notifier(hpet_cpuhp_notify, -20);
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 42805fac0092..283a76a9cc40 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -125,7 +125,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
seq_printf(p, " Machine check polls\n");
#endif
-#if defined(CONFIG_HYPERV) || defined(CONFIG_XEN)
+#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN)
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 713f1b3bad52..0331cb389d68 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -417,7 +417,6 @@ void kvm_disable_steal_time(void)
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
- WARN_ON(kvm_register_clock("primary cpu clock"));
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
kvm_spinlock_init();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e6041094ff26..d9156ceecdff 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,7 +242,7 @@ void __init kvmclock_init(void)
hv_clock = __va(mem);
memset(hv_clock, 0, size);
- if (kvm_register_clock("boot clock")) {
+ if (kvm_register_clock("primary cpu clock")) {
hv_clock = NULL;
memblock_free(mem, size);
return;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ebc987398923..af1d14a9ebda 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,6 +229,17 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
}
}
+ /*
+ * On x86-64 we do not support 16-bit segments due to
+ * IRET leaking the high bits of the kernel stack address.
+ */
+#ifdef CONFIG_X86_64
+ if (!ldt_info.seg_32bit) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+#endif
+
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 05266b5aae22..c9603ac80de5 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -259,14 +259,15 @@ static int __init msr_init(void)
goto out_chrdev;
}
msr_class->devnode = msr_devnode;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
goto out_class;
}
- register_hotcpu_notifier(&msr_class_cpu_notifier);
- put_online_cpus();
+ __register_hotcpu_notifier(&msr_class_cpu_notifier);
+ cpu_notifier_register_done();
err = 0;
goto out;
@@ -275,7 +276,7 @@ out_class:
i = 0;
for_each_online_cpu(i)
msr_device_destroy(i);
- put_online_cpus();
+ cpu_notifier_register_done();
class_destroy(msr_class);
out_chrdev:
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -286,13 +287,14 @@ out:
static void __exit msr_exit(void)
{
int cpu = 0;
- get_online_cpus();
+
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
msr_device_destroy(cpu);
class_destroy(msr_class);
__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
- unregister_hotcpu_notifier(&msr_class_cpu_notifier);
- put_online_cpus();
+ __unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+ cpu_notifier_register_done();
}
module_init(msr_init);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 299d49302e7d..0497f719977d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1207,23 +1207,31 @@ error:
return ret;
}
-static inline int __init determine_tce_table_size(u64 ram)
+static inline int __init determine_tce_table_size(void)
{
int ret;
if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
return specified_table_size;
- /*
- * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
- * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
- * larger table size has twice as many entries, so shift the
- * max ram address by 13 to divide by 8K and then look at the
- * order of the result to choose between 0-7.
- */
- ret = get_order(ram >> 13);
- if (ret > TCE_TABLE_SIZE_8M)
+ if (is_kdump_kernel() && saved_max_pfn) {
+ /*
+ * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
+ * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
+ * larger table size has twice as many entries, so shift the
+ * max ram address by 13 to divide by 8K and then look at the
+ * order of the result to choose between 0-7.
+ */
+ ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13);
+ if (ret > TCE_TABLE_SIZE_8M)
+ ret = TCE_TABLE_SIZE_8M;
+ } else {
+ /*
+ * Use 8M by default (suggested by Muli) if it's not
+ * kdump kernel and saved_max_pfn isn't set.
+ */
ret = TCE_TABLE_SIZE_8M;
+ }
return ret;
}
@@ -1418,8 +1426,7 @@ int __init detect_calgary(void)
return -ENOMEM;
}
- specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
- saved_max_pfn : max_pfn) * PAGE_SIZE);
+ specified_table_size = determine_tce_table_size();
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
struct calgary_bus_info *info = &bus_info[bus];
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fa511acff7e6..09c76d265550 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -869,7 +869,6 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
- visws_early_detect();
/*
* copy kernel address range established so far and switch
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfbe99f88830..57e5ce126d5a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -914,8 +914,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
tsc_khz_ref = tsc_khz;
}
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
@@ -985,9 +984,7 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
-#ifdef CONFIG_X86_64
.archdata = { .vclock_mode = VCLOCK_TSC },
-#endif
};
void mark_tsc_unstable(char *reason)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index da6b35a98260..49edf2dd3613 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -147,7 +147,6 @@ SECTIONS
_edata = .;
} :data
-#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
__vvar_page = .;
@@ -165,12 +164,15 @@ SECTIONS
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
+ /*
+ * Pad the rest of the page with zeros. Otherwise the loader
+ * can leave garbage here.
+ */
+ . = __vvar_beginning_hack + PAGE_SIZE;
} :data
. = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
-#endif /* CONFIG_X86_64 */
-
/* Init code and data - will be freed after init */
. = ALIGN(PAGE_SIZE);
.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 1f96f9347ed9..8b3b3eb3cead 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -47,14 +47,12 @@
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>
-#include <asm/vgtod.h>
#include <asm/traps.h>
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
DEFINE_VVAR(int, vgetcpu_mode);
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
@@ -77,48 +75,6 @@ static int __init vsyscall_setup(char *str)
}
early_param("vsyscall", vsyscall_setup);
-void update_vsyscall_tz(void)
-{
- vsyscall_gtod_data.sys_tz = sys_tz;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
-
- write_seqcount_begin(&vdata->seq);
-
- /* copy vsyscall data */
- vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->clock->cycle_last;
- vdata->clock.mask = tk->clock->mask;
- vdata->clock.mult = tk->mult;
- vdata->clock.shift = tk->shift;
-
- vdata->wall_time_sec = tk->xtime_sec;
- vdata->wall_time_snsec = tk->xtime_nsec;
-
- vdata->monotonic_time_sec = tk->xtime_sec
- + tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->xtime_nsec
- + (tk->wall_to_monotonic.tv_nsec
- << tk->shift);
- while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->shift)) {
- vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->shift;
- vdata->monotonic_time_sec++;
- }
-
- vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
- vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
-
- vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
- tk->wall_to_monotonic);
-
- write_seqcount_end(&vdata->seq);
-}
-
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
const char *message)
{
@@ -374,7 +330,6 @@ void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- extern char __vvar_page;
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
@@ -393,9 +348,13 @@ static int __init vsyscall_init(void)
{
BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
+ cpu_notifier_register_begin();
+
on_each_cpu(cpu_vsyscall_init, NULL, 1);
/* notifier priority > KVM */
- hotcpu_notifier(cpu_vsyscall_notifier, 30);
+ __hotcpu_notifier(cpu_vsyscall_notifier, 30);
+
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
new file mode 100644
index 000000000000..f9c6e56e14b5
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ * Modified for x86 32 bit architecture by
+ * Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
+ * Thanks to hpa@transmeta.com for some useful hint.
+ * Special thanks to Ingo Molnar for his early experience with
+ * a different vsyscall implementation for Linux/IA32 and for the name.
+ *
+ */
+
+#include <linux/timekeeper_internal.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
+
+void update_vsyscall_tz(void)
+{
+ vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
+ vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+
+ gtod_write_begin(vdata);
+
+ /* copy vsyscall data */
+ vdata->vclock_mode = tk->clock->archdata.vclock_mode;
+ vdata->cycle_last = tk->clock->cycle_last;
+ vdata->mask = tk->clock->mask;
+ vdata->mult = tk->mult;
+ vdata->shift = tk->shift;
+
+ vdata->wall_time_sec = tk->xtime_sec;
+ vdata->wall_time_snsec = tk->xtime_nsec;
+
+ vdata->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->xtime_nsec
+ + (tk->wall_to_monotonic.tv_nsec
+ << tk->shift);
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->shift;
+ vdata->monotonic_time_sec++;
+ }
+
+ vdata->wall_time_coarse_sec = tk->xtime_sec;
+ vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift);
+
+ vdata->monotonic_time_coarse_sec =
+ vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_coarse_nsec =
+ vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+ while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+ vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+ vdata->monotonic_time_coarse_sec++;
+ }
+
+ gtod_write_end(vdata);
+}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e5503d8aec1d..bea60671ef8a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -28,7 +28,7 @@ static u32 xstate_required_size(u64 xstate_bv)
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
- xstate_bv &= ~XSTATE_FPSSE;
+ xstate_bv &= XSTATE_EXTEND_MASK;
while (xstate_bv) {
if (xstate_bv & 0x1) {
u32 eax, ebx, ecx, edx;
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
return ret;
}
+u64 kvm_supported_xcr0(void)
+{
+ u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+ if (!kvm_x86_ops->mpx_supported())
+ xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+ return xcr0;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -73,9 +83,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) &
- host_xcr0 & KVM_SUPPORTED_XCR0;
- vcpu->arch.guest_xstate_size =
- xstate_required_size(vcpu->arch.guest_supported_xcr0);
+ kvm_supported_xcr0();
+ vcpu->arch.guest_xstate_size = best->ebx =
+ xstate_required_size(vcpu->arch.xcr0);
}
kvm_pmu_cpuid_update(vcpu);
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags = 0;
}
-static bool supported_xcr0_bit(unsigned bit)
-{
- u64 mask = ((u64)1 << bit);
-
- return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
#define F(x) bit(X86_FEATURE_##x)
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,6 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+ unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
@@ -303,7 +307,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(ADX);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -436,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 0xd: {
int idx, i;
+ u64 supported = kvm_supported_xcr0();
- entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
- entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+ entry->eax &= supported;
+ entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
+ u64 mask = ((u64)1 << idx);
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
- if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+ if (entry[i].eax == 0 || !(supported & mask))
continue;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 07ffca0a89e9..205b17eed93c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3668,6 +3668,10 @@ static const struct gprefix pfx_vmovntpx = {
I(0, em_mov), N, N, N,
};
+static const struct gprefix pfx_0f_28_0f_29 = {
+ I(Aligned, em_mov), I(Aligned, em_mov), N, N,
+};
+
static const struct escape escape_d9 = { {
N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
@@ -3870,7 +3874,9 @@ static const struct opcode twobyte_table[256] = {
IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
N, N, N, N,
- N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
+ GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
+ N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b531351a587..f5704d9e5ddc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3329,7 +3329,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+ return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cba218a2f08d..b1e6c1bf68d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,7 +913,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
* and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
* used by guest then tlbs are not flushed, so guest is allowed to access the
* freed pages.
- * And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
+ * We set tlbs_dirty to let the notifier know this change and delay the flush
+ * until such a case actually happens.
*/
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -942,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return -EINVAL;
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
- vcpu->kvm->tlbs_dirty++;
+ vcpu->kvm->tlbs_dirty = true;
continue;
}
@@ -957,7 +958,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (gfn != sp->gfns[i]) {
drop_spte(vcpu->kvm, &sp->spt[i]);
- vcpu->kvm->tlbs_dirty++;
+ vcpu->kvm->tlbs_dirty = true;
continue;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2de1bc09a8d4..7f4f9c2badae 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,6 +34,7 @@
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
+#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/virtext.h>
@@ -303,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
return vmcb->control.intercept_cr & (1U << bit);
}
-static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr |= (1U << bit);
+ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+ | (1 << INTERCEPT_DR1_READ)
+ | (1 << INTERCEPT_DR2_READ)
+ | (1 << INTERCEPT_DR3_READ)
+ | (1 << INTERCEPT_DR4_READ)
+ | (1 << INTERCEPT_DR5_READ)
+ | (1 << INTERCEPT_DR6_READ)
+ | (1 << INTERCEPT_DR7_READ)
+ | (1 << INTERCEPT_DR0_WRITE)
+ | (1 << INTERCEPT_DR1_WRITE)
+ | (1 << INTERCEPT_DR2_WRITE)
+ | (1 << INTERCEPT_DR3_WRITE)
+ | (1 << INTERCEPT_DR4_WRITE)
+ | (1 << INTERCEPT_DR5_WRITE)
+ | (1 << INTERCEPT_DR6_WRITE)
+ | (1 << INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
-static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr &= ~(1U << bit);
+ vmcb->control.intercept_dr = 0;
recalc_intercepts(svm);
}
@@ -1080,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR0_READ);
- set_dr_intercept(svm, INTERCEPT_DR1_READ);
- set_dr_intercept(svm, INTERCEPT_DR2_READ);
- set_dr_intercept(svm, INTERCEPT_DR3_READ);
- set_dr_intercept(svm, INTERCEPT_DR4_READ);
- set_dr_intercept(svm, INTERCEPT_DR5_READ);
- set_dr_intercept(svm, INTERCEPT_DR6_READ);
- set_dr_intercept(svm, INTERCEPT_DR7_READ);
-
- set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+ set_dr_intercepts(svm);
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
@@ -1684,6 +1684,21 @@ static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
mark_dirty(svm->vmcb, VMCB_DR);
}
+static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ get_debugreg(vcpu->arch.db[0], 0);
+ get_debugreg(vcpu->arch.db[1], 1);
+ get_debugreg(vcpu->arch.db[2], 2);
+ get_debugreg(vcpu->arch.db[3], 3);
+ vcpu->arch.dr6 = svm_get_dr6(vcpu);
+ vcpu->arch.dr7 = svm->vmcb->save.dr7;
+
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+ set_dr_intercepts(svm);
+}
+
static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -2842,6 +2857,7 @@ static int iret_interception(struct vcpu_svm *svm)
clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
return 1;
}
@@ -2974,6 +2990,17 @@ static int dr_interception(struct vcpu_svm *svm)
unsigned long val;
int err;
+ if (svm->vcpu.guest_debug == 0) {
+ /*
+ * No more DR vmexits; force a reload of the debug registers
+ * and reenter on this instruction. The next vmexit will
+ * retrieve the full state of the debug registers.
+ */
+ clr_dr_intercepts(svm);
+ svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ return 1;
+ }
+
if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
return emulate_on_interception(svm);
@@ -3649,7 +3676,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
return ret;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3663,16 +3690,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
== HF_NMI_MASK)
- return 0; /* IRET will cause a vm exit */
+ return; /* IRET will cause a vm exit */
/*
* Something prevents NMI from been injected. Single step over possible
@@ -3681,7 +3707,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu)
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
- return 0;
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4064,6 +4089,11 @@ static bool svm_invpcid_supported(void)
return false;
}
+static bool svm_mpx_supported(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -4302,6 +4332,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_dr6 = svm_get_dr6,
.set_dr6 = svm_set_dr6,
.set_dr7 = svm_set_dr7,
+ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
@@ -4345,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.rdtscp_supported = svm_rdtscp_supported,
.invpcid_supported = svm_invpcid_supported,
+ .mpx_supported = svm_mpx_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 392752834751..1320e0f8e611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
+#include <linux/hrtimer.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -42,6 +43,7 @@
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/perf_event.h>
+#include <asm/debugreg.h>
#include <asm/kexec.h>
#include "trace.h"
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO);
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -202,6 +206,7 @@ struct __packed vmcs12 {
u64 guest_pdptr1;
u64 guest_pdptr2;
u64 guest_pdptr3;
+ u64 guest_bndcfgs;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
@@ -374,6 +379,9 @@ struct nested_vmx {
*/
struct page *apic_access_page;
u64 msr_ia32_feature_control;
+
+ struct hrtimer preemption_timer;
+ bool preemption_timer_expired;
};
#define POSTED_INTR_ON 0
@@ -441,6 +449,7 @@ struct vcpu_vmx {
#endif
int gs_ldt_reload_needed;
int fs_reload_needed;
+ u64 msr_host_bndcfgs;
} host_state;
struct {
int vm86_active;
@@ -533,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = {
GUEST_CS_LIMIT,
GUEST_CS_BASE,
GUEST_ES_BASE,
+ GUEST_BNDCFGS,
CR0_GUEST_HOST_MASK,
CR0_READ_SHADOW,
CR4_READ_SHADOW,
@@ -588,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(GUEST_PDPTR1, guest_pdptr1),
FIELD64(GUEST_PDPTR2, guest_pdptr2),
FIELD64(GUEST_PDPTR3, guest_pdptr3),
+ FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
FIELD64(HOST_IA32_PAT, host_ia32_pat),
FIELD64(HOST_IA32_EFER, host_ia32_efer),
FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -718,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -728,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static bool vmx_mpx_supported(void);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1047,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1710,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
+ if (boot_cpu_has(X86_FEATURE_MPX))
+ rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
@@ -1747,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
+ if (vmx->host_state.msr_host_bndcfgs)
+ wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
/*
* If the FPU is not active (through the host task or
* the guest vcpu), then restore the cr0.TS bit.
@@ -2248,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
*/
nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
- PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+ PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+ nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
/*
* Exit controls
@@ -2265,15 +2288,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
- !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
- nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- }
- nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2287,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
VM_ENTRY_LOAD_IA32_PAT;
nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
VM_ENTRY_LOAD_IA32_EFER);
+ if (vmx_mpx_supported())
+ nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2342,9 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
- nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
- VMX_MISC_SAVE_EFER_LMA;
- nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+ nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+ VMX_MISC_ACTIVITY_HLT;
nested_vmx_misc_high = 0;
}
@@ -2479,6 +2501,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
case MSR_IA32_SYSENTER_ESP:
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
+ case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
+ data = vmcs_read64(GUEST_BNDCFGS);
+ break;
case MSR_IA32_FEATURE_CONTROL:
if (!nested_vmx_allowed(vcpu))
return 1;
@@ -2547,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
+ case MSR_IA32_BNDCFGS:
+ if (!vmx_mpx_supported())
+ return 1;
+ vmcs_write64(GUEST_BNDCFGS, data);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
@@ -2832,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmx_capability.ept, vmx_capability.vpid);
}
- min = 0;
+ min = VM_EXIT_SAVE_DEBUG_CONTROLS;
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_ACK_INTR_ON_EXIT;
+ VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
@@ -2853,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
!(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
- min = 0;
- opt = VM_ENTRY_LOAD_IA32_PAT;
+ min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -4223,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
+
+ if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
+ exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+
if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
exec_control &= ~CPU_BASED_TPR_SHADOW;
#ifdef CONFIG_X86_64
@@ -4496,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
PIN_BASED_NMI_EXITING;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
- /*
- * We get here if vmx_interrupt_allowed() said we can't
- * inject to L1 now because L2 must run. The caller will have
- * to make L2 exit right after entry, so we can inject to L1
- * more promptly.
- */
- return -EBUSY;
-
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (!cpu_has_virtual_nmis())
- return enable_irq_window(vcpu);
-
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
- return enable_irq_window(vcpu);
+ if (!cpu_has_virtual_nmis() ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ enable_irq_window(vcpu);
+ return;
+ }
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4620,22 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_nmi(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- NMI_VECTOR | INTR_TYPE_NMI_INTR |
- INTR_INFO_VALID_MASK, 0);
- /*
- * The NMI-triggered VM exit counts as injection:
- * clear this one and block further NMIs.
- */
- vcpu->arch.nmi_pending = 0;
- vmx_set_nmi_mask(vcpu, true);
- return 0;
- }
- }
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return 0;
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
@@ -4647,19 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_intr(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
- 0, 0);
- /*
- * fall through to normal code, but now in L1, not L2
- */
- }
- }
-
- return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ return (!to_vmx(vcpu)->nested.nested_run_pending &&
+ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -5102,6 +5102,22 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
}
+ if (vcpu->guest_debug == 0) {
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+
+ /*
+ * No more DR vmexits; force a reload of the debug registers
+ * and reenter on this instruction. The next vmexit will
+ * retrieve the full state of the debug registers.
+ */
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ return 1;
+ }
+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
@@ -5128,6 +5144,24 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
{
}
+static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ get_debugreg(vcpu->arch.db[0], 0);
+ get_debugreg(vcpu->arch.db[1], 1);
+ get_debugreg(vcpu->arch.db[2], 2);
+ get_debugreg(vcpu->arch.db[3], 3);
+ get_debugreg(vcpu->arch.dr6, 6);
+ vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
+
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{
vmcs_writel(GUEST_DR7, val);
@@ -5727,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
*/
}
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+ struct vcpu_vmx *vmx =
+ container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+ vmx->nested.preemption_timer_expired = true;
+ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+ kvm_vcpu_kick(&vmx->vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
/*
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
@@ -5791,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
@@ -6767,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* table is L0's fault.
*/
return 0;
- case EXIT_REASON_PREEMPTION_TIMER:
- return vmcs12->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER;
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
@@ -6785,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
- u64 delta_tsc_l1;
- u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
- if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER))
- return;
- preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
- MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
- preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
- delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
- - vcpu->arch.last_guest_tsc;
- preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
- if (preempt_val_l2 <= preempt_val_l1)
- preempt_val_l2 = 0;
- else
- preempt_val_l2 -= preempt_val_l1;
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -7052,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static bool vmx_mpx_supported(void)
+{
+ return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
+ (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
+}
+
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -7218,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
- nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7616,6 +7646,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
+ /* Make sure short timeouts reliably trigger an immediate vmexit.
+ * hrtimer_start does not guarantee this. */
+ if (preemption_timeout <= 1) {
+ vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+ return;
+ }
+
+ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+ preemption_timeout *= 1000000;
+ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+ hrtimer_start(&vmx->nested.preemption_timer,
+ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7629,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7687,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
- (vmcs_config.pin_based_exec_ctrl |
- vmcs12->pin_based_vm_exec_control));
+ exec_control = vmcs12->pin_based_vm_exec_control;
+ exec_control |= vmcs_config.pin_based_exec_ctrl;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
- vmcs12->vmx_preemption_timer_value);
+ vmx->nested.preemption_timer_expired = false;
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ vmx_start_preemption_timer(vcpu);
/*
* Whether page-faults are trapped is determined by a combination of
@@ -7721,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
- u32 exec_control = vmx_secondary_exec_control(vmx);
+ exec_control = vmx_secondary_exec_control(vmx);
if (!vmx->rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
@@ -7808,10 +7860,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- exit_control = vmcs_config.vmexit_ctrl;
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- vm_exit_controls_init(vmx, exit_control);
+ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -7830,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
set_cr4_guest_host_mask(vmx);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
vmcs_write64(TSC_OFFSET,
vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8155,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
}
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ vmx->nested.preemption_timer_expired) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+ return 0;
+ }
+
+ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+ if (vmx->nested.nested_run_pending ||
+ vcpu->arch.interrupt.pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+ NMI_VECTOR | INTR_TYPE_NMI_INTR |
+ INTR_INFO_VALID_MASK, 0);
+ /*
+ * The NMI-triggered VM exit counts as injection:
+ * clear this one and block further NMIs.
+ */
+ vcpu->arch.nmi_pending = 0;
+ vmx_set_nmi_mask(vcpu, true);
+ return 0;
+ }
+
+ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+ nested_exit_on_intr(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+ }
+
+ return 0;
+}
+
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+ ktime_t remaining =
+ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+ u64 value;
+
+ if (ktime_to_ns(remaining) <= 0)
+ return 0;
+
+ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+ do_div(value, 1000000);
+ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8225,10 +8329,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
else
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
- if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
- (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
- vmcs12->vmx_preemption_timer_value =
- vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ if (vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ vmcs12->vmx_preemption_timer_value =
+ vmx_get_preemption_timer_value(vcpu);
+ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+ }
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
@@ -8260,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+ if (vmx_mpx_supported())
+ vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/* update exit information fields: */
@@ -8369,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
+ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+ vmcs_write64(GUEST_BNDCFGS, 0);
+
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
@@ -8495,6 +8608,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
+
+ /* in case we halted in L2 */
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
/*
@@ -8573,6 +8689,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_dr6 = vmx_get_dr6,
.set_dr6 = vmx_set_dr6,
.set_dr7 = vmx_set_dr7,
+ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
@@ -8634,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
+ .mpx_supported = vmx_mpx_supported,
+
+ .check_nested_events = vmx_check_nested_events,
};
static int __init vmx_init(void)
@@ -8721,6 +8841,8 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
memcpy(vmx_msr_bitmap_longmode_x2apic,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b8578432d5b..9d1b5cd4d34c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -595,13 +595,13 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- u64 xcr0;
+ u64 xcr0 = xcr;
+ u64 old_xcr0 = vcpu->arch.xcr0;
u64 valid_bits;
/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
if (index != XCR_XFEATURE_ENABLED_MASK)
return 1;
- xcr0 = xcr;
if (!(xcr0 & XSTATE_FP))
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -616,8 +616,14 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
if (xcr0 & ~valid_bits)
return 1;
+ if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
+ return 1;
+
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
+
+ if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
+ kvm_update_cpuid(vcpu);
return 0;
}
@@ -753,7 +759,9 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
else
dr7 = vcpu->arch.dr7;
kvm_x86_ops->set_dr7(vcpu, dr7);
- vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
+ if (dr7 & DR7_BP_EN_MASK)
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
@@ -879,7 +887,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
};
static unsigned num_msrs_to_save;
@@ -1581,7 +1589,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
- vcpu->last_kernel_ns = kernel_ns;
vcpu->last_guest_tsc = tsc_timestamp;
/*
@@ -1623,14 +1630,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* the others.
*
* So in those cases, request a kvmclock update for all vcpus.
- * The worst case for a remote vcpu to update its kvmclock
- * is then bounded by maximum nohz sleep latency.
+ * We need to rate-limit these requests though, as they can
+ * considerably slow guests that have a large number of vcpus.
+ * The time for a remote vcpu to update its kvmclock is bound
+ * by the delay we use to rate-limit the updates.
*/
-static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
+
+static void kvmclock_update_fn(struct work_struct *work)
{
int i;
- struct kvm *kvm = v->kvm;
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
+ kvmclock_update_work);
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1639,6 +1653,29 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
}
}
+static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
+{
+ struct kvm *kvm = v->kvm;
+
+ set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+ schedule_delayed_work(&kvm->arch.kvmclock_update_work,
+ KVMCLOCK_UPDATE_DELAY);
+}
+
+#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
+
+static void kvmclock_sync_fn(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
+ kvmclock_sync_work);
+ struct kvm *kvm = container_of(ka, struct kvm, arch);
+
+ schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
+ schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
+ KVMCLOCK_SYNC_PERIOD);
+}
+
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
@@ -2323,9 +2360,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_VP_INDEX: {
int r;
struct kvm_vcpu *v;
- kvm_for_each_vcpu(r, v, vcpu->kvm)
- if (v == vcpu)
+ kvm_for_each_vcpu(r, v, vcpu->kvm) {
+ if (v == vcpu) {
data = r;
+ break;
+ }
+ }
break;
}
case HV_X64_MSR_EOI:
@@ -2617,6 +2657,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
case KVM_CAP_HYPERV_TIME:
+ case KVM_CAP_IOAPIC_POLARITY_IGNORED:
#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
case KVM_CAP_ASSIGN_DEV_IRQ:
case KVM_CAP_PCI_2_3:
@@ -3043,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
* with old userspace.
*/
- if (xstate_bv & ~KVM_SUPPORTED_XCR0)
- return -EINVAL;
- if (xstate_bv & ~host_xcr0)
+ if (xstate_bv & ~kvm_supported_xcr0())
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3898,6 +3937,23 @@ static void kvm_init_msr_list(void)
for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
continue;
+
+ /*
+ * Even MSRs that are valid in the host may not be exposed
+ * to the guests in some cases. We could work around this
+ * in VMX with the generic MSR save/load machinery, but it
+ * is not really worthwhile since it will really only
+ * happen with nested virtualization.
+ */
+ switch (msrs_to_save[i]) {
+ case MSR_IA32_BNDCFGS:
+ if (!kvm_x86_ops->mpx_supported())
+ continue;
+ break;
+ default:
+ break;
+ }
+
if (j < i)
msrs_to_save[j] = msrs_to_save[i];
j++;
@@ -4394,6 +4450,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
if (!exchanged)
return X86EMUL_CMPXCHG_FAILED;
+ mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
kvm_mmu_pte_write(vcpu, gpa, new, bytes);
return X86EMUL_CONTINUE;
@@ -5365,7 +5422,8 @@ static void kvm_timer_init(void)
int cpu;
max_tsc_khz = tsc_khz;
- register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+
+ cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
@@ -5382,6 +5440,10 @@ static void kvm_timer_init(void)
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
+
+ __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpu_notifier_register_done();
+
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@ -5537,9 +5599,10 @@ int kvm_arch_init(void *opaque)
goto out_free_percpu;
kvm_set_mmio_spte_mask();
- kvm_init_msr_list();
kvm_x86_ops = ops;
+ kvm_init_msr_list();
+
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -5782,8 +5845,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
-static void inject_pending_event(struct kvm_vcpu *vcpu)
+static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
{
+ int r;
+
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5793,17 +5858,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.reinject);
- return;
+ return 0;
}
if (vcpu->arch.nmi_injected) {
kvm_x86_ops->set_nmi(vcpu);
- return;
+ return 0;
}
if (vcpu->arch.interrupt.pending) {
kvm_x86_ops->set_irq(vcpu);
- return;
+ return 0;
+ }
+
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (r != 0)
+ return r;
}
/* try to inject new event if pending */
@@ -5820,6 +5891,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
kvm_x86_ops->set_irq(vcpu);
}
}
+ return 0;
}
static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5924,15 +5996,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- inject_pending_event(vcpu);
-
+ if (inject_pending_event(vcpu, req_int_win) != 0)
+ req_immediate_exit = true;
/* enable NMI/IRQ window open exits if needed */
- if (vcpu->arch.nmi_pending)
- req_immediate_exit =
- kvm_x86_ops->enable_nmi_window(vcpu) != 0;
+ else if (vcpu->arch.nmi_pending)
+ kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- req_immediate_exit =
- kvm_x86_ops->enable_irq_window(vcpu) != 0;
+ kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
/*
@@ -5992,12 +6062,28 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
+ set_debugreg(vcpu->arch.dr6, 6);
}
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu);
/*
+ * Do this here before restoring debug registers on the host. And
+ * since we do this before handling the vmexit, a DR access vmexit
+ * can (a) read the correct value of the debug registers, (b) set
+ * KVM_DEBUGREG_WONT_EXIT again.
+ */
+ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
+ int i;
+
+ WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
+ kvm_x86_ops->sync_dirty_debug_regs(vcpu);
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ }
+
+ /*
* If the guest has used debug registers, at least dr7
* will be disabled while returning to the host.
* If we don't have active breakpoints in the host, we don't
@@ -6711,6 +6797,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
int r;
struct msr_data msr;
+ struct kvm *kvm = vcpu->kvm;
r = vcpu_load(vcpu);
if (r)
@@ -6721,6 +6808,9 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
+ schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
+ KVMCLOCK_SYNC_PERIOD);
+
return r;
}
@@ -7013,6 +7103,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
pvclock_update_vm_gtod_copy(kvm);
+ INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
+ INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
+
return 0;
}
@@ -7050,6 +7143,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_sync_events(struct kvm *kvm)
{
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
+ cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
}
@@ -7248,6 +7343,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ kvm_x86_ops->check_nested_events(vcpu, false);
+
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8da5823bcde6..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,9 +122,12 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
-#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
+ | XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
+extern u64 kvm_supported_xcr0(void);
+
extern unsigned int min_timer_period_us;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 799580cabc78..597ac155c91c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -328,17 +328,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
return;
}
-static int __initdata early_ioremap_debug;
-
-static int __init early_ioremap_debug_setup(char *str)
-{
- early_ioremap_debug = 1;
-
- return 0;
-}
-early_param("early_ioremap_debug", early_ioremap_debug_setup);
-
-static __initdata int after_paging_init;
static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
@@ -362,18 +351,11 @@ bool __init is_early_ioremap_ptep(pte_t *ptep)
return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
}
-static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
-
void __init early_ioremap_init(void)
{
pmd_t *pmd;
- int i;
- if (early_ioremap_debug)
- printk(KERN_INFO "early_ioremap_init()\n");
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
+ early_ioremap_setup();
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
@@ -402,13 +384,8 @@ void __init early_ioremap_init(void)
}
}
-void __init early_ioremap_reset(void)
-{
- after_paging_init = 1;
-}
-
-static void __init __early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags)
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
@@ -425,198 +402,3 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
pte_clear(&init_mm, addr, pte);
__flush_tlb_one(addr);
}
-
-static inline void __init early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t prot)
-{
- if (after_paging_init)
- __set_fixmap(idx, phys, prot);
- else
- __early_set_fixmap(idx, phys, prot);
-}
-
-static inline void __init early_clear_fixmap(enum fixed_addresses idx)
-{
- if (after_paging_init)
- clear_fixmap(idx);
- else
- __early_set_fixmap(idx, 0, __pgprot(0));
-}
-
-static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
-static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
-
-void __init fixup_early_ioremap(void)
-{
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i]) {
- WARN_ON(1);
- break;
- }
- }
-
- early_ioremap_init();
-}
-
-static int __init check_early_ioremap_leak(void)
-{
- int count = 0;
- int i;
-
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
- if (prev_map[i])
- count++;
-
- if (!count)
- return 0;
- WARN(1, KERN_WARNING
- "Debug warning: early ioremap leak of %d areas detected.\n",
- count);
- printk(KERN_WARNING
- "please boot with early_ioremap_debug and report the dmesg.\n");
-
- return 1;
-}
-late_initcall(check_early_ioremap_leak);
-
-static void __init __iomem *
-__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
-{
- unsigned long offset;
- resource_size_t last_addr;
- unsigned int nrpages;
- enum fixed_addresses idx;
- int i, slot;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (!prev_map[i]) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n",
- __func__, (u64)phys_addr, size);
- WARN_ON(1);
- return NULL;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ",
- __func__, (u64)phys_addr, size, slot);
- dump_stack();
- }
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr) {
- WARN_ON(1);
- return NULL;
- }
-
- prev_size[slot] = size;
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr + 1) - phys_addr;
-
- /*
- * Mappings have to fit in the FIX_BTMAP area.
- */
- nrpages = size >> PAGE_SHIFT;
- if (nrpages > NR_FIX_BTMAPS) {
- WARN_ON(1);
- return NULL;
- }
-
- /*
- * Ok, go for it..
- */
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- while (nrpages > 0) {
- early_set_fixmap(idx, phys_addr, prot);
- phys_addr += PAGE_SIZE;
- --idx;
- --nrpages;
- }
- if (early_ioremap_debug)
- printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
-
- prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
- return prev_map[slot];
-}
-
-/* Remap an IO device */
-void __init __iomem *
-early_ioremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
-}
-
-/* Remap memory */
-void __init __iomem *
-early_memremap(resource_size_t phys_addr, unsigned long size)
-{
- return __early_ioremap(phys_addr, size, PAGE_KERNEL);
-}
-
-void __init early_iounmap(void __iomem *addr, unsigned long size)
-{
- unsigned long virt_addr;
- unsigned long offset;
- unsigned int nrpages;
- enum fixed_addresses idx;
- int i, slot;
-
- slot = -1;
- for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
- if (prev_map[i] == addr) {
- slot = i;
- break;
- }
- }
-
- if (slot < 0) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n",
- addr, size);
- WARN_ON(1);
- return;
- }
-
- if (prev_size[slot] != size) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
- addr, size, slot, prev_size[slot]);
- WARN_ON(1);
- return;
- }
-
- if (early_ioremap_debug) {
- printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
- size, slot);
- dump_stack();
- }
-
- virt_addr = (unsigned long)addr;
- if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) {
- WARN_ON(1);
- return;
- }
- offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
-
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
- while (nrpages > 0) {
- early_clear_fixmap(idx);
- --idx;
- --nrpages;
- }
- prev_map[slot] = NULL;
-}
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index d87dd6d042d6..dd89a13f1051 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -78,10 +78,16 @@ early_initcall(kmemcheck_init);
*/
static int __init param_kmemcheck(char *str)
{
+ int val;
+ int ret;
+
if (!str)
return -EINVAL;
- sscanf(str, "%d", &kmemcheck_enabled);
+ ret = kstrtoint(str, 0, &val);
+ if (ret)
+ return ret;
+ kmemcheck_enabled = val;
return 0;
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 27aa0455fab3..1d045f9c390f 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -687,10 +687,6 @@ static int __init dummy_numa_init(void)
void __init x86_numa_init(void)
{
if (!numa_off) {
-#ifdef CONFIG_X86_NUMAQ
- if (!numa_init(numaq_numa_init))
- return;
-#endif
#ifdef CONFIG_ACPI_NUMA
if (!numa_init(x86_acpi_numa_init))
return;
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index a69bcb8c7621..4dd8cf652579 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -127,7 +127,7 @@ static int __init parse_reservetop(char *arg)
address = memparse(arg, &arg);
reserve_top_address(address);
- fixup_early_ioremap();
+ early_ioremap_init();
return 0;
}
early_param("reservetop", parse_reservetop);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4ed75dd81d05..dc017735bb91 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp)
}
break;
case BPF_S_ANC_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4);
- if (is_imm8(offsetof(struct sk_buff, rxhash))) {
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ if (is_imm8(offsetof(struct sk_buff, hash))) {
/* mov off8(%rdi),%eax */
- EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash));
+ EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
} else {
EMIT2(0x8b, 0x87);
- EMIT(offsetof(struct sk_buff, rxhash), 4);
+ EMIT(offsetof(struct sk_buff, hash), 4);
}
break;
case BPF_S_ANC_QUEUE:
@@ -772,6 +772,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
bpf_flush_icache(header, image + proglen);
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *)image;
+ fp->jited = 1;
}
out:
kfree(addrs);
@@ -791,7 +792,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
void bpf_jit_free(struct sk_filter *fp)
{
- if (fp->bpf_func != sk_run_filter) {
+ if (fp->jited) {
INIT_WORK(&fp->work, bpf_jit_free_deferred);
schedule_work(&fp->work);
} else {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 6890d8498e0b..379e8bd0deea 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -494,14 +494,19 @@ static int nmi_setup(void)
if (err)
goto fail;
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' */
get_online_cpus();
- register_cpu_notifier(&oprofile_cpu_nb);
nmi_enabled = 1;
/* make nmi_enabled visible to the nmi handler: */
smp_mb();
on_each_cpu(nmi_cpu_setup, NULL, 1);
+ __register_cpu_notifier(&oprofile_cpu_nb);
put_online_cpus();
+ cpu_notifier_register_done();
+
return 0;
fail:
free_msrs();
@@ -512,12 +517,18 @@ static void nmi_shutdown(void)
{
struct op_msrs *msrs;
+ cpu_notifier_register_begin();
+
+ /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */
get_online_cpus();
- unregister_cpu_notifier(&oprofile_cpu_nb);
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
nmi_enabled = 0;
ctr_running = 0;
+ __unregister_cpu_notifier(&oprofile_cpu_nb);
put_online_cpus();
+
+ cpu_notifier_register_done();
+
/* make variables visible to the nmi handler: */
smp_mb();
unregister_nmi_handler(NMI_LOCAL, "oprofile");
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index e063eed0f912..5c6fc3577a49 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -13,9 +13,6 @@ obj-y += legacy.o irq.o
obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
-obj-$(CONFIG_X86_VISWS) += visws.o
-
-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_NUMACHIP) += numachip.o
obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 4f25ec077552..01edac6c5e18 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -218,9 +218,8 @@ static void teardown_mcfg_map(struct pci_root_info *info)
}
#endif
-static acpi_status
-resource_to_addr(struct acpi_resource *resource,
- struct acpi_resource_address64 *addr)
+static acpi_status resource_to_addr(struct acpi_resource *resource,
+ struct acpi_resource_address64 *addr)
{
acpi_status status;
struct acpi_resource_memory24 *memory24;
@@ -265,8 +264,7 @@ resource_to_addr(struct acpi_resource *resource,
return AE_ERROR;
}
-static acpi_status
-count_resource(struct acpi_resource *acpi_res, void *data)
+static acpi_status count_resource(struct acpi_resource *acpi_res, void *data)
{
struct pci_root_info *info = data;
struct acpi_resource_address64 addr;
@@ -278,8 +276,7 @@ count_resource(struct acpi_resource *acpi_res, void *data)
return AE_OK;
}
-static acpi_status
-setup_resource(struct acpi_resource *acpi_res, void *data)
+static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data)
{
struct pci_root_info *info = data;
struct resource *res;
@@ -435,9 +432,9 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
__release_pci_root_info(info);
}
-static void
-probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
- int busnum, int domain)
+static void probe_pci_root_info(struct pci_root_info *info,
+ struct acpi_device *device,
+ int busnum, int domain)
{
size_t size;
@@ -473,16 +470,13 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct acpi_device *device = root->device;
- struct pci_root_info *info = NULL;
+ struct pci_root_info *info;
int domain = root->segment;
int busnum = root->secondary.start;
LIST_HEAD(resources);
- struct pci_bus *bus = NULL;
+ struct pci_bus *bus;
struct pci_sysdata *sd;
int node;
-#ifdef CONFIG_ACPI_NUMA
- int pxm;
-#endif
if (pci_ignore_seg)
domain = 0;
@@ -494,19 +488,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
return NULL;
}
- node = -1;
-#ifdef CONFIG_ACPI_NUMA
- pxm = acpi_get_pxm(device->handle);
- if (pxm >= 0)
- node = pxm_to_node(pxm);
- if (node != -1)
- set_mp_bus_to_node(busnum, node);
- else
-#endif
- node = get_mp_bus_to_node(busnum);
+ node = acpi_get_node(device->handle);
+ if (node == NUMA_NO_NODE)
+ node = x86_pci_root_bus_node(busnum);
- if (node != -1 && !node_online(node))
- node = -1;
+ if (node != NUMA_NO_NODE && !node_online(node))
+ node = NUMA_NO_NODE;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
@@ -519,15 +506,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
sd->domain = domain;
sd->node = node;
sd->companion = device;
- /*
- * Maybe the desired pci bus has been already scanned. In such case
- * it is unnecessary to scan the pci bus with the given domain,busnum.
- */
+
bus = pci_find_bus(domain, busnum);
if (bus) {
/*
- * If the desired bus exits, the content of bus->sysdata will
- * be replaced by sd.
+ * If the desired bus has been scanned already, replace
+ * its bus->sysdata.
*/
memcpy(bus->sysdata, sd, sizeof(*sd));
kfree(info);
@@ -572,15 +556,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
pcie_bus_configure_settings(child);
}
- if (bus && node != -1) {
-#ifdef CONFIG_ACPI_NUMA
- if (pxm >= 0)
- dev_printk(KERN_DEBUG, &bus->dev,
- "on NUMA node %d (pxm %d)\n", node, pxm);
-#else
+ if (bus && node != NUMA_NO_NODE)
dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
-#endif
- }
return bus;
}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index a48be98e9ded..e88f4c53d7f6 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -44,15 +44,6 @@ static struct pci_root_info __init *find_pci_root_info(int node, int link)
return NULL;
}
-static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node)
-{
-#ifdef CONFIG_NUMA
- int j;
-
- for (j = min_bus; j <= max_bus; j++)
- set_mp_bus_to_node(j, node);
-#endif
-}
/**
* early_fill_mp_bus_to_node()
* called before pcibios_scan_root and pci_scan_bus
@@ -117,7 +108,6 @@ static int __init early_fill_mp_bus_info(void)
min_bus = (reg >> 16) & 0xff;
max_bus = (reg >> 24) & 0xff;
node = (reg >> 4) & 0x07;
- set_mp_bus_range_to_node(min_bus, max_bus, node);
link = (reg >> 8) & 0x03;
info = alloc_pci_root_info(min_bus, max_bus, node, link);
@@ -380,10 +370,13 @@ static int __init pci_io_ecs_init(void)
if (early_pci_allowed())
pci_enable_pci_io_ecs();
- register_cpu_notifier(&amd_cpu_notifier);
+ cpu_notifier_register_begin();
for_each_online_cpu(cpu)
amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
(void *)(long)cpu);
+ __register_cpu_notifier(&amd_cpu_notifier);
+ cpu_notifier_register_done();
+
pci_probe |= PCI_HAS_IO_ECS;
return 0;
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index c2735feb2508..f3a2cfc14125 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -10,9 +10,6 @@ static struct pci_root_info *x86_find_pci_root_info(int bus)
{
struct pci_root_info *info;
- if (list_empty(&pci_root_infos))
- return NULL;
-
list_for_each_entry(info, &pci_root_infos, list)
if (info->busn.start == bus)
return info;
@@ -20,6 +17,16 @@ static struct pci_root_info *x86_find_pci_root_info(int bus)
return NULL;
}
+int x86_pci_root_bus_node(int bus)
+{
+ struct pci_root_info *info = x86_find_pci_root_info(bus);
+
+ if (!info)
+ return NUMA_NO_NODE;
+
+ return info->node;
+}
+
void x86_pci_root_bus_resources(int bus, struct list_head *resources)
{
struct pci_root_info *info = x86_find_pci_root_info(bus);
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 981c2dbd72cc..059a76c29739 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -456,19 +456,25 @@ void __init dmi_check_pciprobe(void)
dmi_check_system(pciprobe_dmi_table);
}
-struct pci_bus *pcibios_scan_root(int busnum)
+void pcibios_scan_root(int busnum)
{
- struct pci_bus *bus = NULL;
+ struct pci_bus *bus;
+ struct pci_sysdata *sd;
+ LIST_HEAD(resources);
- while ((bus = pci_find_next_bus(bus)) != NULL) {
- if (bus->number == busnum) {
- /* Already scanned */
- return bus;
- }
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum);
+ return;
+ }
+ sd->node = x86_pci_root_bus_node(busnum);
+ x86_pci_root_bus_resources(busnum, &resources);
+ printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
+ bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources);
+ if (!bus) {
+ pci_free_resource_list(&resources);
+ kfree(sd);
}
-
- return pci_scan_bus_on_node(busnum, &pci_root_ops,
- get_mp_bus_to_node(busnum));
}
void __init pcibios_set_cache_line_size(void)
@@ -561,7 +567,6 @@ char * __init pcibios_setup(char *str)
pci_probe |= PCI_PROBE_NOEARLY;
return NULL;
}
-#ifndef CONFIG_X86_VISWS
else if (!strcmp(str, "usepirqmask")) {
pci_probe |= PCI_USE_PIRQ_MASK;
return NULL;
@@ -571,9 +576,7 @@ char * __init pcibios_setup(char *str)
} else if (!strncmp(str, "lastbus=", 8)) {
pcibios_last_bus = simple_strtol(str+8, NULL, 0);
return NULL;
- }
-#endif
- else if (!strcmp(str, "rom")) {
+ } else if (!strcmp(str, "rom")) {
pci_probe |= PCI_ASSIGN_ROMS;
return NULL;
} else if (!strcmp(str, "norom")) {
@@ -677,105 +680,3 @@ int pci_ext_cfg_avail(void)
else
return 0;
}
-
-struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
-{
- LIST_HEAD(resources);
- struct pci_bus *bus = NULL;
- struct pci_sysdata *sd;
-
- /*
- * Allocate per-root-bus (not per bus) arch-specific data.
- * TODO: leak; this memory is never freed.
- * It's arguable whether it's worth the trouble to care.
- */
- sd = kzalloc(sizeof(*sd), GFP_KERNEL);
- if (!sd) {
- printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno);
- return NULL;
- }
- sd->node = node;
- x86_pci_root_bus_resources(busno, &resources);
- printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno);
- bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources);
- if (!bus) {
- pci_free_resource_list(&resources);
- kfree(sd);
- }
-
- return bus;
-}
-
-struct pci_bus *pci_scan_bus_with_sysdata(int busno)
-{
- return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
-}
-
-/*
- * NUMA info for PCI busses
- *
- * Early arch code is responsible for filling in reasonable values here.
- * A node id of "-1" means "use current node". In other words, if a bus
- * has a -1 node id, it's not tightly coupled to any particular chunk
- * of memory (as is the case on some Nehalem systems).
- */
-#ifdef CONFIG_NUMA
-
-#define BUS_NR 256
-
-#ifdef CONFIG_X86_64
-
-static int mp_bus_to_node[BUS_NR] = {
- [0 ... BUS_NR - 1] = -1
-};
-
-void set_mp_bus_to_node(int busnum, int node)
-{
- if (busnum >= 0 && busnum < BUS_NR)
- mp_bus_to_node[busnum] = node;
-}
-
-int get_mp_bus_to_node(int busnum)
-{
- int node = -1;
-
- if (busnum < 0 || busnum > (BUS_NR - 1))
- return node;
-
- node = mp_bus_to_node[busnum];
-
- /*
- * let numa_node_id to decide it later in dma_alloc_pages
- * if there is no ram on that node
- */
- if (node != -1 && !node_online(node))
- node = -1;
-
- return node;
-}
-
-#else /* CONFIG_X86_32 */
-
-static int mp_bus_to_node[BUS_NR] = {
- [0 ... BUS_NR - 1] = -1
-};
-
-void set_mp_bus_to_node(int busnum, int node)
-{
- if (busnum >= 0 && busnum < BUS_NR)
- mp_bus_to_node[busnum] = (unsigned char) node;
-}
-
-int get_mp_bus_to_node(int busnum)
-{
- int node;
-
- if (busnum < 0 || busnum > (BUS_NR - 1))
- return 0;
- node = mp_bus_to_node[busnum];
- return node;
-}
-
-#endif /* CONFIG_X86_32 */
-
-#endif /* CONFIG_NUMA */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index bca9e85daaa5..94ae9ae9574f 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -25,9 +25,9 @@ static void pci_fixup_i450nx(struct pci_dev *d)
dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno,
suba, subb);
if (busno)
- pci_scan_bus_with_sysdata(busno); /* Bus A */
+ pcibios_scan_root(busno); /* Bus A */
if (suba < subb)
- pci_scan_bus_with_sysdata(suba+1); /* Bus B */
+ pcibios_scan_root(suba+1); /* Bus B */
}
pcibios_last_bus = -1;
}
@@ -42,7 +42,7 @@ static void pci_fixup_i450gx(struct pci_dev *d)
u8 busno;
pci_read_config_byte(d, 0x4a, &busno);
dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno);
- pci_scan_bus_with_sysdata(busno);
+ pcibios_scan_root(busno);
pcibios_last_bus = -1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
@@ -313,9 +313,10 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r
* IORESOURCE_ROM_SHADOW is used to associate the boot video
* card with this copy. On laptops this copy has to be used since
* the main ROM may be compressed or combined with another image.
- * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW
- * is marked here since the boot video device will be the only enabled
- * video device at this point.
+ * See pci_map_rom() for use of this flag. Before marking the device
+ * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set
+ * by either arch cde or vga-arbitration, if so only apply the fixup to this
+ * already determined primary video card.
*/
static void pci_fixup_video(struct pci_dev *pdev)
@@ -346,12 +347,13 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
bus = bus->parent;
}
- pci_read_config_word(pdev, PCI_COMMAND, &config);
- if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
- pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
- dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
- if (!vga_default_device())
+ if (!vga_default_device() || pdev == vga_default_device()) {
+ pci_read_config_word(pdev, PCI_COMMAND, &config);
+ if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
+ pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+ dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
vga_set_default_device(pdev);
+ }
}
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 372e9b8989b3..84112f55dd7a 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -136,13 +136,9 @@ static void __init pirq_peer_trick(void)
busmap[e->bus] = 1;
}
for (i = 1; i < 256; i++) {
- int node;
if (!busmap[i] || pci_find_bus(0, i))
continue;
- node = get_mp_bus_to_node(i);
- if (pci_scan_bus_on_node(i, &pci_root_ops, node))
- printk(KERN_INFO "PCI: Discovered primary peer "
- "bus %02x [IRQ]\n", i);
+ pcibios_scan_root(i);
}
pcibios_last_bus = -1;
}
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 4db96fb1c232..5b662c0faf8c 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -37,19 +37,17 @@ int __init pci_legacy_init(void)
void pcibios_scan_specific_bus(int busn)
{
int devfn;
- long node;
u32 l;
if (pci_find_bus(0, busn))
return;
- node = get_mp_bus_to_node(busn);
for (devfn = 0; devfn < 256; devfn += 8) {
if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn);
- pci_scan_bus_on_node(busn, &pci_root_ops, node);
+ pcibios_scan_root(busn);
return;
}
}
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
deleted file mode 100644
index 72c229f9ebcf..000000000000
--- a/arch/x86/pci/numaq_32.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * numaq_32.c - Low-level PCI access for NUMA-Q machines
- */
-
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/nodemask.h>
-#include <asm/apic.h>
-#include <asm/mpspec.h>
-#include <asm/pci_x86.h>
-#include <asm/numaq.h>
-
-#define BUS2QUAD(global) (mp_bus_id_to_node[global])
-
-#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
-
-#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-
-#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
- (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
-
-static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
-{
- unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
- if (xquad_portio)
- writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
- else
- outl(val, 0xCF8);
-}
-
-static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value)
-{
- unsigned long flags;
- void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
-
- WARN_ON(seg);
- if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
- return -EINVAL;
-
- raw_spin_lock_irqsave(&pci_config_lock, flags);
-
- write_cf8(bus, devfn, reg);
-
- switch (len) {
- case 1:
- if (xquad_portio)
- *value = readb(adr + (reg & 3));
- else
- *value = inb(0xCFC + (reg & 3));
- break;
- case 2:
- if (xquad_portio)
- *value = readw(adr + (reg & 2));
- else
- *value = inw(0xCFC + (reg & 2));
- break;
- case 4:
- if (xquad_portio)
- *value = readl(adr);
- else
- *value = inl(0xCFC);
- break;
- }
-
- raw_spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value)
-{
- unsigned long flags;
- void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
-
- WARN_ON(seg);
- if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
- return -EINVAL;
-
- raw_spin_lock_irqsave(&pci_config_lock, flags);
-
- write_cf8(bus, devfn, reg);
-
- switch (len) {
- case 1:
- if (xquad_portio)
- writeb(value, adr + (reg & 3));
- else
- outb((u8)value, 0xCFC + (reg & 3));
- break;
- case 2:
- if (xquad_portio)
- writew(value, adr + (reg & 2));
- else
- outw((u16)value, 0xCFC + (reg & 2));
- break;
- case 4:
- if (xquad_portio)
- writel(value, adr + reg);
- else
- outl((u32)value, 0xCFC);
- break;
- }
-
- raw_spin_unlock_irqrestore(&pci_config_lock, flags);
-
- return 0;
-}
-
-#undef PCI_CONF1_MQ_ADDRESS
-
-static const struct pci_raw_ops pci_direct_conf1_mq = {
- .read = pci_conf1_mq_read,
- .write = pci_conf1_mq_write
-};
-
-
-static void pci_fixup_i450nx(struct pci_dev *d)
-{
- /*
- * i450NX -- Find and scan all secondary buses on all PXB's.
- */
- int pxb, reg;
- u8 busno, suba, subb;
- int quad = BUS2QUAD(d->bus->number);
-
- dev_info(&d->dev, "searching for i450NX host bridges\n");
- reg = 0xd0;
- for(pxb=0; pxb<2; pxb++) {
- pci_read_config_byte(d, reg++, &busno);
- pci_read_config_byte(d, reg++, &suba);
- pci_read_config_byte(d, reg++, &subb);
- dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n",
- pxb, busno, suba, subb);
- if (busno) {
- /* Bus A */
- pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
- }
- if (suba < subb) {
- /* Bus B */
- pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1));
- }
- }
- pcibios_last_bus = -1;
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
-
-int __init pci_numaq_init(void)
-{
- int quad;
-
- raw_pci_ops = &pci_direct_conf1_mq;
-
- pcibios_scan_root(0);
- if (num_online_nodes() > 1)
- for_each_online_node(quad) {
- if (quad == 0)
- continue;
- printk("Scanning PCI bus %d for quad %d\n",
- QUADLOCAL2BUS(quad,0), quad);
- pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0));
- }
- return 0;
-}
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
deleted file mode 100644
index 3e6d2a6db866..000000000000
--- a/arch/x86/pci/visws.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Low-Level PCI Support for SGI Visual Workstation
- *
- * (c) 1999--2000 Martin Mares <mj@ucw.cz>
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-
-#include <asm/setup.h>
-#include <asm/pci_x86.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
-
-static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
-static void pci_visws_disable_irq(struct pci_dev *dev) { }
-
-/* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */
-/* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */
-
-/* void __init pcibios_penalize_isa_irq(int irq, int active) {} */
-
-
-unsigned int pci_bus0, pci_bus1;
-
-static int __init visws_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- int irq, bus = dev->bus->number;
-
- pin--;
-
- /* Nothing useful at PIIX4 pin 1 */
- if (bus == pci_bus0 && slot == 4 && pin == 0)
- return -1;
-
- /* PIIX4 USB is on Bus 0, Slot 4, Line 3 */
- if (bus == pci_bus0 && slot == 4 && pin == 3) {
- irq = CO_IRQ(CO_APIC_PIIX4_USB);
- goto out;
- }
-
- /* First pin spread down 1 APIC entry per slot */
- if (pin == 0) {
- irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 :
- CO_APIC_PCIA_BASE0) + slot);
- goto out;
- }
-
- /* lines 1,2,3 from any slot is shared in this twirly pattern */
- if (bus == pci_bus1) {
- /* lines 1-3 from devices 0 1 rotate over 2 apic entries */
- irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2));
- } else { /* bus == pci_bus0 */
- /* lines 1-3 from devices 0-3 rotate over 3 apic entries */
- if (slot == 0)
- slot = 3; /* same pattern */
- irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3));
- }
-out:
- printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq);
- return irq;
-}
-
-int __init pci_visws_init(void)
-{
- pcibios_enable_irq = &pci_visws_enable_irq;
- pcibios_disable_irq = &pci_visws_disable_irq;
-
- /* The VISWS supports configuration access type 1 only */
- pci_probe = (pci_probe | PCI_PROBE_CONF1) &
- ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2);
-
- pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff;
- pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff;
-
- printk(KERN_INFO "PCI: Lithium bridge A bus: %u, "
- "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0);
-
- raw_pci_ops = &pci_direct_conf1;
- pci_scan_bus_with_sysdata(pci_bus0);
- pci_scan_bus_with_sysdata(pci_bus1);
- pci_fixup_irqs(pci_common_swizzle, visws_map_irq);
- pcibios_resource_survey();
- /* Request bus scan */
- return 1;
-}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 103e702ec5a7..905956f16465 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -178,6 +178,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
(type == PCI_CAP_ID_MSIX) ?
"pcifront-msi-x" :
"pcifront-msi",
@@ -245,6 +246,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
"xen: msi already bound to pirq=%d\n", pirq);
}
irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
(type == PCI_CAP_ID_MSIX) ?
"msi-x" : "msi",
DOMID_SELF);
@@ -269,9 +271,6 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int ret = 0;
struct msi_desc *msidesc;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
-
list_for_each_entry(msidesc, &dev->msi_list, list) {
struct physdev_map_pirq map_irq;
domid_t domid;
@@ -291,7 +290,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
(pci_domain_nr(dev->bus) << 16);
map_irq.devfn = dev->devfn;
- if (type == PCI_CAP_ID_MSIX) {
+ if (type == PCI_CAP_ID_MSI && nvec > 1) {
+ map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
+ map_irq.entry_nr = nvec;
+ } else if (type == PCI_CAP_ID_MSIX) {
int pos;
u32 table_offset, bir;
@@ -308,6 +310,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (pci_seg_supported)
ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
&map_irq);
+ if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
+ /*
+ * If MAP_PIRQ_TYPE_MULTI_MSI is not available
+ * there's nothing else we can do in this case.
+ * Just set ret > 0 so driver can retry with
+ * single MSI.
+ */
+ ret = 1;
+ goto out;
+ }
if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
map_irq.type = MAP_PIRQ_TYPE_MSI;
map_irq.index = -1;
@@ -324,11 +336,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
goto out;
}
- ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
- map_irq.pirq,
- (type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi",
- domid);
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
+ (type == PCI_CAP_ID_MSI) ? nvec : 1,
+ (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
+ domid);
if (ret < 0)
goto out;
}
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 20342d4c82ce..85afde1fa3e5 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -9,5 +9,4 @@ obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
obj-y += ts5500/
-obj-y += visws/
obj-y += uv/
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile
deleted file mode 100644
index 91bc17ab2fd5..000000000000
--- a/arch/x86/platform/visws/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
deleted file mode 100644
index 94d8a39332ec..000000000000
--- a/arch/x86/platform/visws/visws_quirks.c
+++ /dev/null
@@ -1,608 +0,0 @@
-/*
- * SGI Visual Workstation support and quirks, unmaintained.
- *
- * Split out from setup.c by davej@suse.de
- *
- * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
- *
- * SGI Visual Workstation interrupt controller
- *
- * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
- * which serves as the main interrupt controller in the system. Non-legacy
- * hardware in the system uses this controller directly. Legacy devices
- * are connected to the PIIX4 which in turn has its 8259(s) connected to
- * a of the Cobalt APIC entry.
- *
- * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
- *
- * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
- */
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/visws/cobalt.h>
-#include <asm/visws/piix4.h>
-#include <asm/io_apic.h>
-#include <asm/fixmap.h>
-#include <asm/reboot.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-#include <asm/e820.h>
-#include <asm/time.h>
-#include <asm/io.h>
-
-#include <linux/kernel_stat.h>
-
-#include <asm/i8259.h>
-#include <asm/irq_vectors.h>
-#include <asm/visws/lithium.h>
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-
-extern int no_broadcast;
-
-char visws_board_type = -1;
-char visws_board_rev = -1;
-
-static void __init visws_time_init(void)
-{
- printk(KERN_INFO "Starting Cobalt Timer system clock\n");
-
- /* Set the countdown value */
- co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
-
- /* Start the timer */
- co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
-
- /* Enable (unmask) the timer interrupt */
- co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
-
- setup_default_timer_irq();
-}
-
-/* Replaces the default init_ISA_irqs in the generic setup */
-static void __init visws_pre_intr_init(void);
-
-/* Quirk for machine specific memory setup. */
-
-#define MB (1024 * 1024)
-
-unsigned long sgivwfb_mem_phys;
-unsigned long sgivwfb_mem_size;
-EXPORT_SYMBOL(sgivwfb_mem_phys);
-EXPORT_SYMBOL(sgivwfb_mem_size);
-
-long long mem_size __initdata = 0;
-
-static char * __init visws_memory_setup(void)
-{
- long long gfx_mem_size = 8 * MB;
-
- mem_size = boot_params.alt_mem_k;
-
- if (!mem_size) {
- printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
- mem_size = 128 * MB;
- }
-
- /*
- * this hardcodes the graphics memory to 8 MB
- * it really should be sized dynamically (or at least
- * set as a boot param)
- */
- if (!sgivwfb_mem_size) {
- printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
- sgivwfb_mem_size = 8 * MB;
- }
-
- /*
- * Trim to nearest MB
- */
- sgivwfb_mem_size &= ~((1 << 20) - 1);
- sgivwfb_mem_phys = mem_size - gfx_mem_size;
-
- e820_add_region(0, LOWMEMSIZE(), E820_RAM);
- e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
- e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
-
- return "PROM";
-}
-
-static void visws_machine_emergency_restart(void)
-{
- /*
- * Visual Workstations restart after this
- * register is poked on the PIIX4
- */
- outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
-}
-
-static void visws_machine_power_off(void)
-{
- unsigned short pm_status;
-/* extern unsigned int pci_bus0; */
-
- while ((pm_status = inw(PMSTS_PORT)) & 0x100)
- outw(pm_status, PMSTS_PORT);
-
- outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
-
- mdelay(10);
-
-#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
- (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
-
-/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
- outl(PIIX_SPECIAL_STOP, 0xCFC);
-}
-
-static void __init visws_get_smp_config(unsigned int early)
-{
-}
-
-/*
- * The Visual Workstation is Intel MP compliant in the hardware
- * sense, but it doesn't have a BIOS(-configuration table).
- * No problem for Linux.
- */
-
-static void __init MP_processor_info(struct mpc_cpu *m)
-{
- int ver, logical_apicid;
- physid_mask_t apic_cpus;
-
- if (!(m->cpuflag & CPU_ENABLED))
- return;
-
- logical_apicid = m->apicid;
- printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
- m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
- (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver);
-
- if (m->cpuflag & CPU_BOOTPROCESSOR)
- boot_cpu_physical_apicid = m->apicid;
-
- ver = m->apicver;
- if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
- m->apicid, MAX_LOCAL_APIC);
- return;
- }
-
- apic->apicid_to_cpu_present(m->apicid, &apic_cpus);
- physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- m->apicid);
- ver = 0x10;
- }
- apic_version[m->apicid] = ver;
-}
-
-static void __init visws_find_smp_config(void)
-{
- struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
- unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
-
- if (ncpus > CO_CPU_MAX) {
- printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
- ncpus, mp);
-
- ncpus = CO_CPU_MAX;
- }
-
- if (ncpus > setup_max_cpus)
- ncpus = setup_max_cpus;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- smp_found_config = 1;
-#endif
- while (ncpus--)
- MP_processor_info(mp++);
-
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-}
-
-static void visws_trap_init(void);
-
-void __init visws_early_detect(void)
-{
- int raw;
-
- visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
- >> PIIX_GPI_BD_SHIFT;
-
- if (visws_board_type < 0)
- return;
-
- /*
- * Override the default platform setup functions
- */
- x86_init.resources.memory_setup = visws_memory_setup;
- x86_init.mpparse.get_smp_config = visws_get_smp_config;
- x86_init.mpparse.find_smp_config = visws_find_smp_config;
- x86_init.irqs.pre_vector_init = visws_pre_intr_init;
- x86_init.irqs.trap_init = visws_trap_init;
- x86_init.timers.timer_init = visws_time_init;
- x86_init.pci.init = pci_visws_init;
- x86_init.pci.init_irq = x86_init_noop;
-
- /*
- * Install reboot quirks:
- */
- pm_power_off = visws_machine_power_off;
- machine_ops.emergency_restart = visws_machine_emergency_restart;
-
- /*
- * Do not use broadcast IPIs:
- */
- no_broadcast = 0;
-
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Turn off IO-APIC detection and initialization:
- */
- skip_ioapic_setup = 1;
-#endif
-
- /*
- * Get Board rev.
- * First, we have to initialize the 307 part to allow us access
- * to the GPIO registers. Let's map them at 0x0fc0 which is right
- * after the PIIX4 PM section.
- */
- outb_p(SIO_DEV_SEL, SIO_INDEX);
- outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
-
- outb_p(SIO_DEV_MSB, SIO_INDEX);
- outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
-
- outb_p(SIO_DEV_LSB, SIO_INDEX);
- outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
-
- outb_p(SIO_DEV_ENB, SIO_INDEX);
- outb_p(1, SIO_DATA); /* Enable GPIO registers. */
-
- /*
- * Now, we have to map the power management section to write
- * a bit which enables access to the GPIO registers.
- * What lunatic came up with this shit?
- */
- outb_p(SIO_DEV_SEL, SIO_INDEX);
- outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
-
- outb_p(SIO_DEV_MSB, SIO_INDEX);
- outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
-
- outb_p(SIO_DEV_LSB, SIO_INDEX);
- outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
-
- outb_p(SIO_DEV_ENB, SIO_INDEX);
- outb_p(1, SIO_DATA); /* Enable PM registers. */
-
- /*
- * Now, write the PM register which enables the GPIO registers.
- */
- outb_p(SIO_PM_FER2, SIO_PM_INDEX);
- outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
-
- /*
- * Now, initialize the GPIO registers.
- * We want them all to be inputs which is the
- * power on default, so let's leave them alone.
- * So, let's just read the board rev!
- */
- raw = inb_p(SIO_GP_DATA1);
- raw &= 0x7f; /* 7 bits of valid board revision ID. */
-
- if (visws_board_type == VISWS_320) {
- if (raw < 0x6) {
- visws_board_rev = 4;
- } else if (raw < 0xc) {
- visws_board_rev = 5;
- } else {
- visws_board_rev = 6;
- }
- } else if (visws_board_type == VISWS_540) {
- visws_board_rev = 2;
- } else {
- visws_board_rev = raw;
- }
-
- printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
- (visws_board_type == VISWS_320 ? "320" :
- (visws_board_type == VISWS_540 ? "540" :
- "unknown")), visws_board_rev);
-}
-
-#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
-#define BCD (LI_INTB | LI_INTC | LI_INTD)
-#define ALLDEVS (A01234 | BCD)
-
-static __init void lithium_init(void)
-{
- set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
- set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
-
- if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
- (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
- printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
-/* panic("This machine is not SGI Visual Workstation 320/540"); */
- }
-
- if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
- (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
- printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
-/* panic("This machine is not SGI Visual Workstation 320/540"); */
- }
-
- li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
- li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
-}
-
-static __init void cobalt_init(void)
-{
- /*
- * On normal SMP PC this is used only with SMP, but we have to
- * use it and set it up here to start the Cobalt clock
- */
- set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
- setup_local_APIC();
- printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
- (unsigned int)apic_read(APIC_LVR),
- (unsigned int)apic_read(APIC_ID));
-
- set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
- set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
- printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
- co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
-
- /* Enable Cobalt APIC being careful to NOT change the ID! */
- co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
-
- printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
- co_apic_read(CO_APIC_ID));
-}
-
-static void __init visws_trap_init(void)
-{
- lithium_init();
- cobalt_init();
-}
-
-/*
- * IRQ controller / APIC support:
- */
-
-static DEFINE_SPINLOCK(cobalt_lock);
-
-/*
- * Set the given Cobalt APIC Redirection Table entry to point
- * to the given IDT vector/index.
- */
-static inline void co_apic_set(int entry, int irq)
-{
- co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
- co_apic_write(CO_APIC_HI(entry), 0);
-}
-
-/*
- * Cobalt (IO)-APIC functions to handle PCI devices.
- */
-static inline int co_apic_ide0_hack(void)
-{
- extern char visws_board_type;
- extern char visws_board_rev;
-
- if (visws_board_type == VISWS_320 && visws_board_rev == 5)
- return 5;
- return CO_APIC_IDE0;
-}
-
-static int is_co_apic(unsigned int irq)
-{
- if (IS_CO_APIC(irq))
- return CO_APIC(irq);
-
- switch (irq) {
- case 0: return CO_APIC_CPU;
- case CO_IRQ_IDE0: return co_apic_ide0_hack();
- case CO_IRQ_IDE1: return CO_APIC_IDE1;
- default: return -1;
- }
-}
-
-
-/*
- * This is the SGI Cobalt (IO-)APIC:
- */
-static void enable_cobalt_irq(struct irq_data *data)
-{
- co_apic_set(is_co_apic(data->irq), data->irq);
-}
-
-static void disable_cobalt_irq(struct irq_data *data)
-{
- int entry = is_co_apic(data->irq);
-
- co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
- co_apic_read(CO_APIC_LO(entry));
-}
-
-static void ack_cobalt_irq(struct irq_data *data)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cobalt_lock, flags);
- disable_cobalt_irq(data);
- apic_write(APIC_EOI, APIC_EOI_ACK);
- spin_unlock_irqrestore(&cobalt_lock, flags);
-}
-
-static struct irq_chip cobalt_irq_type = {
- .name = "Cobalt-APIC",
- .irq_enable = enable_cobalt_irq,
- .irq_disable = disable_cobalt_irq,
- .irq_ack = ack_cobalt_irq,
-};
-
-
-/*
- * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
- * -- not the manner expected by the code in i8259.c.
- *
- * there is a 'master' physical interrupt source that gets sent to
- * the CPU. But in the chipset there are various 'virtual' interrupts
- * waiting to be handled. We represent this to Linux through a 'master'
- * interrupt controller type, and through a special virtual interrupt-
- * controller. Device drivers only see the virtual interrupt sources.
- */
-static unsigned int startup_piix4_master_irq(struct irq_data *data)
-{
- legacy_pic->init(0);
- enable_cobalt_irq(data);
- return 0;
-}
-
-static struct irq_chip piix4_master_irq_type = {
- .name = "PIIX4-master",
- .irq_startup = startup_piix4_master_irq,
- .irq_ack = ack_cobalt_irq,
-};
-
-static void pii4_mask(struct irq_data *data) { }
-
-static struct irq_chip piix4_virtual_irq_type = {
- .name = "PIIX4-virtual",
- .irq_mask = pii4_mask,
-};
-
-/*
- * PIIX4-8259 master/virtual functions to handle interrupt requests
- * from legacy devices: floppy, parallel, serial, rtc.
- *
- * None of these get Cobalt APIC entries, neither do they have IDT
- * entries. These interrupts are purely virtual and distributed from
- * the 'master' interrupt source: CO_IRQ_8259.
- *
- * When the 8259 interrupts its handler figures out which of these
- * devices is interrupting and dispatches to its handler.
- *
- * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
- * enable_irq gets the right irq. This 'master' irq is never directly
- * manipulated by any driver.
- */
-static irqreturn_t piix4_master_intr(int irq, void *dev_id)
-{
- unsigned long flags;
- int realirq;
-
- raw_spin_lock_irqsave(&i8259A_lock, flags);
-
- /* Find out what's interrupting in the PIIX4 master 8259 */
- outb(0x0c, 0x20); /* OCW3 Poll command */
- realirq = inb(0x20);
-
- /*
- * Bit 7 == 0 means invalid/spurious
- */
- if (unlikely(!(realirq & 0x80)))
- goto out_unlock;
-
- realirq &= 7;
-
- if (unlikely(realirq == 2)) {
- outb(0x0c, 0xa0);
- realirq = inb(0xa0);
-
- if (unlikely(!(realirq & 0x80)))
- goto out_unlock;
-
- realirq = (realirq & 7) + 8;
- }
-
- /* mask and ack interrupt */
- cached_irq_mask |= 1 << realirq;
- if (unlikely(realirq > 7)) {
- inb(0xa1);
- outb(cached_slave_mask, 0xa1);
- outb(0x60 + (realirq & 7), 0xa0);
- outb(0x60 + 2, 0x20);
- } else {
- inb(0x21);
- outb(cached_master_mask, 0x21);
- outb(0x60 + realirq, 0x20);
- }
-
- raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
- /*
- * handle this 'virtual interrupt' as a Cobalt one now.
- */
- generic_handle_irq(realirq);
-
- return IRQ_HANDLED;
-
-out_unlock:
- raw_spin_unlock_irqrestore(&i8259A_lock, flags);
- return IRQ_NONE;
-}
-
-static struct irqaction master_action = {
- .handler = piix4_master_intr,
- .name = "PIIX4-8259",
- .flags = IRQF_NO_THREAD,
-};
-
-static struct irqaction cascade_action = {
- .handler = no_action,
- .name = "cascade",
- .flags = IRQF_NO_THREAD,
-};
-
-static inline void set_piix4_virtual_irq_type(void)
-{
- piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask;
- piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask;
- piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask;
-}
-
-static void __init visws_pre_intr_init(void)
-{
- int i;
-
- set_piix4_virtual_irq_type();
-
- for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
- struct irq_chip *chip = NULL;
-
- if (i == 0)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_IDE0)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_IDE1)
- chip = &cobalt_irq_type;
- else if (i == CO_IRQ_8259)
- chip = &piix4_master_irq_type;
- else if (i < CO_IRQ_APIC0)
- chip = &piix4_virtual_irq_type;
- else if (IS_CO_APIC(i))
- chip = &cobalt_irq_type;
-
- if (chip)
- irq_set_chip(i, chip);
- }
-
- setup_irq(CO_IRQ_8259, &master_action);
- setup_irq(2, &cascade_action);
-}
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a12bddc7ccea..04376ac3d9ef 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -322,6 +322,7 @@
313 common finit_module sys_finit_module
314 common sched_setattr sys_sched_setattr
315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index cfbdbdb4e173..bbb1d2259ecf 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -69,8 +69,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__per_cpu_load|"
"init_per_cpu__.*|"
"__end_rodata_hpage_align|"
- "__vvar_page|"
#endif
+ "__vvar_page|"
"_end)$"
};
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 9206ac7961a5..c580d1210ffe 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -23,7 +23,8 @@ vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
# files to link into kernel
-obj-$(VDSO64-y) += vma.o vdso.o
+obj-y += vma.o
+obj-$(VDSO64-y) += vdso.o
obj-$(VDSOX32-y) += vdsox32.o
obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
@@ -138,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
targets += $(vdso32-images) $(vdso32-images:=.dbg)
-targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
extra-y += $(vdso32-images)
@@ -148,8 +149,19 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
+KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/vclock_gettime.o \
$(obj)/vdso32/note.o \
$(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a56f8d4..16d686171e9a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
*
* Fast user context implementation of clock_gettime, gettimeofday, and time.
*
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
* The code should have no internal unresolved relocations.
* Check with readelf after changing.
*/
@@ -11,56 +14,55 @@
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING
-#include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
+#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/timex.h>
#include <asm/hpet.h>
+#include <asm/vvar.h>
#include <asm/unistd.h>
-#include <asm/io.h>
-#include <asm/pvclock.h>
+#include <asm/msr.h>
+#include <linux/math64.h>
+#include <linux/time.h>
#define gtod (&VVAR(vsyscall_gtod_data))
-notrace static cycle_t vread_tsc(void)
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifdef CONFIG_HPET_TIMER
+static inline u32 read_hpet_counter(const volatile void *addr)
{
- cycle_t ret;
- u64 last;
+ return *(const volatile u32 *) (addr + HPET_COUNTER);
+}
+#endif
- /*
- * Empirically, a fence (of type that depends on the CPU)
- * before rdtsc is enough to ensure that rdtsc is ordered
- * with respect to loads. The various CPU manuals are unclear
- * as to whether rdtsc can be reordered with later loads,
- * but no one has ever seen it happen.
- */
- rdtsc_barrier();
- ret = (cycle_t)vget_cycles();
+#ifndef BUILD_VDSO32
- last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
+#include <asm/pvclock.h>
- if (likely(ret >= last))
- return ret;
+static notrace cycle_t vread_hpet(void)
+{
+ return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
+}
- /*
- * GCC likes to generate cmov here, but this branch is extremely
- * predictable (it's just a funciton of time and the likely is
- * very likely) and there's a data dependence, so force GCC
- * to generate a branch instead. I don't barrier() because
- * we don't actually need a barrier, and if this function
- * ever gets inlined it will generate worse code.
- */
- asm volatile ("");
- return last;
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+ long ret;
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+ return ret;
}
-static notrace cycle_t vread_hpet(void)
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
- return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ return ret;
}
#ifdef CONFIG_PARAVIRT_CLOCK
@@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*mode = VCLOCK_NONE;
/* refer to tsc.c read_tsc() comment for rationale */
- last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+ last = gtod->cycle_last;
if (likely(ret >= last))
return ret;
@@ -133,11 +135,30 @@ static notrace cycle_t vread_pvclock(int *mode)
}
#endif
+#else
+
+extern u8 hpet_page
+ __attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+ return read_hpet_counter((const void *)(&hpet_page));
+}
+#endif
+
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call VDSO32_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+ : "memory", "edx");
return ret;
}
@@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
long ret;
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call VDSO32_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+ : "memory", "edx");
return ret;
}
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+ *mode = VCLOCK_NONE;
+ return 0;
+}
+#endif
+
+#endif
+
+notrace static cycle_t vread_tsc(void)
+{
+ cycle_t ret;
+ u64 last;
+
+ /*
+ * Empirically, a fence (of type that depends on the CPU)
+ * before rdtsc is enough to ensure that rdtsc is ordered
+ * with respect to loads. The various CPU manuals are unclear
+ * as to whether rdtsc can be reordered with later loads,
+ * but no one has ever seen it happen.
+ */
+ rdtsc_barrier();
+ ret = (cycle_t)__native_read_tsc();
+
+ last = gtod->cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ /*
+ * GCC likes to generate cmov here, but this branch is extremely
+ * predictable (it's just a funciton of time and the likely is
+ * very likely) and there's a data dependence, so force GCC
+ * to generate a branch instead. I don't barrier() because
+ * we don't actually need a barrier, and if this function
+ * ever gets inlined it will generate worse code.
+ */
+ asm volatile ("");
+ return last;
+}
notrace static inline u64 vgetsns(int *mode)
{
- long v;
+ u64 v;
cycles_t cycles;
- if (gtod->clock.vclock_mode == VCLOCK_TSC)
+
+ if (gtod->vclock_mode == VCLOCK_TSC)
cycles = vread_tsc();
- else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+#ifdef CONFIG_HPET_TIMER
+ else if (gtod->vclock_mode == VCLOCK_HPET)
cycles = vread_hpet();
+#endif
#ifdef CONFIG_PARAVIRT_CLOCK
- else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+ else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode);
#endif
else
return 0;
- v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
- return v * gtod->clock.mult;
+ v = (cycles - gtod->cycle_last) & gtod->mask;
+ return v * gtod->mult;
}
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
u64 ns;
int mode;
- ts->tv_nsec = 0;
do {
- seq = raw_read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
+ seq = gtod_read_begin(gtod);
+ mode = gtod->vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->wall_time_snsec;
ns += vgetsns(&mode);
- ns >>= gtod->clock.shift;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+ ns >>= gtod->shift;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
- timespec_add_ns(ts, ns);
return mode;
}
-notrace static int do_monotonic(struct timespec *ts)
+notrace static int __always_inline do_monotonic(struct timespec *ts)
{
unsigned long seq;
u64 ns;
int mode;
- ts->tv_nsec = 0;
do {
- seq = raw_read_seqcount_begin(&gtod->seq);
- mode = gtod->clock.vclock_mode;
+ seq = gtod_read_begin(gtod);
+ mode = gtod->vclock_mode;
ts->tv_sec = gtod->monotonic_time_sec;
ns = gtod->monotonic_time_snsec;
ns += vgetsns(&mode);
- ns >>= gtod->clock.shift;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- timespec_add_ns(ts, ns);
+ ns >>= gtod->shift;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
+
+ ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
return mode;
}
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
do {
- seq = raw_read_seqcount_begin(&gtod->seq);
- ts->tv_sec = gtod->wall_time_coarse.tv_sec;
- ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- return 0;
+ seq = gtod_read_begin(gtod);
+ ts->tv_sec = gtod->wall_time_coarse_sec;
+ ts->tv_nsec = gtod->wall_time_coarse_nsec;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
}
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
{
unsigned long seq;
do {
- seq = raw_read_seqcount_begin(&gtod->seq);
- ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
- ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
- } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
- return 0;
+ seq = gtod_read_begin(gtod);
+ ts->tv_sec = gtod->monotonic_time_coarse_sec;
+ ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+ } while (unlikely(gtod_read_retry(gtod, seq)));
}
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
- int ret = VCLOCK_NONE;
-
switch (clock) {
case CLOCK_REALTIME:
- ret = do_realtime(ts);
+ if (do_realtime(ts) == VCLOCK_NONE)
+ goto fallback;
break;
case CLOCK_MONOTONIC:
- ret = do_monotonic(ts);
+ if (do_monotonic(ts) == VCLOCK_NONE)
+ goto fallback;
break;
case CLOCK_REALTIME_COARSE:
- return do_realtime_coarse(ts);
+ do_realtime_coarse(ts);
+ break;
case CLOCK_MONOTONIC_COARSE:
- return do_monotonic_coarse(ts);
+ do_monotonic_coarse(ts);
+ break;
+ default:
+ goto fallback;
}
- if (ret == VCLOCK_NONE)
- return vdso_fallback_gettime(clock, ts);
return 0;
+fallback:
+ return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
- long ret = VCLOCK_NONE;
-
if (likely(tv != NULL)) {
- BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
- offsetof(struct timespec, tv_nsec) ||
- sizeof(*tv) != sizeof(struct timespec));
- ret = do_realtime((struct timespec *)tv);
+ if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+ return vdso_fallback_gtod(tv, tz);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
- /* Avoid memcpy. Some old compilers fail to inline it */
- tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
- tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+ tz->tz_minuteswest = gtod->tz_minuteswest;
+ tz->tz_dsttime = gtod->tz_dsttime;
}
- if (ret == VCLOCK_NONE)
- return vdso_fallback_gtod(tv, tz);
return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
@@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *)
*/
notrace time_t __vdso_time(time_t *t)
{
- /* This is atomic on x86_64 so we don't need any locks. */
- time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+ /* This is atomic on x86 so we don't need any locks. */
+ time_t result = ACCESS_ONCE(gtod->wall_time_sec);
if (t)
*t = result;
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 634a2cf62046..2e263f367b13 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -6,7 +6,25 @@
SECTIONS
{
- . = VDSO_PRELINK + SIZEOF_HEADERS;
+#ifdef BUILD_VDSO32
+#include <asm/vdso32.h>
+
+ .hpet_sect : {
+ hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
+ } :text :hpet_sect
+
+ .vvar_sect : {
+ vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
+
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+ } :text :vvar_sect
+#endif
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -44,6 +62,11 @@ SECTIONS
. = ALIGN(0x100);
.text : { *(.text*) } :text =0x90909090
+
+ /DISCARD/ : {
+ *(.discard)
+ *(.discard.*)
+ }
}
/*
@@ -61,4 +84,8 @@ PHDRS
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
+#ifdef BUILD_VDSO32
+ vvar_sect PT_NULL FLAGS(4); /* PF_R */
+ hpet_sect PT_NULL FLAGS(4); /* PF_R */
+#endif
}
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1e13eb8c9656..be3f23b09af5 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,21 +1,3 @@
-#include <asm/page_types.h>
-#include <linux/linkage.h>
+#include <asm/vdso.h>
-__PAGE_ALIGNED_DATA
-
- .globl vdso_start, vdso_end
- .align PAGE_SIZE
-vdso_start:
- .incbin "arch/x86/vdso/vdso.so"
-vdso_end:
- .align PAGE_SIZE /* extra data here leaks to userspace. */
-
-.previous
-
- .globl vdso_pages
- .bss
- .align 8
- .type vdso_pages, @object
-vdso_pages:
- .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
- .size vdso_pages, .-vdso_pages
+DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d6bfb876cfb0..00348980a3a6 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
@@ -25,17 +26,14 @@
#include <asm/tlbflush.h>
#include <asm/vdso.h>
#include <asm/proto.h>
-
-enum {
- VDSO_DISABLED = 0,
- VDSO_ENABLED = 1,
- VDSO_COMPAT = 2,
-};
+#include <asm/fixmap.h>
+#include <asm/hpet.h>
+#include <asm/vvar.h>
#ifdef CONFIG_COMPAT_VDSO
-#define VDSO_DEFAULT VDSO_COMPAT
+#define VDSO_DEFAULT 0
#else
-#define VDSO_DEFAULT VDSO_ENABLED
+#define VDSO_DEFAULT 1
#endif
#ifdef CONFIG_X86_64
@@ -44,13 +42,6 @@ enum {
#endif
/*
- * This is the difference between the prelinked addresses in the vDSO images
- * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
- * in the user address space.
- */
-#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
-
-/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
@@ -60,6 +51,9 @@ static int __init vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
+ if (vdso_enabled > 1)
+ pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+
return 1;
}
@@ -76,124 +70,8 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0);
EXPORT_SYMBOL_GPL(vdso_enabled);
#endif
-static __init void reloc_symtab(Elf32_Ehdr *ehdr,
- unsigned offset, unsigned size)
-{
- Elf32_Sym *sym = (void *)ehdr + offset;
- unsigned nsym = size / sizeof(*sym);
- unsigned i;
-
- for(i = 0; i < nsym; i++, sym++) {
- if (sym->st_shndx == SHN_UNDEF ||
- sym->st_shndx == SHN_ABS)
- continue; /* skip */
-
- if (sym->st_shndx > SHN_LORESERVE) {
- printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
- sym->st_shndx);
- continue;
- }
-
- switch(ELF_ST_TYPE(sym->st_info)) {
- case STT_OBJECT:
- case STT_FUNC:
- case STT_SECTION:
- case STT_FILE:
- sym->st_value += VDSO_ADDR_ADJUST;
- }
- }
-}
-
-static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
-{
- Elf32_Dyn *dyn = (void *)ehdr + offset;
-
- for(; dyn->d_tag != DT_NULL; dyn++)
- switch(dyn->d_tag) {
- case DT_PLTGOT:
- case DT_HASH:
- case DT_STRTAB:
- case DT_SYMTAB:
- case DT_RELA:
- case DT_INIT:
- case DT_FINI:
- case DT_REL:
- case DT_DEBUG:
- case DT_JMPREL:
- case DT_VERSYM:
- case DT_VERDEF:
- case DT_VERNEED:
- case DT_ADDRRNGLO ... DT_ADDRRNGHI:
- /* definitely pointers needing relocation */
- dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
- break;
-
- case DT_ENCODING ... OLD_DT_LOOS-1:
- case DT_LOOS ... DT_HIOS-1:
- /* Tags above DT_ENCODING are pointers if
- they're even */
- if (dyn->d_tag >= DT_ENCODING &&
- (dyn->d_tag & 1) == 0)
- dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
- break;
-
- case DT_VERDEFNUM:
- case DT_VERNEEDNUM:
- case DT_FLAGS_1:
- case DT_RELACOUNT:
- case DT_RELCOUNT:
- case DT_VALRNGLO ... DT_VALRNGHI:
- /* definitely not pointers */
- break;
-
- case OLD_DT_LOOS ... DT_LOOS-1:
- case DT_HIOS ... DT_VALRNGLO-1:
- default:
- if (dyn->d_tag > DT_ENCODING)
- printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
- dyn->d_tag);
- break;
- }
-}
-
-static __init void relocate_vdso(Elf32_Ehdr *ehdr)
-{
- Elf32_Phdr *phdr;
- Elf32_Shdr *shdr;
- int i;
-
- BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
- !elf_check_arch_ia32(ehdr) ||
- ehdr->e_type != ET_DYN);
-
- ehdr->e_entry += VDSO_ADDR_ADJUST;
-
- /* rebase phdrs */
- phdr = (void *)ehdr + ehdr->e_phoff;
- for (i = 0; i < ehdr->e_phnum; i++) {
- phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
-
- /* relocate dynamic stuff */
- if (phdr[i].p_type == PT_DYNAMIC)
- reloc_dyn(ehdr, phdr[i].p_offset);
- }
-
- /* rebase sections */
- shdr = (void *)ehdr + ehdr->e_shoff;
- for(i = 0; i < ehdr->e_shnum; i++) {
- if (!(shdr[i].sh_flags & SHF_ALLOC))
- continue;
-
- shdr[i].sh_addr += VDSO_ADDR_ADJUST;
-
- if (shdr[i].sh_type == SHT_SYMTAB ||
- shdr[i].sh_type == SHT_DYNSYM)
- reloc_symtab(ehdr, shdr[i].sh_offset,
- shdr[i].sh_size);
- }
-}
-
-static struct page *vdso32_pages[1];
+static struct page **vdso32_pages;
+static unsigned vdso32_size;
#ifdef CONFIG_X86_64
@@ -212,12 +90,6 @@ void syscall32_cpu_init(void)
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
-#define compat_uses_vma 1
-
-static inline void map_compat_vdso(int map)
-{
-}
-
#else /* CONFIG_X86_32 */
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
@@ -241,64 +113,36 @@ void enable_sep_cpu(void)
put_cpu();
}
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
- gate_vma.vm_mm = NULL;
- gate_vma.vm_start = FIXADDR_USER_START;
- gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
- gate_vma.vm_page_prot = __P101;
-
- return 0;
-}
-
-#define compat_uses_vma 0
-
-static void map_compat_vdso(int map)
-{
- static int vdso_mapped;
-
- if (map == vdso_mapped)
- return;
-
- vdso_mapped = map;
-
- __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
- map ? PAGE_READONLY_EXEC : PAGE_NONE);
-
- /* flush stray tlbs */
- flush_tlb_all();
-}
-
#endif /* CONFIG_X86_64 */
int __init sysenter_setup(void)
{
- void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
- const void *vsyscall;
- size_t vsyscall_len;
-
- vdso32_pages[0] = virt_to_page(syscall_page);
-
-#ifdef CONFIG_X86_32
- gate_vma_init();
-#endif
+ char *vdso32_start, *vdso32_end;
+ int npages, i;
+#ifdef CONFIG_COMPAT
if (vdso32_syscall()) {
- vsyscall = &vdso32_syscall_start;
- vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
- } else if (vdso32_sysenter()){
- vsyscall = &vdso32_sysenter_start;
- vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
+ vdso32_start = vdso32_syscall_start;
+ vdso32_end = vdso32_syscall_end;
+ vdso32_pages = vdso32_syscall_pages;
+ } else
+#endif
+ if (vdso32_sysenter()) {
+ vdso32_start = vdso32_sysenter_start;
+ vdso32_end = vdso32_sysenter_end;
+ vdso32_pages = vdso32_sysenter_pages;
} else {
- vsyscall = &vdso32_int80_start;
- vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
+ vdso32_start = vdso32_int80_start;
+ vdso32_end = vdso32_int80_end;
+ vdso32_pages = vdso32_int80_pages;
}
- memcpy(syscall_page, vsyscall, vsyscall_len);
- relocate_vdso(syscall_page);
+ npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE;
+ vdso32_size = npages << PAGE_SHIFT;
+ for (i = 0; i < npages; i++)
+ vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE);
+
+ patch_vdso32(vdso32_start, vdso32_size);
return 0;
}
@@ -309,48 +153,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
- bool compat;
+ struct vm_area_struct *vma;
#ifdef CONFIG_X86_X32_ABI
if (test_thread_flag(TIF_X32))
return x32_setup_additional_pages(bprm, uses_interp);
#endif
- if (vdso_enabled == VDSO_DISABLED)
+ if (vdso_enabled != 1) /* Other values all mean "disabled" */
return 0;
down_write(&mm->mmap_sem);
- /* Test compat mode once here, in case someone
- changes it via sysctl */
- compat = (vdso_enabled == VDSO_COMPAT);
+ addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = addr;
+ goto up_fail;
+ }
+
+ addr += VDSO_OFFSET(VDSO_PREV_PAGES);
- map_compat_vdso(compat);
+ current->mm->context.vdso = (void *)addr;
- if (compat)
- addr = VDSO_HIGH_BASE;
- else {
- addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
- if (IS_ERR_VALUE(addr)) {
- ret = addr;
- goto up_fail;
- }
+ /*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ */
+ ret = install_special_mapping(mm,
+ addr,
+ vdso32_size,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso32_pages);
+
+ if (ret)
+ goto up_fail;
+
+ vma = _install_special_mapping(mm,
+ addr - VDSO_OFFSET(VDSO_PREV_PAGES),
+ VDSO_OFFSET(VDSO_PREV_PAGES),
+ VM_READ,
+ NULL);
+
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto up_fail;
}
- current->mm->context.vdso = (void *)addr;
+ ret = remap_pfn_range(vma,
+ addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
+ __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
+ PAGE_SIZE,
+ PAGE_READONLY);
+
+ if (ret)
+ goto up_fail;
- if (compat_uses_vma || !compat) {
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- */
- ret = install_special_mapping(mm, addr, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso32_pages);
+#ifdef CONFIG_HPET_TIMER
+ if (hpet_address) {
+ ret = io_remap_pfn_range(vma,
+ addr - VDSO_OFFSET(VDSO_HPET_PAGE),
+ hpet_address >> PAGE_SHIFT,
+ PAGE_SIZE,
+ pgprot_noncached(PAGE_READONLY));
if (ret)
goto up_fail;
}
+#endif
current_thread_info()->sysenter_return =
VDSO32_SYMBOL(addr, SYSENTER_RETURN);
@@ -411,20 +280,12 @@ const char *arch_vma_name(struct vm_area_struct *vma)
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
- /*
- * Check to see if the corresponding task was created in compat vdso
- * mode.
- */
- if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
- return &gate_vma;
return NULL;
}
int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
- const struct vm_area_struct *vma = get_gate_vma(mm);
-
- return vma && addr >= vma->vm_start && addr < vma->vm_end;
+ return 0;
}
int in_gate_area_no_mm(unsigned long addr)
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index 2ce5f82c333b..018bcd9f97b4 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -1,22 +1,9 @@
-#include <linux/init.h>
+#include <asm/vdso.h>
-__INITDATA
+DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
- .globl vdso32_int80_start, vdso32_int80_end
-vdso32_int80_start:
- .incbin "arch/x86/vdso/vdso32-int80.so"
-vdso32_int80_end:
-
- .globl vdso32_syscall_start, vdso32_syscall_end
-vdso32_syscall_start:
#ifdef CONFIG_COMPAT
- .incbin "arch/x86/vdso/vdso32-syscall.so"
+DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so")
#endif
-vdso32_syscall_end:
-
- .globl vdso32_sysenter_start, vdso32_sysenter_end
-vdso32_sysenter_start:
- .incbin "arch/x86/vdso/vdso32-sysenter.so"
-vdso32_sysenter_end:
-__FINIT
+DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c
new file mode 100644
index 000000000000..175cc72c0f68
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -0,0 +1,30 @@
+#define BUILD_VDSO32
+
+#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
+#undef CONFIG_OPTIMIZE_INLINING
+#endif
+
+#undef CONFIG_X86_PPRO_FENCE
+
+#ifdef CONFIG_X86_64
+
+/*
+ * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
+ * configuration
+ */
+#undef CONFIG_64BIT
+#undef CONFIG_X86_64
+#undef CONFIG_ILLEGAL_POINTER_VALUE
+#undef CONFIG_SPARSEMEM_VMEMMAP
+#undef CONFIG_NR_CPUS
+
+#define CONFIG_X86_32 1
+#define CONFIG_PAGE_OFFSET 0
+#define CONFIG_ILLEGAL_POINTER_VALUE 0
+#define CONFIG_NR_CPUS 1
+
+#define BUILD_VDSO32_64
+
+#endif
+
+#include "../vclock_gettime.c"
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
index 976124bb5f92..aadb8b9994cd 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -8,7 +8,11 @@
* values visible using the asm-x86/vdso.h macros from the kernel proper.
*/
+#include <asm/page.h>
+
+#define BUILD_VDSO32
#define VDSO_PRELINK 0
+
#include "../vdso-layout.lds.S"
/* The ELF entry point can be used to set the AT_SYSINFO value. */
@@ -19,6 +23,13 @@ ENTRY(__kernel_vsyscall);
*/
VERSION
{
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_time;
+ };
+
LINUX_2.5 {
global:
__kernel_vsyscall;
@@ -31,7 +42,9 @@ VERSION
/*
* Symbols we define here called VDSO* get their values into vdso32-syms.h.
*/
-VDSO32_PRELINK = VDSO_PRELINK;
VDSO32_vsyscall = __kernel_vsyscall;
VDSO32_sigreturn = __kernel_sigreturn;
VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
+VDSO32_clock_gettime = clock_gettime;
+VDSO32_gettimeofday = gettimeofday;
+VDSO32_time = time;
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
index 295f1c7543d8..f4aa34e7f370 100644
--- a/arch/x86/vdso/vdsox32.S
+++ b/arch/x86/vdso/vdsox32.S
@@ -1,21 +1,3 @@
-#include <asm/page_types.h>
-#include <linux/linkage.h>
+#include <asm/vdso.h>
-__PAGE_ALIGNED_DATA
-
- .globl vdsox32_start, vdsox32_end
- .align PAGE_SIZE
-vdsox32_start:
- .incbin "arch/x86/vdso/vdsox32.so"
-vdsox32_end:
- .align PAGE_SIZE /* extra data here leaks to userspace. */
-
-.previous
-
- .globl vdsox32_pages
- .bss
- .align 8
- .type vdsox32_pages, @object
-vdsox32_pages:
- .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
- .size vdsox32_pages, .-vdsox32_pages
+DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 431e87544411..1ad102613127 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -16,20 +16,22 @@
#include <asm/vdso.h>
#include <asm/page.h>
+#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso_enabled = 1;
-extern char vdso_start[], vdso_end[];
+DECLARE_VDSO_IMAGE(vdso);
extern unsigned short vdso_sync_cpuid;
-
-extern struct page *vdso_pages[];
static unsigned vdso_size;
#ifdef CONFIG_X86_X32_ABI
-extern char vdsox32_start[], vdsox32_end[];
-extern struct page *vdsox32_pages[];
+DECLARE_VDSO_IMAGE(vdsox32);
static unsigned vdsox32_size;
+#endif
+#endif
-static void __init patch_vdsox32(void *vdso, size_t len)
+#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \
+ defined(CONFIG_COMPAT)
+void __init patch_vdso32(void *vdso, size_t len)
{
Elf32_Ehdr *hdr = vdso;
Elf32_Shdr *sechdrs, *alt_sec = 0;
@@ -52,7 +54,7 @@ static void __init patch_vdsox32(void *vdso, size_t len)
}
/* If we get here, it's probably a bug. */
- pr_warning("patch_vdsox32: .altinstructions not found\n");
+ pr_warning("patch_vdso32: .altinstructions not found\n");
return; /* nothing to patch */
found:
@@ -61,6 +63,7 @@ found:
}
#endif
+#if defined(CONFIG_X86_64)
static void __init patch_vdso64(void *vdso, size_t len)
{
Elf64_Ehdr *hdr = vdso;
@@ -104,7 +107,7 @@ static int __init init_vdso(void)
vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
#ifdef CONFIG_X86_X32_ABI
- patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start);
+ patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start);
npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
vdsox32_size = npages << PAGE_SHIFT;
for (i = 0; i < npages; i++)
@@ -204,3 +207,4 @@ static __init int vdso_setup(char *s)
return 0;
}
__setup("vdso=", vdso_setup);
+#endif
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 01b90261fa38..e88fda867a33 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -7,7 +7,7 @@ config XEN
depends on PARAVIRT
select PARAVIRT_CLOCK
select XEN_HAVE_PVMMU
- depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
+ depends on X86_64 || (X86_32 && X86_PAE)
depends on X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
@@ -19,11 +19,6 @@ config XEN_DOM0
depends on XEN && PCI_XEN && SWIOTLB_XEN
depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
-# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
-# name in tools.
-config XEN_PRIVILEGED_GUEST
- def_bool XEN_DOM0
-
config XEN_PVHVM
def_bool y
depends on XEN && PCI && X86_LOCAL_APIC
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2423ef04ffea..86e02eabb640 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2058,7 +2058,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
case FIX_RO_IDT:
#ifdef CONFIG_X86_32
case FIX_WP_TEST:
- case FIX_VDSO:
# ifdef CONFIG_HIGHMEM
case FIX_KMAP_BEGIN ... FIX_KMAP_END:
# endif
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 696c694986d0..85e5d78c9874 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -881,6 +881,65 @@ static unsigned long mfn_hash(unsigned long mfn)
return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}
+int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ int i, ret = 0;
+ bool lazy = false;
+ pte_t *pte;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ if (kmap_ops &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned long mfn, pfn;
+
+ /* Do not add to override if the map failed. */
+ if (map_ops[i].status)
+ continue;
+
+ if (map_ops[i].flags & GNTMAP_contains_pte) {
+ pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
+ (map_ops[i].host_addr & ~PAGE_MASK));
+ mfn = pte_mfn(*pte);
+ } else {
+ mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
+ }
+ pfn = page_to_pfn(pages[i]);
+
+ WARN_ON(PagePrivate(pages[i]));
+ SetPagePrivate(pages[i]);
+ set_page_private(pages[i], mfn);
+ pages[i]->index = pfn_to_mfn(pfn);
+
+ if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (kmap_ops) {
+ ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
+
/* Add an MFN override for a particular page */
int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op)
@@ -899,13 +958,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
- WARN_ON(PagePrivate(page));
- SetPagePrivate(page);
- set_page_private(page, mfn);
- page->index = pfn_to_mfn(pfn);
-
- if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
- return -ENOMEM;
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
@@ -943,20 +995,62 @@ int m2p_add_override(unsigned long mfn, struct page *page,
return 0;
}
EXPORT_SYMBOL_GPL(m2p_add_override);
+
+int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
+ struct gnttab_map_grant_ref *kmap_ops,
+ struct page **pages, unsigned int count)
+{
+ int i, ret = 0;
+ bool lazy = false;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ if (kmap_ops &&
+ !in_interrupt() &&
+ paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
+ arch_enter_lazy_mmu_mode();
+ lazy = true;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i]));
+ unsigned long pfn = page_to_pfn(pages[i]);
+
+ if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_page_private(pages[i], INVALID_P2M_ENTRY);
+ WARN_ON(!PagePrivate(pages[i]));
+ ClearPagePrivate(pages[i]);
+ set_phys_to_machine(pfn, pages[i]->index);
+
+ if (kmap_ops)
+ ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (lazy)
+ arch_leave_lazy_mmu_mode();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
+
int m2p_remove_override(struct page *page,
- struct gnttab_map_grant_ref *kmap_op)
+ struct gnttab_map_grant_ref *kmap_op,
+ unsigned long mfn)
{
unsigned long flags;
- unsigned long mfn;
unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
pfn = page_to_pfn(page);
- mfn = get_phys_to_machine(pfn);
- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT))
- return -EINVAL;
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
@@ -970,10 +1064,7 @@ int m2p_remove_override(struct page *page,
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
- WARN_ON(!PagePrivate(page));
- ClearPagePrivate(page);
- set_phys_to_machine(pfn, page->index);
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;