955 files changed, 36564 insertions, 15639 deletions
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 1b71ca70c9f6..6d9e805f18a7 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -51,8 +51,6 @@
 #define F_EXLCK		16	/* or 3 */
 #define F_SHLCK		32	/* or 4 */
 
-#define F_INPROGRESS	64
-
 #include <asm-generic/fcntl.h>
 
 #endif
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 4ac48a095f3a..2207fc61665d 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -457,10 +457,12 @@
 #define __NR_clock_adjtime		499
 #define __NR_syncfs			500
 #define __NR_setns			501
+#define __NR_accept4			502
+#define __NR_sendmmsg			503
 
 #ifdef __KERNEL__
 
-#define NR_SYSCALLS			502
+#define NR_SYSCALLS			504
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index f0df3fbd8402..b9fc6c309d2e 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -4,9 +4,8 @@
  *
  * (C) 2001,2002,2006 by Jan-Benedict Glaw <jbglaw@lug-owl.de>
  *
- * This driver is at all a modified version of Erik Mouw's
- * Documentation/DocBook/procfs_example.c, so: thank
- * you, Erik! He can be reached via email at
+ * This driver is a modified version of Erik Mouw's example proc
+ * interface, so: thank you, Erik! He can be reached via email at
  * <J.A.K.Mouw@its.tudelft.nl>. It is based on an idea
  * provided by DEC^WCompaq^WIntel's "Jumpstart" CD. They
  * included a patch like this as well. Thanks for idea!
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 6acea1f96de3..e534e1c5bc11 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -520,6 +520,8 @@ sys_call_table:
 	.quad sys_clock_adjtime
 	.quad sys_syncfs			/* 500 */
 	.quad sys_setns
+	.quad sys_accept4
+	.quad sys_sendmmsg
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2d95b5b5cf49..1005367d0f9d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,7 +3,7 @@ config ARM
 	default y
 	select HAVE_AOUT
 	select HAVE_DMA_API_DEBUG
-	select HAVE_IDE
+	select HAVE_IDE if PCI || ISA || PCMCIA
 	select HAVE_MEMBLOCK
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
@@ -29,6 +29,7 @@ config ARM
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
 	select GENERIC_IRQ_SHOW
+	select CPU_PM if (SUSPEND || CPU_IDLE)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -225,6 +226,10 @@ config PHYS_OFFSET
 	  Please provide the physical address corresponding to the
 	  location of main memory in your system.
 
+config GENERIC_BUG
+	def_bool y
+	depends on BUG
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -358,7 +363,6 @@ config ARCH_GEMINI
 config ARCH_PRIMA2
 	bool "CSR SiRFSoC PRIMA2 ARM Cortex A9 Platform"
 	select CPU_V7
-	select GENERIC_TIME
 	select NO_IOPORT
 	select GENERIC_CLOCKEVENTS
 	select CLKDEV_LOOKUP
@@ -399,6 +403,7 @@ config ARCH_FOOTBRIDGE
 	select CPU_SA110
 	select FOOTBRIDGE
 	select GENERIC_CLOCKEVENTS
+	select HAVE_IDE
 	select NEED_MACH_MEMORY_H
 	help
 	  Support for systems based on the DC21285 companion chip
@@ -537,7 +542,6 @@ config ARCH_LPC32XX
 	select ARM_AMBA
 	select USB_ARCH_HAS_OHCI
 	select CLKDEV_LOOKUP
-	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	help
 	  Support for the NXP LPC32XX family of processors
@@ -617,7 +621,6 @@ config ARCH_TEGRA
 	bool "NVIDIA Tegra"
 	select CLKDEV_LOOKUP
 	select CLKSRC_MMIO
-	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
 	select HAVE_CLK
@@ -650,6 +653,8 @@ config ARCH_PXA
 	select SPARSE_IRQ
 	select AUTO_ZRELADDR
 	select MULTI_IRQ_HANDLER
+	select ARM_CPU_SUSPEND if PM
+	select HAVE_IDE
 	help
 	  Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
 
@@ -691,6 +696,7 @@ config ARCH_RPC
 	select NO_IOPORT
 	select ARCH_SPARSEMEM_ENABLE
 	select ARCH_USES_GETTIMEOFFSET
+	select HAVE_IDE
 	select NEED_MACH_MEMORY_H
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
@@ -710,6 +716,7 @@ config ARCH_SA1100
 	select HAVE_SCHED_CLOCK
 	select TICK_ONESHOT
 	select ARCH_REQUIRE_GPIOLIB
+	select HAVE_IDE
 	select NEED_MACH_MEMORY_H
 	help
 	  Support for StrongARM 11x0 based boards.
@@ -744,7 +751,6 @@ config ARCH_S3C64XX
 	select ARCH_REQUIRE_GPIOLIB
 	select SAMSUNG_CLKSRC
 	select SAMSUNG_IRQ_VIC_TIMER
-	select SAMSUNG_IRQ_UART
 	select S3C_GPIO_TRACK
 	select S3C_GPIO_PULL_UPDOWN
 	select S3C_GPIO_CFG_S3C24XX
@@ -937,7 +943,6 @@ config ARCH_VT8500
 config ARCH_ZYNQ
 	bool "Xilinx Zynq ARM Cortex A9 Platform"
 	select CPU_V7
-	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select CLKDEV_LOOKUP
 	select ARM_GIC
@@ -1402,6 +1407,7 @@ config SMP
 		 MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
 		 ARCH_EXYNOS4 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
 		 ARCH_MSM_SCORPIONMP || ARCH_SHMOBILE
+	depends on MMU
 	select USE_GENERIC_SMP_HELPERS
 	select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
 	help
@@ -1415,7 +1421,7 @@ config SMP
 	  processor machines. On a single processor machine, the kernel will
 	  run faster if you say N here.
 
-	  See also <file:Documentation/i386/IO-APIC.txt>,
+	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
 	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
 	  <http://tldp.org/HOWTO/SMP-HOWTO.html>.
 
@@ -1434,6 +1440,31 @@ config SMP_ON_UP
 
 	  If you don't know what to do here, say Y.
 
+config ARM_CPU_TOPOLOGY
+	bool "Support cpu topology definition"
+	depends on SMP && CPU_V7
+	default y
+	help
+	  Support ARM cpu topology definition. The MPIDR register defines
+	  affinity between processors which is then used to describe the cpu
+	  topology of an ARM System.
+
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on ARM_CPU_TOPOLOGY
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+	bool "SMT scheduler support"
+	depends on ARM_CPU_TOPOLOGY
+	help
+	  Improves the CPU scheduler's decision making when dealing with
+	  MultiThreading at a cost of slightly increased overhead in some
+	  places. If unsure say N here.
+
 config HAVE_ARM_SCU
 	bool
 	help
@@ -1509,6 +1540,7 @@ config THUMB2_KERNEL
 	depends on CPU_V7 && !CPU_V6 && !CPU_V6K && EXPERIMENTAL
 	select AEABI
 	select ARM_ASM_UNIFIED
+	select ARM_UNWIND
 	help
 	  By enabling this option, the kernel will be compiled in
 	  Thumb-2 mode. A compiler/assembler that understand the unified
@@ -1834,6 +1866,38 @@ config ZBOOT_ROM_SH_MOBILE_SDHI
 
 endchoice
 
+config ARM_APPENDED_DTB
+	bool "Use appended device tree blob to zImage (EXPERIMENTAL)"
+	depends on OF && !ZBOOT_ROM && EXPERIMENTAL
+	help
+	  With this option, the boot code will look for a device tree binary
+	  (DTB) appended to zImage
+	  (e.g. cat zImage <filename>.dtb > zImage_w_dtb).
+
+	  This is meant as a backward compatibility convenience for those
+	  systems with a bootloader that can't be upgraded to accommodate
+	  the documented boot protocol using a device tree.
+
+	  Beware that there is very little in terms of protection against
+	  this option being confused by leftover garbage in memory that might
+	  look like a DTB header after a reboot if no actual DTB is appended
+	  to zImage.  Do not leave this option active in a production kernel
+	  if you don't intend to always append a DTB.  Proper passing of the
+	  location into r2 of a bootloader provided DTB is always preferable
+	  to this option.
+
+config ARM_ATAG_DTB_COMPAT
+	bool "Supplement the appended DTB with traditional ATAG information"
+	depends on ARM_APPENDED_DTB
+	help
+	  Some old bootloaders can't be updated to a DTB capable one, yet
+	  they provide ATAGs with memory configuration, the ramdisk address,
+	  the kernel cmdline string, etc.  Such information is dynamically
+	  provided by the bootloader and can't always be stored in a static
+	  DTB.  To allow a device tree enabled kernel to be used with such
+	  bootloaders, this option allows zImage to extract the information
+	  from the ATAG list and store it at run time into the appended DTB.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
@@ -2129,6 +2193,9 @@ config ARCH_SUSPEND_POSSIBLE
 		CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE
 	def_bool y
 
+config ARM_CPU_SUSPEND
+	def_bool PM_SLEEP
+
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 81cbe40c159c..f283938c2fc7 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -65,13 +65,71 @@ config DEBUG_USER
 
 # These options are only for real kernel hackers who want to get their hands dirty.
 config DEBUG_LL
-	bool "Kernel low-level debugging functions"
+	bool "Kernel low-level debugging functions (read help!)"
 	depends on DEBUG_KERNEL
 	help
 	  Say Y here to include definitions of printascii, printch, printhex
 	  in the kernel.  This is helpful if you are debugging code that
 	  executes before the console is initialized.
 
+	  Note that selecting this option will limit the kernel to a single
+	  UART definition, as specified below. Attempting to boot the kernel
+	  image on a different platform *will not work*, so this option should
+	  not be enabled for kernels that are intended to be portable.
+
+choice
+	prompt "Kernel low-level debugging port"
+	depends on DEBUG_LL
+
+	config DEBUG_LL_UART_NONE
+		bool "No low-level debugging UART"
+		help
+		  Say Y here if your platform doesn't provide a UART option
+		  below. This relies on your platform choosing the right UART
+		  definition internally in order for low-level debugging to
+		  work.
+
+	config DEBUG_ICEDCC
+		bool "Kernel low-level debugging via EmbeddedICE DCC channel"
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the EmbeddedICE macrocell's DCC channel using
+		  co-processor 14. This is known to work on the ARM9 style ICE
+		  channel and on the XScale with the PEEDI.
+
+		  Note that the system will appear to hang during boot if there
+		  is nothing connected to read from the DCC.
+
+	config DEBUG_FOOTBRIDGE_COM1
+		bool "Kernel low-level debugging messages via footbridge 8250 at PCI COM1"
+		depends on FOOTBRIDGE
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the 8250 at PCI COM1.
+
+	config DEBUG_DC21285_PORT
+		bool "Kernel low-level debugging messages via footbridge serial port"
+		depends on FOOTBRIDGE
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the serial port in the DC21285 (Footbridge).
+
+	config DEBUG_CLPS711X_UART1
+		bool "Kernel low-level debugging messages via UART1"
+		depends on ARCH_CLPS711X
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the first serial port on these devices.
+
+	config DEBUG_CLPS711X_UART2
+		bool "Kernel low-level debugging messages via UART2"
+		depends on ARCH_CLPS711X
+		help
+		  Say Y here if you want the debug print routines to direct
+		  their output to the second serial port on these devices.
+
+endchoice
+
 config EARLY_PRINTK
 	bool "Early printk"
 	depends on DEBUG_LL
@@ -80,43 +138,14 @@ config EARLY_PRINTK
 	  kernel low-level debugging functions. Add earlyprintk to your
 	  kernel parameters to enable this console.
 
-config DEBUG_ICEDCC
-	bool "Kernel low-level debugging via EmbeddedICE DCC channel"
-	depends on DEBUG_LL
-	help
-	  Say Y here if you want the debug print routines to direct their
-	  output to the EmbeddedICE macrocell's DCC channel using
-	  co-processor 14. This is known to work on the ARM9 style ICE
-	  channel and on the XScale with the PEEDI.
-
-	  It does include a timeout to ensure that the system does not
-	  totally freeze when there is nothing connected to read.
-
 config OC_ETM
 	bool "On-chip ETM and ETB"
-	select ARM_AMBA
+	depends on ARM_AMBA
 	help
 	  Enables the on-chip embedded trace macrocell and embedded trace
 	  buffer driver that will allow you to collect traces of the
 	  kernel code.
 
-config DEBUG_DC21285_PORT
-	bool "Kernel low-level debugging messages via footbridge serial port"
-	depends on DEBUG_LL && FOOTBRIDGE
-	help
-	  Say Y here if you want the debug print routines to direct their
-	  output to the serial port in the DC21285 (Footbridge). Saying N
-	  will cause the debug messages to appear on the first 16550
-	  serial port.
-
-config DEBUG_CLPS711X_UART2
-	bool "Kernel low-level debugging messages via UART2"
-	depends on DEBUG_LL && ARCH_CLPS711X
-	help
-	  Say Y here if you want the debug print routines to direct their
-	  output to the second serial port on these devices.  Saying N will
-	  cause the debug messages to appear on the first serial port.
-
 config DEBUG_S3C_UART
 	depends on PLAT_SAMSUNG
 	int "S3C UART to use for low-level debug"
@@ -129,4 +158,10 @@ config DEBUG_S3C_UART
 	  The uncompressor code port configuration is now handled
 	  by CONFIG_S3C_LOWLEVEL_UART_PORT.
 
+config ARM_KPROBES_TEST
+	tristate "Kprobes test module"
+	depends on KPROBES && MODULES
+	help
+	  Perform tests of kprobes API and instruction set simulation.
+
 endmenu
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index a1edfd5a129a..176062ac7f07 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -78,7 +78,16 @@ endif
 
 $(obj)/uImage: STARTADDR=$(LOADADDR)
 
+check_for_multiple_loadaddr = \
+if [ $(words $(LOADADDR)) -gt 1 ]; then \
+	echo 'multiple load addresses: $(LOADADDR)'; \
+	echo 'This is incompatible with uImages'; \
+	echo 'Specify LOADADDR on the commandline to build an uImage'; \
+	false; \
+fi
+
 $(obj)/uImage:	$(obj)/zImage FORCE
+	@$(check_for_multiple_loadaddr)
 	$(call if_changed,uimage)
 	@echo '  Image $@ is ready'
 
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index c6028967d336..e0936a148516 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -5,3 +5,12 @@ piggy.lzo
 piggy.lzma
 vmlinux
 vmlinux.lds
+
+# borrowed libfdt files
+fdt.c
+fdt.h
+fdt_ro.c
+fdt_rw.c
+fdt_wip.c
+libfdt.h
+libfdt_internal.h
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 0c74a6fab952..21f56ff32797 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -26,6 +26,10 @@ HEAD	= head.o
 OBJS	+= misc.o decompress.o
 FONTC	= $(srctree)/drivers/video/console/font_acorn_8x8.c
 
+# string library code (-Os is enforced to keep it much smaller)
+OBJS		+= string.o
+CFLAGS_string.o	:= -Os
+
 #
 # Architecture dependencies
 #
@@ -89,21 +93,41 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip
 suffix_$(CONFIG_KERNEL_LZO)  = lzo
 suffix_$(CONFIG_KERNEL_LZMA) = lzma
 
+# Borrowed libfdt files for the ATAG compatibility mode
+
+libfdt		:= fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+libfdt_hdrs	:= fdt.h libfdt.h libfdt_internal.h
+
+libfdt_objs	:= $(addsuffix .o, $(basename $(libfdt)))
+
+$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/%
+	$(call cmd,shipped)
+
+$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \
+	$(addprefix $(obj)/,$(libfdt_hdrs))
+
+ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+OBJS	+= $(libfdt_objs) atags_to_fdt.o
+endif
+
 targets       := vmlinux vmlinux.lds \
 		 piggy.$(suffix_y) piggy.$(suffix_y).o \
-		 font.o font.c head.o misc.o $(OBJS)
+		 lib1funcs.o lib1funcs.S font.o font.c head.o misc.o $(OBJS)
 
 # Make sure files are removed during clean
-extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S
+extra-y       += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S $(libfdt) $(libfdt_hdrs)
 
 ifeq ($(CONFIG_FUNCTION_TRACER),y)
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-ccflags-y := -fpic -fno-builtin
+ccflags-y := -fpic -fno-builtin -I$(obj)
 asflags-y := -Wa,-march=all
 
+# Supply kernel BSS size to the decompressor via a linker symbol.
+KBSS_SZ = $(shell size $(obj)/../../../../vmlinux | awk 'END{print $$3}')
+LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
 # Supply ZRELADDR to the decompressor via a linker symbol.
 ifneq ($(CONFIG_AUTO_ZRELADDR),y)
 LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR)
@@ -123,7 +147,7 @@ LDFLAGS_vmlinux += -T
 # For __aeabi_uidivmod
 lib1funcs = $(obj)/lib1funcs.o
 
-$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
+$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S
 	$(call cmd,shipped)
 
 # We need to prevent any GOTOFF relocs being used with references
@@ -139,8 +163,16 @@ bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
   ( echo "following symbols must have non local/private scope:" >&2; \
     echo "$$bad_syms" >&2; rm -f $@; false )
 
+check_for_multiple_zreladdr = \
+if [ $(words $(ZRELADDR)) -gt 1 -a "$(CONFIG_AUTO_ZRELADDR)" = "" ]; then \
+	echo 'multiple zreladdrs: $(ZRELADDR)'; \
+	echo 'This needs CONFIG_AUTO_ZRELADDR to be set'; \
+	false; \
+fi
+
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
 	 	$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
+	@$(check_for_multiple_zreladdr)
 	$(call if_changed,ld)
 	@$(check_for_bad_syms)
 
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
new file mode 100644
index 000000000000..6ce11c481178
--- /dev/null
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -0,0 +1,97 @@
+#include <asm/setup.h>
+#include <libfdt.h>
+
+static int node_offset(void *fdt, const char *node_path)
+{
+	int offset = fdt_path_offset(fdt, node_path);
+	if (offset == -FDT_ERR_NOTFOUND)
+		offset = fdt_add_subnode(fdt, 0, node_path);
+	return offset;
+}
+
+static int setprop(void *fdt, const char *node_path, const char *property,
+		   uint32_t *val_array, int size)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop(fdt, offset, property, val_array, size);
+}
+
+static int setprop_string(void *fdt, const char *node_path,
+			  const char *property, const char *string)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_string(fdt, offset, property, string);
+}
+
+static int setprop_cell(void *fdt, const char *node_path,
+			const char *property, uint32_t val)
+{
+	int offset = node_offset(fdt, node_path);
+	if (offset < 0)
+		return offset;
+	return fdt_setprop_cell(fdt, offset, property, val);
+}
+
+/*
+ * Convert and fold provided ATAGs into the provided FDT.
+ *
+ * REturn values:
+ *    = 0 -> pretend success
+ *    = 1 -> bad ATAG (may retry with another possible ATAG pointer)
+ *    < 0 -> error from libfdt
+ */
+int atags_to_fdt(void *atag_list, void *fdt, int total_space)
+{
+	struct tag *atag = atag_list;
+	uint32_t mem_reg_property[2 * NR_BANKS];
+	int memcount = 0;
+	int ret;
+
+	/* make sure we've got an aligned pointer */
+	if ((u32)atag_list & 0x3)
+		return 1;
+
+	/* if we get a DTB here we're done already */
+	if (*(u32 *)atag_list == fdt32_to_cpu(FDT_MAGIC))
+	       return 0;
+
+	/* validate the ATAG */
+	if (atag->hdr.tag != ATAG_CORE ||
+	    (atag->hdr.size != tag_size(tag_core) &&
+	     atag->hdr.size != 2))
+		return 1;
+
+	/* let's give it all the room it could need */
+	ret = fdt_open_into(fdt, fdt, total_space);
+	if (ret < 0)
+		return ret;
+
+	for_each_tag(atag, atag_list) {
+		if (atag->hdr.tag == ATAG_CMDLINE) {
+			setprop_string(fdt, "/chosen", "bootargs",
+					atag->u.cmdline.cmdline);
+		} else if (atag->hdr.tag == ATAG_MEM) {
+			if (memcount >= sizeof(mem_reg_property)/4)
+				continue;
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.start);
+			mem_reg_property[memcount++] = cpu_to_fdt32(atag->u.mem.size);
+		} else if (atag->hdr.tag == ATAG_INITRD2) {
+			uint32_t initrd_start, initrd_size;
+			initrd_start = atag->u.initrd.start;
+			initrd_size = atag->u.initrd.size;
+			setprop_cell(fdt, "/chosen", "linux,initrd-start",
+					initrd_start);
+			setprop_cell(fdt, "/chosen", "linux,initrd-end",
+					initrd_start + initrd_size);
+		}
+	}
+
+	if (memcount)
+		setprop(fdt, "/memory", "reg", mem_reg_property, 4*memcount);
+
+	return fdt_pack(fdt);
+}
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index e95a5989602a..c2effc917254 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -216,6 +216,104 @@ restart:	adr	r0, LC0
 		mov	r10, r6
 #endif
 
+		mov	r5, #0			@ init dtb size to 0
+#ifdef CONFIG_ARM_APPENDED_DTB
+/*
+ *   r0  = delta
+ *   r2  = BSS start
+ *   r3  = BSS end
+ *   r4  = final kernel address
+ *   r5  = appended dtb size (still unknown)
+ *   r6  = _edata
+ *   r7  = architecture ID
+ *   r8  = atags/device tree pointer
+ *   r9  = size of decompressed image
+ *   r10 = end of this image, including  bss/stack/malloc space if non XIP
+ *   r11 = GOT start
+ *   r12 = GOT end
+ *   sp  = stack pointer
+ *
+ * if there are device trees (dtb) appended to zImage, advance r10 so that the
+ * dtb data will get relocated along with the kernel if necessary.
+ */
+
+		ldr	lr, [r6, #0]
+#ifndef __ARMEB__
+		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
+#else
+		ldr	r1, =0xd00dfeed
+#endif
+		cmp	lr, r1
+		bne	dtb_check_done		@ not found
+
+#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
+		/*
+		 * OK... Let's do some funky business here.
+		 * If we do have a DTB appended to zImage, and we do have
+		 * an ATAG list around, we want the later to be translated
+		 * and folded into the former here.  To be on the safe side,
+		 * let's temporarily move  the stack away into the malloc
+		 * area.  No GOT fixup has occurred yet, but none of the
+		 * code we're about to call uses any global variable.
+		*/
+		add	sp, sp, #0x10000
+		stmfd	sp!, {r0-r3, ip, lr}
+		mov	r0, r8
+		mov	r1, r6
+		sub	r2, sp, r6
+		bl	atags_to_fdt
+
+		/*
+		 * If returned value is 1, there is no ATAG at the location
+		 * pointed by r8.  Try the typical 0x100 offset from start
+		 * of RAM and hope for the best.
+		 */
+		cmp	r0, #1
+		sub	r0, r4, #TEXT_OFFSET
+		add	r0, r0, #0x100
+		mov	r1, r6
+		sub	r2, sp, r6
+		blne	atags_to_fdt
+
+		ldmfd	sp!, {r0-r3, ip, lr}
+		sub	sp, sp, #0x10000
+#endif
+
+		mov	r8, r6			@ use the appended device tree
+
+		/*
+		 * Make sure that the DTB doesn't end up in the final
+		 * kernel's .bss area. To do so, we adjust the decompressed
+		 * kernel size to compensate if that .bss size is larger
+		 * than the relocated code.
+		 */
+		ldr	r5, =_kernel_bss_size
+		adr	r1, wont_overwrite
+		sub	r1, r6, r1
+		subs	r1, r5, r1
+		addhi	r9, r9, r1
+
+		/* Get the dtb's size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert r5 (dtb size) to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+
+		/* relocate some pointers past the appended dtb */
+		add	r6, r6, r5
+		add	r10, r10, r5
+		add	sp, sp, r5
+dtb_check_done:
+#endif
+
 /*
  * Check to see if we will overwrite ourselves.
  *   r4  = final kernel address
@@ -223,15 +321,14 @@ restart:	adr	r0, LC0
  *   r10 = end of this image, including  bss/stack/malloc space if non XIP
  * We basically want:
  *   r4 - 16k page directory >= r10 -> OK
- *   r4 + image length <= current position (pc) -> OK
+ *   r4 + image length <= address of wont_overwrite -> OK
  */
 		add	r10, r10, #16384
 		cmp	r4, r10
 		bhs	wont_overwrite
 		add	r10, r4, r9
-   ARM(		cmp	r10, pc		)
- THUMB(		mov	lr, pc		)
- THUMB(		cmp	r10, lr		)
+		adr	r9, wont_overwrite
+		cmp	r10, r9
 		bls	wont_overwrite
 
 /*
@@ -285,14 +382,16 @@ wont_overwrite:
  *   r2  = BSS start
  *   r3  = BSS end
  *   r4  = kernel execution address
+ *   r5  = appended dtb size (0 if not present)
  *   r7  = architecture ID
  *   r8  = atags pointer
  *   r11 = GOT start
  *   r12 = GOT end
  *   sp  = stack pointer
  */
-		teq	r0, #0
+		orrs	r1, r0, r5
 		beq	not_relocated
+
 		add	r11, r11, r0
 		add	r12, r12, r0
 
@@ -307,12 +406,21 @@ wont_overwrite:
 
 		/*
 		 * Relocate all entries in the GOT table.
+		 * Bump bss entries to _edata + dtb size
 		 */
 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
-		add	r1, r1, r0		@ table.  This fixes up the
-		str	r1, [r11], #4		@ C references.
+		add	r1, r1, r0		@ This fixes up C references
+		cmp	r1, r2			@ if entry >= bss_start &&
+		cmphs	r3, r1			@       bss_end > entry
+		addhi	r1, r1, r5		@    entry += dtb size
+		str	r1, [r11], #4		@ next entry
 		cmp	r11, r12
 		blo	1b
+
+		/* bump our bss pointers too */
+		add	r2, r2, r5
+		add	r3, r3, r5
+
 #else
 
 		/*
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
new file mode 100644
index 000000000000..1f4e71876b00
--- /dev/null
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -0,0 +1,15 @@
+#ifndef _ARM_LIBFDT_ENV_H
+#define _ARM_LIBFDT_ENV_H
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+#define fdt16_to_cpu(x)		be16_to_cpu(x)
+#define cpu_to_fdt16(x)		cpu_to_be16(x)
+#define fdt32_to_cpu(x)		be32_to_cpu(x)
+#define cpu_to_fdt32(x)		cpu_to_be32(x)
+#define fdt64_to_cpu(x)		be64_to_cpu(x)
+#define cpu_to_fdt64(x)		cpu_to_be64(x)
+
+#endif
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 832d37236c59..8e2a8fca5ed2 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -18,14 +18,9 @@
 
 unsigned int __machine_arch_type;
 
-#define _LINUX_STRING_H_
-
 #include <linux/compiler.h>	/* for inline */
-#include <linux/types.h>	/* for size_t */
-#include <linux/stddef.h>	/* for NULL */
+#include <linux/types.h>
 #include <linux/linkage.h>
-#include <asm/string.h>
-
 
 static void putstr(const char *ptr);
 extern void error(char *x);
@@ -101,41 +96,6 @@ static void putstr(const char *ptr)
 	flush();
 }
 
-
-void *memcpy(void *__dest, __const void *__src, size_t __n)
-{
-	int i = 0;
-	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
-
-	for (i = __n >> 3; i > 0; i--) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 2) {
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1 << 1) {
-		*d++ = *s++;
-		*d++ = *s++;
-	}
-
-	if (__n & 1)
-		*d++ = *s++;
-
-	return __dest;
-}
-
 /*
  * gzip declarations
  */
diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
new file mode 100644
index 000000000000..36e53ef9200f
--- /dev/null
+++ b/arch/arm/boot/compressed/string.c
@@ -0,0 +1,127 @@
+/*
+ * arch/arm/boot/compressed/string.c
+ *
+ * Small subset of simple string routines
+ */
+
+#include <linux/string.h>
+
+void *memcpy(void *__dest, __const void *__src, size_t __n)
+{
+	int i = 0;
+	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
+
+	for (i = __n >> 3; i > 0; i--) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 2) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1)
+		*d++ = *s++;
+
+	return __dest;
+}
+
+void *memmove(void *__dest, __const void *__src, size_t count)
+{
+	unsigned char *d = __dest;
+	const unsigned char *s = __src;
+
+	if (__dest == __src)
+		return __dest;
+
+	if (__dest < __src)
+		return memcpy(__dest, __src, count);
+
+	while (count--)
+		d[count] = s[count];
+	return __dest;
+}
+
+size_t strlen(const char *s)
+{
+	const char *sc = s;
+
+	while (*sc != '\0')
+		sc++;
+	return sc - s;
+}
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+	const unsigned char *su1 = cs, *su2 = ct, *end = su1 + count;
+	int res = 0;
+
+	while (su1 < end) {
+		res = *su1++ - *su2++;
+		if (res)
+			break;
+	}
+	return res;
+}
+
+int strcmp(const char *cs, const char *ct)
+{
+	unsigned char c1, c2;
+	int res = 0;
+
+	do {
+		c1 = *cs++;
+		c2 = *ct++;
+		res = c1 - c2;
+		if (res)
+			break;
+	} while (c1);
+	return res;
+}
+
+void *memchr(const void *s, int c, size_t count)
+{
+	const unsigned char *p = s;
+
+	while (count--)
+		if ((unsigned char)c == *p++)
+			return (void *)(p - 1);
+	return NULL;
+}
+
+char *strchr(const char *s, int c)
+{
+	while (*s != (char)c)
+		if (*s++ == '\0')
+			return NULL;
+	return (char *)s;
+}
+
+#undef memset
+
+void *memset(void *s, int c, size_t count)
+{
+	char *xs = s;
+	while (count--)
+		*xs++ = c;
+	return s;
+}
+
+void __memzero(void *s, size_t count)
+{
+	memset(s, 0, count);
+}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in
index 4e728834a1b9..4919f2ac8b89 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.in
+++ b/arch/arm/boot/compressed/vmlinux.lds.in
@@ -51,6 +51,10 @@ SECTIONS
   _got_start = .;
   .got			: { *(.got) }
   _got_end = .;
+
+  /* ensure the zImage file size is always a multiple of 64 bits */
+  /* (without a dummy byte, ld just ignores the empty section) */
+  .pad			: { BYTE(0); . = ALIGN(8); }
   _edata = .;
 
   . = BSS_START;
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 3227ca952a12..a8fc6b237592 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -26,14 +26,18 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/smp.h>
+#include <linux/cpu_pm.h>
 #include <linux/cpumask.h>
 #include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/mach/irq.h>
 #include <asm/hardware/gic.h>
 
-static DEFINE_SPINLOCK(irq_controller_lock);
+static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
 /* Address of GIC 0 CPU interface */
 void __iomem *gic_cpu_base_addr __read_mostly;
@@ -82,30 +86,30 @@ static void gic_mask_irq(struct irq_data *d)
 {
 	u32 mask = 1 << (d->irq % 32);
 
-	spin_lock(&irq_controller_lock);
+	raw_spin_lock(&irq_controller_lock);
 	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
 	if (gic_arch_extn.irq_mask)
 		gic_arch_extn.irq_mask(d);
-	spin_unlock(&irq_controller_lock);
+	raw_spin_unlock(&irq_controller_lock);
 }
 
 static void gic_unmask_irq(struct irq_data *d)
 {
 	u32 mask = 1 << (d->irq % 32);
 
-	spin_lock(&irq_controller_lock);
+	raw_spin_lock(&irq_controller_lock);
 	if (gic_arch_extn.irq_unmask)
 		gic_arch_extn.irq_unmask(d);
 	writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
-	spin_unlock(&irq_controller_lock);
+	raw_spin_unlock(&irq_controller_lock);
 }
 
 static void gic_eoi_irq(struct irq_data *d)
 {
 	if (gic_arch_extn.irq_eoi) {
-		spin_lock(&irq_controller_lock);
+		raw_spin_lock(&irq_controller_lock);
 		gic_arch_extn.irq_eoi(d);
-		spin_unlock(&irq_controller_lock);
+		raw_spin_unlock(&irq_controller_lock);
 	}
 
 	writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI);
@@ -129,7 +133,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
 		return -EINVAL;
 
-	spin_lock(&irq_controller_lock);
+	raw_spin_lock(&irq_controller_lock);
 
 	if (gic_arch_extn.irq_set_type)
 		gic_arch_extn.irq_set_type(d, type);
@@ -154,7 +158,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	if (enabled)
 		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff);
 
-	spin_unlock(&irq_controller_lock);
+	raw_spin_unlock(&irq_controller_lock);
 
 	return 0;
 }
@@ -180,12 +184,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 		return -EINVAL;
 
 	mask = 0xff << shift;
-	bit = 1 << (cpu + shift);
+	bit = 1 << (cpu_logical_map(cpu) + shift);
 
-	spin_lock(&irq_controller_lock);
+	raw_spin_lock(&irq_controller_lock);
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
-	spin_unlock(&irq_controller_lock);
+	raw_spin_unlock(&irq_controller_lock);
 
 	return IRQ_SET_MASK_OK;
 }
@@ -215,9 +219,9 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc)
 
 	chained_irq_enter(chip, desc);
 
-	spin_lock(&irq_controller_lock);
+	raw_spin_lock(&irq_controller_lock);
 	status = readl_relaxed(chip_data->cpu_base + GIC_CPU_INTACK);
-	spin_unlock(&irq_controller_lock);
+	raw_spin_unlock(&irq_controller_lock);
 
 	gic_irq = (status & 0x3ff);
 	if (gic_irq == 1023)
@@ -259,9 +263,16 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	unsigned int irq_start)
 {
 	unsigned int gic_irqs, irq_limit, i;
+	u32 cpumask;
 	void __iomem *base = gic->dist_base;
-	u32 cpumask = 1 << smp_processor_id();
+	u32 cpu = 0;
+	u32 nrppis = 0, ppi_base = 0;
 
+#ifdef CONFIG_SMP
+	cpu = cpu_logical_map(smp_processor_id());
+#endif
+
+	cpumask = 1 << cpu;
 	cpumask |= cpumask << 8;
 	cpumask |= cpumask << 16;
 
@@ -276,6 +287,25 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
 
+	gic->gic_irqs = gic_irqs;
+
+	/*
+	 * Nobody would be insane enough to use PPIs on a secondary
+	 * GIC, right?
+	 */
+	if (gic == &gic_data[0]) {
+		nrppis = (32 - irq_start) & 31;
+
+		/* The GIC only supports up to 16 PPIs. */
+		if (nrppis > 16)
+			BUG();
+
+		ppi_base = gic->irq_offset + 32 - nrppis;
+	}
+
+	pr_info("Configuring GIC with %d sources (%d PPIs)\n",
+		gic_irqs, (gic == &gic_data[0]) ? nrppis : 0);
+
 	/*
 	 * Set all global interrupts to be level triggered, active low.
 	 */
@@ -311,7 +341,17 @@ static void __init gic_dist_init(struct gic_chip_data *gic,
 	/*
 	 * Setup the Linux IRQ subsystem.
 	 */
-	for (i = irq_start; i < irq_limit; i++) {
+	for (i = 0; i < nrppis; i++) {
+		int ppi = i + ppi_base;
+
+		irq_set_percpu_devid(ppi);
+		irq_set_chip_and_handler(ppi, &gic_chip,
+					 handle_percpu_devid_irq);
+		irq_set_chip_data(ppi, gic);
+		set_irq_flags(ppi, IRQF_VALID | IRQF_NOAUTOEN);
+	}
+
+	for (i = irq_start + nrppis; i < irq_limit; i++) {
 		irq_set_chip_and_handler(i, &gic_chip, handle_fasteoi_irq);
 		irq_set_chip_data(i, gic);
 		set_irq_flags(i, IRQF_VALID | IRQF_PROBE);
@@ -343,6 +383,189 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic)
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+#ifdef CONFIG_CPU_PM
+/*
+ * Saves the GIC distributor registers during suspend or idle.  Must be called
+ * with interrupts disabled but before powering down the GIC.  After calling
+ * this function, no interrupts will be delivered by the GIC, and another
+ * platform-specific wakeup source must be enabled.
+ */
+static void gic_dist_save(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	void __iomem *dist_base;
+	int i;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		gic_data[gic_nr].saved_spi_conf[i] =
+			readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		gic_data[gic_nr].saved_spi_target[i] =
+			readl_relaxed(dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		gic_data[gic_nr].saved_spi_enable[i] =
+			readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+}
+
+/*
+ * Restores the GIC distributor registers during resume or when coming out of
+ * idle.  Must be called before enabling interrupts.  If a level interrupt
+ * that occured while the GIC was suspended is still present, it will be
+ * handled normally, but any edge interrupts that occured will not be seen by
+ * the GIC and need to be handled by the platform-specific wakeup source.
+ */
+static void gic_dist_restore(unsigned int gic_nr)
+{
+	unsigned int gic_irqs;
+	unsigned int i;
+	void __iomem *dist_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	gic_irqs = gic_data[gic_nr].gic_irqs;
+	dist_base = gic_data[gic_nr].dist_base;
+
+	if (!dist_base)
+		return;
+
+	writel_relaxed(0, dist_base + GIC_DIST_CTRL);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_conf[i],
+			dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(0xa0a0a0a0,
+			dist_base + GIC_DIST_PRI + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
+			dist_base + GIC_DIST_TARGET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
+			dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	writel_relaxed(1, dist_base + GIC_DIST_CTRL);
+}
+
+static void gic_cpu_save(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
+
+}
+
+static void gic_cpu_restore(unsigned int gic_nr)
+{
+	int i;
+	u32 *ptr;
+	void __iomem *dist_base;
+	void __iomem *cpu_base;
+
+	if (gic_nr >= MAX_GIC_NR)
+		BUG();
+
+	dist_base = gic_data[gic_nr].dist_base;
+	cpu_base = gic_data[gic_nr].cpu_base;
+
+	if (!dist_base || !cpu_base)
+		return;
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
+	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(32, 4); i++)
+		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4);
+
+	writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK);
+	writel_relaxed(1, cpu_base + GIC_CPU_CTRL);
+}
+
+static int gic_notifier(struct notifier_block *self, unsigned long cmd,	void *v)
+{
+	int i;
+
+	for (i = 0; i < MAX_GIC_NR; i++) {
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			gic_cpu_save(i);
+			break;
+		case CPU_PM_ENTER_FAILED:
+		case CPU_PM_EXIT:
+			gic_cpu_restore(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER:
+			gic_dist_save(i);
+			break;
+		case CPU_CLUSTER_PM_ENTER_FAILED:
+		case CPU_CLUSTER_PM_EXIT:
+			gic_dist_restore(i);
+			break;
+		}
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block gic_notifier_block = {
+	.notifier_call = gic_notifier,
+};
+
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+	gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_enable);
+
+	gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_conf);
+
+	cpu_pm_register_notifier(&gic_notifier_block);
+}
+#else
+static void __init gic_pm_init(struct gic_chip_data *gic)
+{
+}
+#endif
+
 void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	void __iomem *dist_base, void __iomem *cpu_base)
 {
@@ -358,8 +581,10 @@ void __init gic_init(unsigned int gic_nr, unsigned int irq_start,
 	if (gic_nr == 0)
 		gic_cpu_base_addr = cpu_base;
 
+	gic_chip.flags |= gic_arch_extn.flags;
 	gic_dist_init(gic, irq_start);
 	gic_cpu_init(gic);
+	gic_pm_init(gic);
 }
 
 void __cpuinit gic_secondary_init(unsigned int gic_nr)
@@ -369,20 +594,15 @@ void __cpuinit gic_secondary_init(unsigned int gic_nr)
 	gic_cpu_init(&gic_data[gic_nr]);
 }
 
-void __cpuinit gic_enable_ppi(unsigned int irq)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	irq_set_status_flags(irq, IRQ_NOPROBE);
-	gic_unmask_irq(irq_get_irq_data(irq));
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_SMP
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 {
-	unsigned long map = *cpus_addr(*mask);
+	int cpu;
+	unsigned long map = 0;
+
+	/* Convert our logical CPU mask into a physical one. */
+	for_each_cpu(cpu, mask)
+		map |= 1 << cpu_logical_map(cpu);
 
 	/*
 	 * Ensure that stores to Normal memory are visible to the
diff --git a/arch/arm/common/pl330.c b/arch/arm/common/pl330.c
index 97912fa48782..7129cfbdacd6 100644
--- a/arch/arm/common/pl330.c
+++ b/arch/arm/common/pl330.c
@@ -1546,7 +1546,7 @@ int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
 
 		/* Start the next */
 	case PL330_OP_START:
-		if (!_start(thrd))
+		if (!_thrd_active(thrd) && !_start(thrd))
 			ret = -EIO;
 		break;
 
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 0569de6acfba..61691cdbdcf2 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -718,6 +718,10 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq)
 		goto err_free;
 	}
 
+	ret = clk_prepare(sachip->clk);
+	if (ret)
+		goto err_clkput;
+
 	spin_lock_init(&sachip->lock);
 
 	sachip->dev = me;
@@ -733,7 +737,7 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq)
 	sachip->base = ioremap(mem->start, PAGE_SIZE * 2);
 	if (!sachip->base) {
 		ret = -ENOMEM;
-		goto err_clkput;
+		goto err_clk_unprep;
 	}
 
 	/*
@@ -809,6 +813,8 @@ __sa1111_probe(struct device *me, struct resource *mem, int irq)
 
  err_unmap:
 	iounmap(sachip->base);
+ err_clk_unprep:
+	clk_unprepare(sachip->clk);
  err_clkput:
 	clk_put(sachip->clk);
  err_free:
@@ -835,6 +841,7 @@ static void __sa1111_remove(struct sa1111 *sachip)
 	sa1111_writel(0, irqbase + SA1111_WAKEEN1);
 
 	clk_disable(sachip->clk);
+	clk_unprepare(sachip->clk);
 
 	if (sachip->irq != NO_IRQ) {
 		irq_set_chained_handler(sachip->irq, NULL);
diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c
index 41df47875122..2393b5bc96fa 100644
--- a/arch/arm/common/timer-sp.c
+++ b/arch/arm/common/timer-sp.c
@@ -41,9 +41,17 @@ static long __init sp804_get_clock_rate(const char *name)
 		return PTR_ERR(clk);
 	}
 
+	err = clk_prepare(clk);
+	if (err) {
+		pr_err("sp804: %s clock failed to prepare: %d\n", name, err);
+		clk_put(clk);
+		return err;
+	}
+
 	err = clk_enable(clk);
 	if (err) {
 		pr_err("sp804: %s clock failed to enable: %d\n", name, err);
+		clk_unprepare(clk);
 		clk_put(clk);
 		return err;
 	}
@@ -52,6 +60,7 @@ static long __init sp804_get_clock_rate(const char *name)
 	if (rate < 0) {
 		pr_err("sp804: %s clock failed to get rate: %ld\n", name, rate);
 		clk_disable(clk);
+		clk_unprepare(clk);
 		clk_put(clk);
 	}
 
diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c
index 7aa4262ada7a..01f18a421b17 100644
--- a/arch/arm/common/vic.c
+++ b/arch/arm/common/vic.c
@@ -259,7 +259,6 @@ static void __init vic_disable(void __iomem *base)
 	writel(0, base + VIC_INT_SELECT);
 	writel(0, base + VIC_INT_ENABLE);
 	writel(~0, base + VIC_INT_ENABLE_CLEAR);
-	writel(0, base + VIC_IRQ_STATUS);
 	writel(0, base + VIC_ITCR);
 	writel(~0, base + VIC_INT_SOFT_CLEAR);
 }
@@ -347,7 +346,8 @@ void __init vic_init(void __iomem *base, unsigned int irq_start,
 
 	/* Identify which VIC cell this one is, by reading the ID */
 	for (i = 0; i < 4; i++) {
-		u32 addr = ((u32)base & PAGE_MASK) + 0xfe0 + (i * 4);
+		void __iomem *addr;
+		addr = (void __iomem *)((u32)base & PAGE_MASK) + 0xfe0 + (i * 4);
 		cellid |= (readl(addr) & 0xff) << (8 * i);
 	}
 	vendor = (cellid >> 12) & 0xff;
diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig
index 7196ade07e27..1103f62a1964 100644
--- a/arch/arm/configs/integrator_defconfig
+++ b/arch/arm/configs/integrator_defconfig
@@ -1,5 +1,6 @@
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
+CONFIG_TINY_RCU=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
@@ -8,20 +9,29 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_ARCH_INTEGRATOR=y
 CONFIG_ARCH_INTEGRATOR_AP=y
+CONFIG_ARCH_INTEGRATOR_CP=y
 CONFIG_CPU_ARM720T=y
 CONFIG_CPU_ARM920T=y
+CONFIG_CPU_ARM922T=y
+CONFIG_CPU_ARM926T=y
+CONFIG_CPU_ARM1020=y
+CONFIG_CPU_ARM1022=y
+CONFIG_CPU_ARM1026=y
 CONFIG_PCI=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
+CONFIG_AEABI=y
 CONFIG_LEDS=y
 CONFIG_LEDS_CPU=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=ttyAM0,38400n8 root=/dev/nfs ip=bootp mem=32M"
+CONFIG_CMDLINE="console=ttyAM0,38400n8 root=/dev/nfs ip=bootp"
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_FPE_NWFPE=y
-CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -32,7 +42,6 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_AFS_PARTS=y
 CONFIG_MTD_CHAR=y
@@ -40,6 +49,7 @@ CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
 CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_PHYSMAP=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -56,6 +66,8 @@ CONFIG_FB_MODE_HELPERS=y
 CONFIG_FB_MATROX=y
 CONFIG_FB_MATROX_MILLENIUM=y
 CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL030=y
 CONFIG_EXT2_FS=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
@@ -68,4 +80,3 @@ CONFIG_NFSD_V3=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_ERRORS=y
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 6550db3aa5c7..960abceb8e14 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -1,3 +1,20 @@
 include include/asm-generic/Kbuild.asm
 
 header-y += hwcap.h
+
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += cputime.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += ioctl.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sections.h
+generic-y += siginfo.h
+generic-y += sizes.h
diff --git a/arch/arm/include/asm/auxvec.h b/arch/arm/include/asm/auxvec.h
deleted file mode 100644
index c0536f6b29a7..000000000000
--- a/arch/arm/include/asm/auxvec.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASMARM_AUXVEC_H
-#define __ASMARM_AUXVEC_H
-
-#endif
diff --git a/arch/arm/include/asm/bitsperlong.h b/arch/arm/include/asm/bitsperlong.h
deleted file mode 100644
index 6dc0bb0c13b2..000000000000
--- a/arch/arm/include/asm/bitsperlong.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/bitsperlong.h>
diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h
index 4d88425a4169..9abe7a07d5ac 100644
--- a/arch/arm/include/asm/bug.h
+++ b/arch/arm/include/asm/bug.h
@@ -3,21 +3,58 @@
 
 
 #ifdef CONFIG_BUG
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-extern void __bug(const char *file, int line) __attribute__((noreturn));
-
-/* give file/line information */
-#define BUG()		__bug(__FILE__, __LINE__)
 
+/*
+ * Use a suitable undefined instruction to use for ARM/Thumb2 bug handling.
+ * We need to be careful not to conflict with those used by other modules and
+ * the register_undef_hook() system.
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+#define BUG_INSTR_VALUE 0xde02
+#define BUG_INSTR_TYPE ".hword "
 #else
+#define BUG_INSTR_VALUE 0xe7f001f2
+#define BUG_INSTR_TYPE ".word "
+#endif
 
-/* this just causes an oops */
-#define BUG()		do { *(int *)0 = 0; } while (1)
 
-#endif
+#define BUG() _BUG(__FILE__, __LINE__, BUG_INSTR_VALUE)
+#define _BUG(file, line, value) __BUG(file, line, value)
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+/*
+ * The extra indirection is to ensure that the __FILE__ string comes through
+ * OK. Many version of gcc do not support the asm %c parameter which would be
+ * preferable to this unpleasantness. We use mergeable string sections to
+ * avoid multiple copies of the string appearing in the kernel image.
+ */
+
+#define __BUG(__file, __line, __value)				\
+do {								\
+	BUILD_BUG_ON(sizeof(struct bug_entry) != 12);		\
+	asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n"	\
+		".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \
+		"2:\t.asciz " #__file "\n" 			\
+		".popsection\n" 				\
+		".pushsection __bug_table,\"a\"\n"		\
+		"3:\t.word 1b, 2b\n"				\
+		"\t.hword " #__line ", 0\n"			\
+		".popsection");					\
+	unreachable();						\
+} while (0)
+
+#else  /* not CONFIG_DEBUG_BUGVERBOSE */
+
+#define __BUG(__file, __line, __value)				\
+do {								\
+	asm volatile(BUG_INSTR_TYPE #__value);			\
+	unreachable();						\
+} while (0)
+#endif  /* CONFIG_DEBUG_BUGVERBOSE */
 
 #define HAVE_ARCH_BUG
-#endif
+#endif  /* CONFIG_BUG */
 
 #include <asm-generic/bug.h>
 
diff --git a/arch/arm/include/asm/cachetype.h b/arch/arm/include/asm/cachetype.h
index c023db09fcc1..7ea78144ae22 100644
--- a/arch/arm/include/asm/cachetype.h
+++ b/arch/arm/include/asm/cachetype.h
@@ -7,6 +7,7 @@
 #define CACHEID_VIPT			(CACHEID_VIPT_ALIASING|CACHEID_VIPT_NONALIASING)
 #define CACHEID_ASID_TAGGED		(1 << 3)
 #define CACHEID_VIPT_I_ALIASING		(1 << 4)
+#define CACHEID_PIPT			(1 << 5)
 
 extern unsigned int cacheid;
 
@@ -16,6 +17,7 @@ extern unsigned int cacheid;
 #define cache_is_vipt_aliasing()	cacheid_is(CACHEID_VIPT_ALIASING)
 #define icache_is_vivt_asid_tagged()	cacheid_is(CACHEID_ASID_TAGGED)
 #define icache_is_vipt_aliasing()	cacheid_is(CACHEID_VIPT_I_ALIASING)
+#define icache_is_pipt()		cacheid_is(CACHEID_PIPT)
 
 /*
  * __LINUX_ARM_ARCH__ is the minimum supported CPU architecture
@@ -26,7 +28,8 @@ extern unsigned int cacheid;
 #if __LINUX_ARM_ARCH__ >= 7
 #define __CACHEID_ARCH_MIN	(CACHEID_VIPT_NONALIASING |\
 				 CACHEID_ASID_TAGGED |\
-				 CACHEID_VIPT_I_ALIASING)
+				 CACHEID_VIPT_I_ALIASING |\
+				 CACHEID_PIPT)
 #elif __LINUX_ARM_ARCH__ >= 6
 #define	__CACHEID_ARCH_MIN	(~CACHEID_VIVT)
 #else
diff --git a/arch/arm/include/asm/cputime.h b/arch/arm/include/asm/cputime.h
deleted file mode 100644
index 3a8002a5fec7..000000000000
--- a/arch/arm/include/asm/cputime.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ARM_CPUTIME_H
-#define __ARM_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __ARM_CPUTIME_H */
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index cd4458f64171..cb47d28cbe1f 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -8,6 +8,7 @@
 #define CPUID_CACHETYPE	1
 #define CPUID_TCM	2
 #define CPUID_TLBTYPE	3
+#define CPUID_MPIDR	5
 
 #define CPUID_EXT_PFR0	"c1, 0"
 #define CPUID_EXT_PFR1	"c1, 1"
@@ -70,6 +71,11 @@ static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
 	return read_cpuid(CPUID_TCM);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_mpidr(void)
+{
+	return read_cpuid(CPUID_MPIDR);
+}
+
 /*
  * Intel's XScale3 core supports some v6 features (supersections, L2)
  * but advertises itself as v5 as it does not support the v6 ISA.  For
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 9f390ce335cb..6615f03f56a5 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -10,6 +10,9 @@ struct dev_archdata {
 #ifdef CONFIG_DMABOUNCE
 	struct dmabounce_device_info *dmabounce;
 #endif
+#ifdef CONFIG_IOMMU_API
+	void *iommu; /* private IOMMU data */
+#endif
 };
 
 struct pdev_archdata {
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 7f27fab9d404..cb3b7c981c4b 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -32,7 +32,7 @@ static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr)
 
 static inline void *dma_to_virt(struct device *dev, dma_addr_t addr)
 {
-	return (void *)__bus_to_virt(addr);
+	return (void *)__bus_to_virt((unsigned long)addr);
 }
 
 static inline dma_addr_t virt_to_dma(struct device *dev, void *addr)
diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
index 628670e9d7c9..69a5b0b6455c 100644
--- a/arch/arm/include/asm/dma.h
+++ b/arch/arm/include/asm/dma.h
@@ -34,18 +34,18 @@
 #define DMA_MODE_CASCADE 0xc0
 #define DMA_AUTOINIT	 0x10
 
-extern spinlock_t  dma_spin_lock;
+extern raw_spinlock_t  dma_spin_lock;
 
 static inline unsigned long claim_dma_lock(void)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&dma_spin_lock, flags);
+	raw_spin_lock_irqsave(&dma_spin_lock, flags);
 	return flags;
 }
 
 static inline void release_dma_lock(unsigned long flags)
 {
-	spin_unlock_irqrestore(&dma_spin_lock, flags);
+	raw_spin_unlock_irqrestore(&dma_spin_lock, flags);
 }
 
 /* Clear the 'DMA Pointer Flip Flop'.
diff --git a/arch/arm/include/asm/ecard.h b/arch/arm/include/asm/ecard.h
index 29f2610efc70..eaea14676d57 100644
--- a/arch/arm/include/asm/ecard.h
+++ b/arch/arm/include/asm/ecard.h
@@ -161,7 +161,6 @@ struct expansion_card {
 
 	/* Private internal data */
 	const char		*card_desc;	/* Card description		*/
-	CONST unsigned int	podaddr;	/* Base Linux address for card	*/
 	CONST loader_t		loader;		/* loader program */
 	u64			dma_mask;
 };
diff --git a/arch/arm/include/asm/emergency-restart.h b/arch/arm/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/arm/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S
index 2f1e2098dfe7..88d61815f0c0 100644
--- a/arch/arm/include/asm/entry-macro-multi.S
+++ b/arch/arm/include/asm/entry-macro-multi.S
@@ -25,13 +25,6 @@
 	movne	r1, sp
 	adrne	lr, BSYM(1b)
 	bne	do_IPI
-
-#ifdef CONFIG_LOCAL_TIMERS
-	test_for_ltirq r0, r2, r6, lr
-	movne	r0, sp
-	adrne	lr, BSYM(1b)
-	bne	do_local_timer
-#endif
 #endif
 9997:
 	.endm
diff --git a/arch/arm/include/asm/errno.h b/arch/arm/include/asm/errno.h
deleted file mode 100644
index 6e60f0612bb6..000000000000
--- a/arch/arm/include/asm/errno.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ARM_ERRNO_H
-#define _ARM_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h
new file mode 100644
index 000000000000..5abaf5bbd985
--- /dev/null
+++ b/arch/arm/include/asm/exception.h
@@ -0,0 +1,19 @@
+/*
+ * Annotations for marking C functions as exception handlers.
+ *
+ * These should only be used for C functions that are called from the low
+ * level exception entry code and not any intervening C code.
+ */
+#ifndef __ASM_ARM_EXCEPTION_H
+#define __ASM_ARM_EXCEPTION_H
+
+#include <linux/ftrace.h>
+
+#define __exception	__attribute__((section(".exception.text")))
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#define __exception_irq_entry	__irq_entry
+#else
+#define __exception_irq_entry	__exception
+#endif
+
+#endif /* __ASM_ARM_EXCEPTION_H */
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 89ad1805e579..ddf07a92a6c8 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -9,9 +9,6 @@
 
 typedef struct {
 	unsigned int __softirq_pending;
-#ifdef CONFIG_LOCAL_TIMERS
-	unsigned int local_timer_irqs;
-#endif
 #ifdef CONFIG_SMP
 	unsigned int ipi_irqs[NR_IPI];
 #endif
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 99a6ed7e1bfd..434edccdf7f3 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -52,6 +52,8 @@
 #define L2X0_LOCKDOWN_WAY_D_BASE	0x900
 #define L2X0_LOCKDOWN_WAY_I_BASE	0x904
 #define L2X0_LOCKDOWN_STRIDE		0x08
+#define L2X0_ADDR_FILTER_START		0xC00
+#define L2X0_ADDR_FILTER_END		0xC04
 #define L2X0_TEST_OPERATION		0xF00
 #define L2X0_LINE_DATA			0xF10
 #define L2X0_LINE_TAG			0xF30
@@ -65,8 +67,23 @@
 #define L2X0_CACHE_ID_PART_MASK		(0xf << 6)
 #define L2X0_CACHE_ID_PART_L210		(1 << 6)
 #define L2X0_CACHE_ID_PART_L310		(3 << 6)
+#define L2X0_CACHE_ID_RTL_MASK          0x3f
+#define L2X0_CACHE_ID_RTL_R0P0          0x0
+#define L2X0_CACHE_ID_RTL_R1P0          0x2
+#define L2X0_CACHE_ID_RTL_R2P0          0x4
+#define L2X0_CACHE_ID_RTL_R3P0          0x5
+#define L2X0_CACHE_ID_RTL_R3P1          0x6
+#define L2X0_CACHE_ID_RTL_R3P2          0x8
 
 #define L2X0_AUX_CTRL_MASK			0xc0000fff
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT	0
+#define L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK	0x7
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT	3
+#define L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK	(0x7 << 3)
+#define L2X0_AUX_CTRL_TAG_LATENCY_SHIFT		6
+#define L2X0_AUX_CTRL_TAG_LATENCY_MASK		(0x7 << 6)
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT	9
+#define L2X0_AUX_CTRL_DIRTY_LATENCY_MASK	(0x7 << 9)
 #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT	16
 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT		17
 #define L2X0_AUX_CTRL_WAY_SIZE_MASK		(0x7 << 17)
@@ -77,8 +94,33 @@
 #define L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT	29
 #define L2X0_AUX_CTRL_EARLY_BRESP_SHIFT		30
 
+#define L2X0_LATENCY_CTRL_SETUP_SHIFT	0
+#define L2X0_LATENCY_CTRL_RD_SHIFT	4
+#define L2X0_LATENCY_CTRL_WR_SHIFT	8
+
+#define L2X0_ADDR_FILTER_EN		1
+
 #ifndef __ASSEMBLY__
 extern void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask);
+extern int l2x0_of_init(__u32 aux_val, __u32 aux_mask);
+
+struct l2x0_regs {
+	unsigned long phy_base;
+	unsigned long aux_ctrl;
+	/*
+	 * Whether the following registers need to be saved/restored
+	 * depends on platform
+	 */
+	unsigned long tag_latency;
+	unsigned long data_latency;
+	unsigned long filter_start;
+	unsigned long filter_end;
+	unsigned long prefetch_ctrl;
+	unsigned long pwr_ctrl;
+};
+
+extern struct l2x0_regs l2x0_saved_regs;
+
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/hardware/entry-macro-gic.S b/arch/arm/include/asm/hardware/entry-macro-gic.S
index c115b82fe80a..74ebc803904d 100644
--- a/arch/arm/include/asm/hardware/entry-macro-gic.S
+++ b/arch/arm/include/asm/hardware/entry-macro-gic.S
@@ -22,15 +22,11 @@
  * interrupt controller spec.  To wit:
  *
  * Interrupts 0-15 are IPI
- * 16-28 are reserved
- * 29-31 are local.  We allow 30 to be used for the watchdog.
+ * 16-31 are local.  We allow 30 to be used for the watchdog.
  * 32-1020 are global
  * 1021-1022 are reserved
  * 1023 is "spurious" (no interrupt)
  *
- * For now, we ignore all local interrupts so only return an interrupt if it's
- * between 30 and 1020.  The test_for_ipi routine below will pick up on IPIs.
- *
  * A simple read from the controller will tell us the number of the highest
  * priority enabled interrupt.  We then just need to check whether it is in the
  * valid range for an IRQ (30-1020 inclusive).
@@ -43,7 +39,7 @@
 
 	ldr	\tmp, =1021
 	bic     \irqnr, \irqstat, #0x1c00
-	cmp     \irqnr, #29
+	cmp     \irqnr, #15
 	cmpcc	\irqnr, \irqnr
 	cmpne	\irqnr, \tmp
 	cmpcs	\irqnr, \irqnr
@@ -62,14 +58,3 @@
 	strcc	\irqstat, [\base, #GIC_CPU_EOI]
 	cmpcs	\irqnr, \irqnr
 	.endm
-
-/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-	bic	\irqnr, \irqstat, #0x1c00
-	mov 	\tmp, #0
-	cmp	\irqnr, #29
-	moveq	\tmp, #1
-	streq	\irqstat, [\base, #GIC_CPU_EOI]
-	cmp	\tmp, #0
-	.endm
diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
index 435d3f86c708..14867e12f205 100644
--- a/arch/arm/include/asm/hardware/gic.h
+++ b/arch/arm/include/asm/hardware/gic.h
@@ -40,12 +40,19 @@ void gic_init(unsigned int, unsigned int, void __iomem *, void __iomem *);
 void gic_secondary_init(unsigned int);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
 void gic_raise_softirq(const struct cpumask *mask, unsigned int irq);
-void gic_enable_ppi(unsigned int);
 
 struct gic_chip_data {
 	unsigned int irq_offset;
 	void __iomem *dist_base;
 	void __iomem *cpu_base;
+#ifdef CONFIG_CPU_PM
+	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
+	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
+	u32 __percpu *saved_ppi_enable;
+	u32 __percpu *saved_ppi_conf;
+#endif
+	unsigned int gic_irqs;
 };
 #endif
 
diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h
index f389b2704d82..c190bc992f0e 100644
--- a/arch/arm/include/asm/hw_breakpoint.h
+++ b/arch/arm/include/asm/hw_breakpoint.h
@@ -50,6 +50,7 @@ static inline void decode_ctrl_reg(u32 reg,
 #define ARM_DEBUG_ARCH_V6_1	2
 #define ARM_DEBUG_ARCH_V7_ECP14	3
 #define ARM_DEBUG_ARCH_V7_MM	4
+#define ARM_DEBUG_ARCH_V7_1	5
 
 /* Breakpoint */
 #define ARM_BREAKPOINT_EXECUTE	0
@@ -57,6 +58,7 @@ static inline void decode_ctrl_reg(u32 reg,
 /* Watchpoints */
 #define ARM_BREAKPOINT_LOAD	1
 #define ARM_BREAKPOINT_STORE	2
+#define ARM_FSR_ACCESS_MASK	(1 << 11)
 
 /* Privilege Levels */
 #define ARM_BREAKPOINT_PRIV	1
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index d66605dea55a..065d100fa63e 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -80,6 +80,7 @@ extern void __iomem *__arm_ioremap_caller(unsigned long, size_t, unsigned int,
 
 extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int);
 extern void __iomem *__arm_ioremap(unsigned long, size_t, unsigned int);
+extern void __iomem *__arm_ioremap_exec(unsigned long, size_t, bool cached);
 extern void __iounmap(volatile void __iomem *addr);
 
 /*
@@ -110,6 +111,27 @@ static inline void __iomem *__typesafe_io(unsigned long addr)
 #include <mach/io.h>
 
 /*
+ * This is the limit of PC card/PCI/ISA IO space, which is by default
+ * 64K if we have PC card, PCI or ISA support.  Otherwise, default to
+ * zero to prevent ISA/PCI drivers claiming IO space (and potentially
+ * oopsing.)
+ *
+ * Only set this larger if you really need inb() et.al. to operate over
+ * a larger address space.  Note that SOC_COMMON ioremaps each sockets
+ * IO space area, and so inb() et.al. must be defined to operate as per
+ * readb() et.al. on such platforms.
+ */
+#ifndef IO_SPACE_LIMIT
+#if defined(CONFIG_PCMCIA_SOC_COMMON) || defined(CONFIG_PCMCIA_SOC_COMMON_MODULE)
+#define IO_SPACE_LIMIT ((resource_size_t)0xffffffff)
+#elif defined(CONFIG_PCI) || defined(CONFIG_ISA) || defined(CONFIG_PCCARD)
+#define IO_SPACE_LIMIT ((resource_size_t)0xffff)
+#else
+#define IO_SPACE_LIMIT ((resource_size_t)0)
+#endif
+#endif
+
+/*
  *  IO port access primitives
  *  -------------------------
  *
@@ -189,11 +211,11 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * IO port primitives for more information.
  */
 #ifdef __mem_pci
-#define readb_relaxed(c) ({ u8  __v = __raw_readb(__mem_pci(c)); __v; })
-#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16) \
-					__raw_readw(__mem_pci(c))); __v; })
-#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32) \
-					__raw_readl(__mem_pci(c))); __v; })
+#define readb_relaxed(c) ({ u8  __r = __raw_readb(__mem_pci(c)); __r; })
+#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16) \
+					__raw_readw(__mem_pci(c))); __r; })
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \
+					__raw_readl(__mem_pci(c))); __r; })
 
 #define writeb_relaxed(v,c)	((void)__raw_writeb(v,__mem_pci(c)))
 #define writew_relaxed(v,c)	((void)__raw_writew((__force u16) \
@@ -238,7 +260,7 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
  * ioremap and friends.
  *
  * ioremap takes a PCI memory address, as specified in
- * Documentation/IO-mapping.txt.
+ * Documentation/io-mapping.txt.
  *
  */
 #ifndef __arch_ioremap
@@ -260,10 +282,16 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
 #define ioread16(p)	({ unsigned int __v = le16_to_cpu((__force __le16)__raw_readw(p)); __iormb(); __v; })
 #define ioread32(p)	({ unsigned int __v = le32_to_cpu((__force __le32)__raw_readl(p)); __iormb(); __v; })
 
+#define ioread16be(p)	({ unsigned int __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32be(p)	({ unsigned int __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
+
 #define iowrite8(v,p)	({ __iowmb(); (void)__raw_writeb(v, p); })
 #define iowrite16(v,p)	({ __iowmb(); (void)__raw_writew((__force __u16)cpu_to_le16(v), p); })
 #define iowrite32(v,p)	({ __iowmb(); (void)__raw_writel((__force __u32)cpu_to_le32(v), p); })
 
+#define iowrite16be(v,p) ({ __iowmb(); (void)__raw_writew((__force __u16)cpu_to_be16(v), p); })
+#define iowrite32be(v,p) ({ __iowmb(); (void)__raw_writel((__force __u32)cpu_to_be32(v), p); })
+
 #define ioread8_rep(p,d,c)	__raw_readsb(p,d,c)
 #define ioread16_rep(p,d,c)	__raw_readsw(p,d,c)
 #define ioread32_rep(p,d,c)	__raw_readsl(p,d,c)
diff --git a/arch/arm/include/asm/ioctl.h b/arch/arm/include/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/arm/include/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/arm/include/asm/irq_regs.h b/arch/arm/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/arm/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/arm/include/asm/kdebug.h b/arch/arm/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/arm/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/arm/include/asm/local.h b/arch/arm/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/arm/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/arm/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/localtimer.h b/arch/arm/include/asm/localtimer.h
index 080d74f8128d..c6a18424888e 100644
--- a/arch/arm/include/asm/localtimer.h
+++ b/arch/arm/include/asm/localtimer.h
@@ -10,6 +10,9 @@
 #ifndef __ASM_ARM_LOCALTIMER_H
 #define __ASM_ARM_LOCALTIMER_H
 
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+
 struct clock_event_device;
 
 /*
@@ -17,27 +20,20 @@ struct clock_event_device;
  */
 void percpu_timer_setup(void);
 
-/*
- * Called from assembly, this is the local timer IRQ handler
- */
-asmlinkage void do_local_timer(struct pt_regs *);
-
-
 #ifdef CONFIG_LOCAL_TIMERS
 
 #ifdef CONFIG_HAVE_ARM_TWD
 
 #include "smp_twd.h"
 
-#define local_timer_ack()	twd_timer_ack()
+#define local_timer_stop(c)	twd_timer_stop((c))
 
 #else
 
 /*
- * Platform provides this to acknowledge a local timer IRQ.
- * Returns true if the local timer IRQ is to be processed.
+ * Stop the local timer
  */
-int local_timer_ack(void);
+void local_timer_stop(struct clock_event_device *);
 
 #endif
 
@@ -52,6 +48,10 @@ static inline int local_timer_setup(struct clock_event_device *evt)
 {
 	return -ENXIO;
 }
+
+static inline void local_timer_stop(struct clock_event_device *evt)
+{
+}
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 727da118bcc1..7d19425dd496 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -34,8 +34,7 @@ struct machine_desc {
 	unsigned int		reserve_lp1 :1;	/* never has lp1	*/
 	unsigned int		reserve_lp2 :1;	/* never has lp2	*/
 	unsigned int		soft_reboot :1;	/* soft reboot		*/
-	void			(*fixup)(struct machine_desc *,
-					 struct tag *, char **,
+	void			(*fixup)(struct tag *, char **,
 					 struct meminfo *);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index d2fedb5aeb1f..b36f3654bf54 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -29,6 +29,7 @@ struct map_desc {
 #define MT_MEMORY_NONCACHED	11
 #define MT_MEMORY_DTCM		12
 #define MT_MEMORY_ITCM		13
+#define MT_MEMORY_SO		14
 
 #ifdef CONFIG_MMU
 extern void iotable_init(struct map_desc *, int);
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index b4ffe9d5b526..14965658a923 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -6,7 +6,7 @@
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	unsigned int id;
-	spinlock_t id_lock;
+	raw_spinlock_t id_lock;
 #endif
 	unsigned int kvm_seq;
 } mm_context_t;
@@ -16,7 +16,7 @@ typedef struct {
 
 /* init_mm.context.id_lock should be initialized. */
 #define INIT_MM_CONTEXT(name)                                                 \
-	.context.id_lock    = __SPIN_LOCK_UNLOCKED(name.context.id_lock),
+	.context.id_lock    = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
 #else
 #define ASID(mm)	(0)
 #endif
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index d8387437ec5a..53426c66352a 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -34,6 +34,7 @@ struct outer_cache_fns {
 	void (*sync)(void);
 #endif
 	void (*set_debug)(unsigned long);
+	void (*resume)(void);
 };
 
 #ifdef CONFIG_OUTER_CACHE
@@ -74,6 +75,12 @@ static inline void outer_disable(void)
 		outer_cache.disable();
 }
 
+static inline void outer_resume(void)
+{
+	if (outer_cache.resume)
+		outer_cache.resume();
+}
+
 #else
 
 static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index ac75d0848889..ca94653f1ecb 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -151,47 +151,7 @@ extern void __cpu_copy_user_highpage(struct page *to, struct page *from,
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, const void *from);
 
-typedef unsigned long pteval_t;
-
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct { pteval_t pte; } pte_t;
-typedef struct { unsigned long pmd; } pmd_t;
-typedef struct { unsigned long pgd[2]; } pgd_t;
-typedef struct { unsigned long pgprot; } pgprot_t;
-
-#define pte_val(x)      ((x).pte)
-#define pmd_val(x)      ((x).pmd)
-#define pgd_val(x)	((x).pgd[0])
-#define pgprot_val(x)   ((x).pgprot)
-
-#define __pte(x)        ((pte_t) { (x) } )
-#define __pmd(x)        ((pmd_t) { (x) } )
-#define __pgprot(x)     ((pgprot_t) { (x) } )
-
-#else
-/*
- * .. while these make it easier on the compiler
- */
-typedef pteval_t pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pgd_t[2];
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)      (x)
-#define pmd_val(x)      (x)
-#define pgd_val(x)	((x)[0])
-#define pgprot_val(x)   (x)
-
-#define __pte(x)        (x)
-#define __pmd(x)        (x)
-#define __pgprot(x)     (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
+#include <asm/pgtable-2level-types.h>
 
 #endif /* CONFIG_MMU */
 
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
deleted file mode 100644
index b4e32d8ec072..000000000000
--- a/arch/arm/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ARM_PERCPU
-#define __ARM_PERCPU
-
-#include <asm-generic/percpu.h>
-
-#endif
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 22de005f159c..3e08fd3fbb6b 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -105,9 +105,9 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 }
 
 static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
-	unsigned long prot)
+				  pmdval_t prot)
 {
-	unsigned long pmdval = (pte + PTE_HWTABLE_OFF) | prot;
+	pmdval_t pmdval = (pte + PTE_HWTABLE_OFF) | prot;
 	pmdp[0] = __pmd(pmdval);
 	pmdp[1] = __pmd(pmdval + 256 * sizeof(pte_t));
 	flush_pmd_entry(pmdp);
diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
new file mode 100644
index 000000000000..5cfba15cb401
--- /dev/null
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -0,0 +1,93 @@
+/*
+ *  arch/arm/include/asm/pgtable-2level-hwdef.h
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_PGTABLE_2LEVEL_HWDEF_H
+#define _ASM_PGTABLE_2LEVEL_HWDEF_H
+
+/*
+ * Hardware page table definitions.
+ *
+ * + Level 1 descriptor (PMD)
+ *   - common
+ */
+#define PMD_TYPE_MASK		(_AT(pmdval_t, 3) << 0)
+#define PMD_TYPE_FAULT		(_AT(pmdval_t, 0) << 0)
+#define PMD_TYPE_TABLE		(_AT(pmdval_t, 1) << 0)
+#define PMD_TYPE_SECT		(_AT(pmdval_t, 2) << 0)
+#define PMD_BIT4		(_AT(pmdval_t, 1) << 4)
+#define PMD_DOMAIN(x)		(_AT(pmdval_t, (x)) << 5)
+#define PMD_PROTECTION		(_AT(pmdval_t, 1) << 9)		/* v5 */
+/*
+ *   - section
+ */
+#define PMD_SECT_BUFFERABLE	(_AT(pmdval_t, 1) << 2)
+#define PMD_SECT_CACHEABLE	(_AT(pmdval_t, 1) << 3)
+#define PMD_SECT_XN		(_AT(pmdval_t, 1) << 4)		/* v6 */
+#define PMD_SECT_AP_WRITE	(_AT(pmdval_t, 1) << 10)
+#define PMD_SECT_AP_READ	(_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_TEX(x)		(_AT(pmdval_t, (x)) << 12)	/* v5 */
+#define PMD_SECT_APX		(_AT(pmdval_t, 1) << 15)	/* v6 */
+#define PMD_SECT_S		(_AT(pmdval_t, 1) << 16)	/* v6 */
+#define PMD_SECT_nG		(_AT(pmdval_t, 1) << 17)	/* v6 */
+#define PMD_SECT_SUPER		(_AT(pmdval_t, 1) << 18)	/* v6 */
+#define PMD_SECT_AF		(_AT(pmdval_t, 0))
+
+#define PMD_SECT_UNCACHED	(_AT(pmdval_t, 0))
+#define PMD_SECT_BUFFERED	(PMD_SECT_BUFFERABLE)
+#define PMD_SECT_WT		(PMD_SECT_CACHEABLE)
+#define PMD_SECT_WB		(PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
+#define PMD_SECT_MINICACHE	(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE)
+#define PMD_SECT_WBWA		(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
+#define PMD_SECT_NONSHARED_DEV	(PMD_SECT_TEX(2))
+
+/*
+ *   - coarse table (not used)
+ */
+
+/*
+ * + Level 2 descriptor (PTE)
+ *   - common
+ */
+#define PTE_TYPE_MASK		(_AT(pteval_t, 3) << 0)
+#define PTE_TYPE_FAULT		(_AT(pteval_t, 0) << 0)
+#define PTE_TYPE_LARGE		(_AT(pteval_t, 1) << 0)
+#define PTE_TYPE_SMALL		(_AT(pteval_t, 2) << 0)
+#define PTE_TYPE_EXT		(_AT(pteval_t, 3) << 0)		/* v5 */
+#define PTE_BUFFERABLE		(_AT(pteval_t, 1) << 2)
+#define PTE_CACHEABLE		(_AT(pteval_t, 1) << 3)
+
+/*
+ *   - extended small page/tiny page
+ */
+#define PTE_EXT_XN		(_AT(pteval_t, 1) << 0)		/* v6 */
+#define PTE_EXT_AP_MASK		(_AT(pteval_t, 3) << 4)
+#define PTE_EXT_AP0		(_AT(pteval_t, 1) << 4)
+#define PTE_EXT_AP1		(_AT(pteval_t, 2) << 4)
+#define PTE_EXT_AP_UNO_SRO	(_AT(pteval_t, 0) << 4)
+#define PTE_EXT_AP_UNO_SRW	(PTE_EXT_AP0)
+#define PTE_EXT_AP_URO_SRW	(PTE_EXT_AP1)
+#define PTE_EXT_AP_URW_SRW	(PTE_EXT_AP1|PTE_EXT_AP0)
+#define PTE_EXT_TEX(x)		(_AT(pteval_t, (x)) << 6)	/* v5 */
+#define PTE_EXT_APX		(_AT(pteval_t, 1) << 9)		/* v6 */
+#define PTE_EXT_COHERENT	(_AT(pteval_t, 1) << 9)		/* XScale3 */
+#define PTE_EXT_SHARED		(_AT(pteval_t, 1) << 10)	/* v6 */
+#define PTE_EXT_NG		(_AT(pteval_t, 1) << 11)	/* v6 */
+
+/*
+ *   - small page
+ */
+#define PTE_SMALL_AP_MASK	(_AT(pteval_t, 0xff) << 4)
+#define PTE_SMALL_AP_UNO_SRO	(_AT(pteval_t, 0x00) << 4)
+#define PTE_SMALL_AP_UNO_SRW	(_AT(pteval_t, 0x55) << 4)
+#define PTE_SMALL_AP_URO_SRW	(_AT(pteval_t, 0xaa) << 4)
+#define PTE_SMALL_AP_URW_SRW	(_AT(pteval_t, 0xff) << 4)
+
+#define PHYS_MASK		(~0UL)
+
+#endif
diff --git a/arch/arm/include/asm/pgtable-2level-types.h b/arch/arm/include/asm/pgtable-2level-types.h
new file mode 100644
index 000000000000..66cb5b0e89c5
--- /dev/null
+++ b/arch/arm/include/asm/pgtable-2level-types.h
@@ -0,0 +1,67 @@
+/*
+ * arch/arm/include/asm/pgtable-2level-types.h
+ *
+ * Copyright (C) 1995-2003 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _ASM_PGTABLE_2LEVEL_TYPES_H
+#define _ASM_PGTABLE_2LEVEL_TYPES_H
+
+#include <asm/types.h>
+
+typedef u32 pteval_t;
+typedef u32 pmdval_t;
+
+#undef STRICT_MM_TYPECHECKS
+
+#ifdef STRICT_MM_TYPECHECKS
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { pteval_t pte; } pte_t;
+typedef struct { pmdval_t pmd; } pmd_t;
+typedef struct { pmdval_t pgd[2]; } pgd_t;
+typedef struct { pteval_t pgprot; } pgprot_t;
+
+#define pte_val(x)      ((x).pte)
+#define pmd_val(x)      ((x).pmd)
+#define pgd_val(x)	((x).pgd[0])
+#define pgprot_val(x)   ((x).pgprot)
+
+#define __pte(x)        ((pte_t) { (x) } )
+#define __pmd(x)        ((pmd_t) { (x) } )
+#define __pgprot(x)     ((pgprot_t) { (x) } )
+
+#else
+/*
+ * .. while these make it easier on the compiler
+ */
+typedef pteval_t pte_t;
+typedef pmdval_t pmd_t;
+typedef pmdval_t pgd_t[2];
+typedef pteval_t pgprot_t;
+
+#define pte_val(x)      (x)
+#define pmd_val(x)      (x)
+#define pgd_val(x)	((x)[0])
+#define pgprot_val(x)   (x)
+
+#define __pte(x)        (x)
+#define __pmd(x)        (x)
+#define __pgprot(x)     (x)
+
+#endif /* STRICT_MM_TYPECHECKS */
+
+#endif	/* _ASM_PGTABLE_2LEVEL_TYPES_H */
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
new file mode 100644
index 000000000000..470457e1cfc5
--- /dev/null
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -0,0 +1,143 @@
+/*
+ *  arch/arm/include/asm/pgtable-2level.h
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_PGTABLE_2LEVEL_H
+#define _ASM_PGTABLE_2LEVEL_H
+
+/*
+ * Hardware-wise, we have a two level page table structure, where the first
+ * level has 4096 entries, and the second level has 256 entries.  Each entry
+ * is one 32-bit word.  Most of the bits in the second level entry are used
+ * by hardware, and there aren't any "accessed" and "dirty" bits.
+ *
+ * Linux on the other hand has a three level page table structure, which can
+ * be wrapped to fit a two level page table structure easily - using the PGD
+ * and PTE only.  However, Linux also expects one "PTE" table per page, and
+ * at least a "dirty" bit.
+ *
+ * Therefore, we tweak the implementation slightly - we tell Linux that we
+ * have 2048 entries in the first level, each of which is 8 bytes (iow, two
+ * hardware pointers to the second level.)  The second level contains two
+ * hardware PTE tables arranged contiguously, preceded by Linux versions
+ * which contain the state information Linux needs.  We, therefore, end up
+ * with 512 entries in the "PTE" level.
+ *
+ * This leads to the page tables having the following layout:
+ *
+ *    pgd             pte
+ * |        |
+ * +--------+
+ * |        |       +------------+ +0
+ * +- - - - +       | Linux pt 0 |
+ * |        |       +------------+ +1024
+ * +--------+ +0    | Linux pt 1 |
+ * |        |-----> +------------+ +2048
+ * +- - - - + +4    |  h/w pt 0  |
+ * |        |-----> +------------+ +3072
+ * +--------+ +8    |  h/w pt 1  |
+ * |        |       +------------+ +4096
+ *
+ * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
+ * PTE_xxx for definitions of bits appearing in the "h/w pt".
+ *
+ * PMD_xxx definitions refer to bits in the first level page table.
+ *
+ * The "dirty" bit is emulated by only granting hardware write permission
+ * iff the page is marked "writable" and "dirty" in the Linux PTE.  This
+ * means that a write to a clean page will cause a permission fault, and
+ * the Linux MM layer will mark the page dirty via handle_pte_fault().
+ * For the hardware to notice the permission change, the TLB entry must
+ * be flushed, and ptep_set_access_flags() does that for us.
+ *
+ * The "accessed" or "young" bit is emulated by a similar method; we only
+ * allow accesses to the page if the "young" bit is set.  Accesses to the
+ * page will cause a fault, and handle_pte_fault() will set the young bit
+ * for us as long as the page is marked present in the corresponding Linux
+ * PTE entry.  Again, ptep_set_access_flags() will ensure that the TLB is
+ * up to date.
+ *
+ * However, when the "young" bit is cleared, we deny access to the page
+ * by clearing the hardware PTE.  Currently Linux does not flush the TLB
+ * for us in this case, which means the TLB will retain the transation
+ * until either the TLB entry is evicted under pressure, or a context
+ * switch which changes the user space mapping occurs.
+ */
+#define PTRS_PER_PTE		512
+#define PTRS_PER_PMD		1
+#define PTRS_PER_PGD		2048
+
+#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
+#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
+#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
+
+/*
+ * PMD_SHIFT determines the size of the area a second-level page table can map
+ * PGDIR_SHIFT determines what a third-level page table entry can map
+ */
+#define PMD_SHIFT		21
+#define PGDIR_SHIFT		21
+
+#define PMD_SIZE		(1UL << PMD_SHIFT)
+#define PMD_MASK		(~(PMD_SIZE-1))
+#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK		(~(PGDIR_SIZE-1))
+
+/*
+ * section address mask and size definitions.
+ */
+#define SECTION_SHIFT		20
+#define SECTION_SIZE		(1UL << SECTION_SHIFT)
+#define SECTION_MASK		(~(SECTION_SIZE-1))
+
+/*
+ * ARMv6 supersection address mask and size definitions.
+ */
+#define SUPERSECTION_SHIFT	24
+#define SUPERSECTION_SIZE	(1UL << SUPERSECTION_SHIFT)
+#define SUPERSECTION_MASK	(~(SUPERSECTION_SIZE-1))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+
+/*
+ * "Linux" PTE definitions.
+ *
+ * We keep two sets of PTEs - the hardware and the linux version.
+ * This allows greater flexibility in the way we map the Linux bits
+ * onto the hardware tables, and allows us to have YOUNG and DIRTY
+ * bits.
+ *
+ * The PTE table pointer refers to the hardware entries; the "Linux"
+ * entries are stored 1024 bytes below.
+ */
+#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
+#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
+#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
+#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
+#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
+#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
+#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
+#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
+
+/*
+ * These are the memory types, defined to be compatible with
+ * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
+ */
+#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
+#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
+#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
+#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
+#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
+#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
+#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
+#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
+#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
+#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
+#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
+
+#endif /* _ASM_PGTABLE_2LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable-hwdef.h b/arch/arm/include/asm/pgtable-hwdef.h
index fd1521d5cb9d..183111164ce9 100644
--- a/arch/arm/include/asm/pgtable-hwdef.h
+++ b/arch/arm/include/asm/pgtable-hwdef.h
@@ -10,81 +10,6 @@
 #ifndef _ASMARM_PGTABLE_HWDEF_H
 #define _ASMARM_PGTABLE_HWDEF_H
 
-/*
- * Hardware page table definitions.
- *
- * + Level 1 descriptor (PMD)
- *   - common
- */
-#define PMD_TYPE_MASK		(3 << 0)
-#define PMD_TYPE_FAULT		(0 << 0)
-#define PMD_TYPE_TABLE		(1 << 0)
-#define PMD_TYPE_SECT		(2 << 0)
-#define PMD_BIT4		(1 << 4)
-#define PMD_DOMAIN(x)		((x) << 5)
-#define PMD_PROTECTION		(1 << 9)	/* v5 */
-/*
- *   - section
- */
-#define PMD_SECT_BUFFERABLE	(1 << 2)
-#define PMD_SECT_CACHEABLE	(1 << 3)
-#define PMD_SECT_XN		(1 << 4)	/* v6 */
-#define PMD_SECT_AP_WRITE	(1 << 10)
-#define PMD_SECT_AP_READ	(1 << 11)
-#define PMD_SECT_TEX(x)		((x) << 12)	/* v5 */
-#define PMD_SECT_APX		(1 << 15)	/* v6 */
-#define PMD_SECT_S		(1 << 16)	/* v6 */
-#define PMD_SECT_nG		(1 << 17)	/* v6 */
-#define PMD_SECT_SUPER		(1 << 18)	/* v6 */
-
-#define PMD_SECT_UNCACHED	(0)
-#define PMD_SECT_BUFFERED	(PMD_SECT_BUFFERABLE)
-#define PMD_SECT_WT		(PMD_SECT_CACHEABLE)
-#define PMD_SECT_WB		(PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
-#define PMD_SECT_MINICACHE	(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE)
-#define PMD_SECT_WBWA		(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
-#define PMD_SECT_NONSHARED_DEV	(PMD_SECT_TEX(2))
-
-/*
- *   - coarse table (not used)
- */
-
-/*
- * + Level 2 descriptor (PTE)
- *   - common
- */
-#define PTE_TYPE_MASK		(3 << 0)
-#define PTE_TYPE_FAULT		(0 << 0)
-#define PTE_TYPE_LARGE		(1 << 0)
-#define PTE_TYPE_SMALL		(2 << 0)
-#define PTE_TYPE_EXT		(3 << 0)	/* v5 */
-#define PTE_BUFFERABLE		(1 << 2)
-#define PTE_CACHEABLE		(1 << 3)
-
-/*
- *   - extended small page/tiny page
- */
-#define PTE_EXT_XN		(1 << 0)	/* v6 */
-#define PTE_EXT_AP_MASK		(3 << 4)
-#define PTE_EXT_AP0		(1 << 4)
-#define PTE_EXT_AP1		(2 << 4)
-#define PTE_EXT_AP_UNO_SRO	(0 << 4)
-#define PTE_EXT_AP_UNO_SRW	(PTE_EXT_AP0)
-#define PTE_EXT_AP_URO_SRW	(PTE_EXT_AP1)
-#define PTE_EXT_AP_URW_SRW	(PTE_EXT_AP1|PTE_EXT_AP0)
-#define PTE_EXT_TEX(x)		((x) << 6)	/* v5 */
-#define PTE_EXT_APX		(1 << 9)	/* v6 */
-#define PTE_EXT_COHERENT	(1 << 9)	/* XScale3 */
-#define PTE_EXT_SHARED		(1 << 10)	/* v6 */
-#define PTE_EXT_NG		(1 << 11)	/* v6 */
-
-/*
- *   - small page
- */
-#define PTE_SMALL_AP_MASK	(0xff << 4)
-#define PTE_SMALL_AP_UNO_SRO	(0x00 << 4)
-#define PTE_SMALL_AP_UNO_SRW	(0x55 << 4)
-#define PTE_SMALL_AP_URO_SRW	(0xaa << 4)
-#define PTE_SMALL_AP_URW_SRW	(0xff << 4)
+#include <asm/pgtable-2level-hwdef.h>
 
 #endif
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 5750704e0271..9451dce3a553 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -24,6 +24,8 @@
 #include <mach/vmalloc.h>
 #include <asm/pgtable-hwdef.h>
 
+#include <asm/pgtable-2level.h>
+
 /*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
@@ -41,79 +43,6 @@
 #define VMALLOC_START		(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
 #endif
 
-/*
- * Hardware-wise, we have a two level page table structure, where the first
- * level has 4096 entries, and the second level has 256 entries.  Each entry
- * is one 32-bit word.  Most of the bits in the second level entry are used
- * by hardware, and there aren't any "accessed" and "dirty" bits.
- *
- * Linux on the other hand has a three level page table structure, which can
- * be wrapped to fit a two level page table structure easily - using the PGD
- * and PTE only.  However, Linux also expects one "PTE" table per page, and
- * at least a "dirty" bit.
- *
- * Therefore, we tweak the implementation slightly - we tell Linux that we
- * have 2048 entries in the first level, each of which is 8 bytes (iow, two
- * hardware pointers to the second level.)  The second level contains two
- * hardware PTE tables arranged contiguously, preceded by Linux versions
- * which contain the state information Linux needs.  We, therefore, end up
- * with 512 entries in the "PTE" level.
- *
- * This leads to the page tables having the following layout:
- *
- *    pgd             pte
- * |        |
- * +--------+
- * |        |       +------------+ +0
- * +- - - - +       | Linux pt 0 |
- * |        |       +------------+ +1024
- * +--------+ +0    | Linux pt 1 |
- * |        |-----> +------------+ +2048
- * +- - - - + +4    |  h/w pt 0  |
- * |        |-----> +------------+ +3072
- * +--------+ +8    |  h/w pt 1  |
- * |        |       +------------+ +4096
- *
- * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
- * PTE_xxx for definitions of bits appearing in the "h/w pt".
- *
- * PMD_xxx definitions refer to bits in the first level page table.
- *
- * The "dirty" bit is emulated by only granting hardware write permission
- * iff the page is marked "writable" and "dirty" in the Linux PTE.  This
- * means that a write to a clean page will cause a permission fault, and
- * the Linux MM layer will mark the page dirty via handle_pte_fault().
- * For the hardware to notice the permission change, the TLB entry must
- * be flushed, and ptep_set_access_flags() does that for us.
- *
- * The "accessed" or "young" bit is emulated by a similar method; we only
- * allow accesses to the page if the "young" bit is set.  Accesses to the
- * page will cause a fault, and handle_pte_fault() will set the young bit
- * for us as long as the page is marked present in the corresponding Linux
- * PTE entry.  Again, ptep_set_access_flags() will ensure that the TLB is
- * up to date.
- *
- * However, when the "young" bit is cleared, we deny access to the page
- * by clearing the hardware PTE.  Currently Linux does not flush the TLB
- * for us in this case, which means the TLB will retain the transation
- * until either the TLB entry is evicted under pressure, or a context
- * switch which changes the user space mapping occurs.
- */
-#define PTRS_PER_PTE		512
-#define PTRS_PER_PMD		1
-#define PTRS_PER_PGD		2048
-
-#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
-#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
-#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT		21
-#define PGDIR_SHIFT		21
-
 #define LIBRARY_TEXT_START	0x0c000000
 
 #ifndef __ASSEMBLY__
@@ -124,12 +53,6 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte)
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd)
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd)
-#endif /* !__ASSEMBLY__ */
-
-#define PMD_SIZE		(1UL << PMD_SHIFT)
-#define PMD_MASK		(~(PMD_SIZE-1))
-#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK		(~(PGDIR_SIZE-1))
 
 /*
  * This is the lowest virtual address we can permit any user space
@@ -138,60 +61,6 @@ extern void __pgd_error(const char *file, int line, pgd_t);
  */
 #define FIRST_USER_ADDRESS	PAGE_SIZE
 
-#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
-
-/*
- * section address mask and size definitions.
- */
-#define SECTION_SHIFT		20
-#define SECTION_SIZE		(1UL << SECTION_SHIFT)
-#define SECTION_MASK		(~(SECTION_SIZE-1))
-
-/*
- * ARMv6 supersection address mask and size definitions.
- */
-#define SUPERSECTION_SHIFT	24
-#define SUPERSECTION_SIZE	(1UL << SUPERSECTION_SHIFT)
-#define SUPERSECTION_MASK	(~(SUPERSECTION_SIZE-1))
-
-/*
- * "Linux" PTE definitions.
- *
- * We keep two sets of PTEs - the hardware and the linux version.
- * This allows greater flexibility in the way we map the Linux bits
- * onto the hardware tables, and allows us to have YOUNG and DIRTY
- * bits.
- *
- * The PTE table pointer refers to the hardware entries; the "Linux"
- * entries are stored 1024 bytes below.
- */
-#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
-#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
-#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
-#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
-#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
-#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
-#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
-#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
-
-/*
- * These are the memory types, defined to be compatible with
- * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
- */
-#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
-#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
-#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
-#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
-#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
-#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
-#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
-#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
-#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
-#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
-#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
-
-#ifndef __ASSEMBLY__
-
 /*
  * The pgprot_* and protection_map entries will be fixed up in runtime
  * to include the cachable and bufferable bits based on memory policy,
@@ -232,6 +101,9 @@ extern pgprot_t		pgprot_kernel;
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
 
+#define pgprot_stronglyordered(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
@@ -327,10 +199,10 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 {
-	return __va(pmd_val(pmd) & PAGE_MASK);
+	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
-#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 /* we don't need complex calculations here as the pmd is folded into the pgd */
 #define pmd_addr_end(addr,end)	(end)
@@ -351,7 +223,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_offset_map(pmd,addr)	(__pte_map(pmd) + pte_index(addr))
 #define pte_unmap(pte)			__pte_unmap(pte)
 
-#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
+#define pte_pfn(pte)		((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
 #define pfn_pte(pfn,prot)	__pte(__pfn_to_phys(pfn) | pgprot_val(prot))
 
 #define pte_page(pte)		pfn_to_page(pte_pfn(pte))
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index b7e82c4aced6..71d99b83cdb9 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -13,7 +13,12 @@
 #define __ARM_PMU_H__
 
 #include <linux/interrupt.h>
+#include <linux/perf_event.h>
 
+/*
+ * Types of PMUs that can be accessed directly and require mutual
+ * exclusion between profiling tools.
+ */
 enum arm_pmu_type {
 	ARM_PMU_DEVICE_CPU	= 0,
 	ARM_NUM_PMU_DEVICES,
@@ -37,21 +42,17 @@ struct arm_pmu_platdata {
  * reserve_pmu() - reserve the hardware performance counters
  *
  * Reserve the hardware performance counters in the system for exclusive use.
- * The platform_device for the system is returned on success, ERR_PTR()
- * encoded error on failure.
+ * Returns 0 on success or -EBUSY if the lock is already held.
  */
-extern struct platform_device *
+extern int
 reserve_pmu(enum arm_pmu_type type);
 
 /**
  * release_pmu() - Relinquish control of the performance counters
  *
  * Release the performance counters and allow someone else to use them.
- * Callers must have disabled the counters and released IRQs before calling
- * this. The platform_device returned from reserve_pmu() must be passed as
- * a cookie.
  */
-extern int
+extern void
 release_pmu(enum arm_pmu_type type);
 
 /**
@@ -68,24 +69,78 @@ init_pmu(enum arm_pmu_type type);
 
 #include <linux/err.h>
 
-static inline struct platform_device *
-reserve_pmu(enum arm_pmu_type type)
-{
-	return ERR_PTR(-ENODEV);
-}
-
 static inline int
-release_pmu(enum arm_pmu_type type)
+reserve_pmu(enum arm_pmu_type type)
 {
 	return -ENODEV;
 }
 
-static inline int
-init_pmu(enum arm_pmu_type type)
-{
-	return -ENODEV;
-}
+static inline void
+release_pmu(enum arm_pmu_type type)	{ }
 
 #endif /* CONFIG_CPU_HAS_PMU */
 
+#ifdef CONFIG_HW_PERF_EVENTS
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event	**events;
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long           *used_mask;
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t		pmu_lock;
+};
+
+struct arm_pmu {
+	struct pmu	pmu;
+	enum arm_perf_pmu_ids id;
+	enum arm_pmu_type type;
+	cpumask_t	active_irqs;
+	const char	*name;
+	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
+	void		(*enable)(struct hw_perf_event *evt, int idx);
+	void		(*disable)(struct hw_perf_event *evt, int idx);
+	int		(*get_event_idx)(struct pmu_hw_events *hw_events,
+					 struct hw_perf_event *hwc);
+	int		(*set_event_filter)(struct hw_perf_event *evt,
+					    struct perf_event_attr *attr);
+	u32		(*read_counter)(int idx);
+	void		(*write_counter)(int idx, u32 val);
+	void		(*start)(void);
+	void		(*stop)(void);
+	void		(*reset)(void *);
+	int		(*map_event)(struct perf_event *event);
+	int		num_events;
+	atomic_t	active_events;
+	struct mutex	reserve_mutex;
+	u64		max_period;
+	struct platform_device	*plat_device;
+	struct pmu_hw_events	*(*get_hw_events)(void);
+};
+
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
+
+u64 armpmu_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc,
+			int idx, int overflow);
+
+int armpmu_event_set_period(struct perf_event *event,
+			    struct hw_perf_event *hwc,
+			    int idx);
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
 #endif /* __ARM_PMU_H__ */
diff --git a/arch/arm/include/asm/poll.h b/arch/arm/include/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/arm/include/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 633d1cb84d87..9e92cb205e65 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -81,6 +81,10 @@ extern void cpu_dcache_clean_area(void *, int);
 extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
+
+/* These three are private to arch/arm/kernel/suspend.c */
+extern void cpu_do_suspend(void *);
+extern void cpu_do_resume(void *);
 #else
 #define cpu_proc_init			processor._proc_init
 #define cpu_proc_fin			processor._proc_fin
@@ -89,6 +93,10 @@ extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
 #define cpu_dcache_clean_area		processor.dcache_clean_area
 #define cpu_set_pte_ext			processor.set_pte_ext
 #define cpu_do_switch_mm		processor.switch_mm
+
+/* These three are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend			processor.do_suspend
+#define cpu_do_resume			processor.do_resume
 #endif
 
 extern void cpu_resume(void);
diff --git a/arch/arm/include/asm/resource.h b/arch/arm/include/asm/resource.h
deleted file mode 100644
index 734b581b5b6a..000000000000
--- a/arch/arm/include/asm/resource.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ARM_RESOURCE_H
-#define _ARM_RESOURCE_H
-
-#include <asm-generic/resource.h>
-
-#endif
diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h
deleted file mode 100644
index 2b8c5160388f..000000000000
--- a/arch/arm/include/asm/sections.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sections.h>
diff --git a/arch/arm/include/asm/siginfo.h b/arch/arm/include/asm/siginfo.h
deleted file mode 100644
index 5e21852e6039..000000000000
--- a/arch/arm/include/asm/siginfo.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASMARM_SIGINFO_H
-#define _ASMARM_SIGINFO_H
-
-#include <asm-generic/siginfo.h>
-
-#endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index e42d96a45d3e..1e5717afc4ac 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -33,6 +33,11 @@ extern void show_ipi_list(struct seq_file *, int);
 asmlinkage void do_IPI(int ipinr, struct pt_regs *regs);
 
 /*
+ * Called from C code, this handles an IPI.
+ */
+void handle_IPI(int ipinr, struct pt_regs *regs);
+
+/*
  * Setup the set of possible CPUs (via set_cpu_possible)
  */
 extern void smp_init_cpus(void);
@@ -66,6 +71,12 @@ extern void platform_secondary_init(unsigned int cpu);
 extern void platform_smp_prepare_cpus(unsigned int);
 
 /*
+ * Logical CPU mapping.
+ */
+extern int __cpu_logical_map[NR_CPUS];
+#define cpu_logical_map(cpu)	__cpu_logical_map[cpu]
+
+/*
  * Initial data for bringing up a secondary CPU.
  */
 struct secondary_data {
@@ -88,9 +99,4 @@ extern void platform_cpu_enable(unsigned int cpu);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
-/*
- * show local interrupt info
- */
-extern void show_local_irqs(struct seq_file *, int);
-
 #endif /* ifndef __ASM_ARM_SMP_H */
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h
index fed9981fba08..ef9ffba97ad8 100644
--- a/arch/arm/include/asm/smp_twd.h
+++ b/arch/arm/include/asm/smp_twd.h
@@ -22,7 +22,7 @@ struct clock_event_device;
 
 extern void __iomem *twd_base;
 
-int twd_timer_ack(void);
 void twd_timer_setup(struct clock_event_device *);
+void twd_timer_stop(struct clock_event_device *);
 
 #endif
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index b0e4e1a02318..1c0a551ae375 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -1,22 +1,7 @@
 #ifndef __ASM_ARM_SUSPEND_H
 #define __ASM_ARM_SUSPEND_H
 
-#include <asm/memory.h>
-#include <asm/tlbflush.h>
-
 extern void cpu_resume(void);
-
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
-static inline int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
-{
-	extern int __cpu_suspend(int, long, unsigned long,
-				 int (*)(unsigned long));
-	int ret = __cpu_suspend(0, PHYS_OFFSET - PAGE_OFFSET, arg, fn);
-	flush_tlb_all();
-	return ret;
-}
+extern int cpu_suspend(unsigned long, int (*)(unsigned long));
 
 #endif
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 832888d0c20c..984014b92647 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -57,18 +57,12 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/compiler.h>
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
 
 #include <asm/outercache.h>
 
-#define __exception	__attribute__((section(".exception.text")))
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#define __exception_irq_entry	__irq_entry
-#else
-#define __exception_irq_entry	__exception
-#endif
-
 struct thread_info;
 struct task_struct;
 
@@ -97,14 +91,13 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int,
 #define xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
-extern asmlinkage void __backtrace(void);
 extern asmlinkage void c_backtrace(unsigned long fp, int pmode);
 
 struct mm_struct;
 extern void show_pte(struct mm_struct *mm, unsigned long addr);
 extern void __show_regs(struct pt_regs *);
 
-extern int cpu_architecture(void);
+extern int __pure cpu_architecture(void);
 extern void cpu_init(void);
 
 void arm_machine_restart(char mode, const char *cmd);
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 8077145698ff..02b2f8203982 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -471,7 +471,7 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
  *	these operations.  This is typically used when we are removing
  *	PMD entries.
  */
-static inline void flush_pmd_entry(pmd_t *pmd)
+static inline void flush_pmd_entry(void *pmd)
 {
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
@@ -487,7 +487,7 @@ static inline void flush_pmd_entry(pmd_t *pmd)
 		dsb();
 }
 
-static inline void clean_pmd_entry(pmd_t *pmd)
+static inline void clean_pmd_entry(void *pmd)
 {
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index accbd7cad9b5..a7e457ed27c3 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -1,6 +1,39 @@
 #ifndef _ASM_ARM_TOPOLOGY_H
 #define _ASM_ARM_TOPOLOGY_H
 
+#ifdef CONFIG_ARM_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_arm {
+	int thread_id;
+	int core_id;
+	int socket_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+};
+
+extern struct cputopo_arm cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu)	(cpu_topology[cpu].socket_id)
+#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable()	(cpu_topology[0].socket_id != -1)
+#define smt_capable()	(cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index f7887dc53c1f..16eed6aebfa4 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_MODULES)		+= armksyms.o module.o
 obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
-obj-$(CONFIG_PM_SLEEP)		+= sleep.o
+obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
 obj-$(CONFIG_HAVE_SCHED_CLOCK)	+= sched_clock.o
 obj-$(CONFIG_SMP)		+= smp.o smp_tlb.o
 obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
@@ -43,6 +43,13 @@ obj-$(CONFIG_KPROBES)		+= kprobes-thumb.o
 else
 obj-$(CONFIG_KPROBES)		+= kprobes-arm.o
 endif
+obj-$(CONFIG_ARM_KPROBES_TEST)	+= test-kprobes.o
+test-kprobes-objs		:= kprobes-test.o
+ifdef CONFIG_THUMB2_KERNEL
+test-kprobes-objs		+= kprobes-test-thumb.o
+else
+test-kprobes-objs		+= kprobes-test-arm.o
+endif
 obj-$(CONFIG_ATAGS_PROC)	+= atags.o
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
@@ -66,6 +73,7 @@ obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_CPU_HAS_PMU)	+= pmu.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 AFLAGS_iwmmxt.o			:= -Wa,-mcpu=iwmmxt
+obj-$(CONFIG_ARM_CPU_TOPOLOGY)  += topology.o
 
 ifneq ($(CONFIG_ARCH_EBSA110),y)
   obj-y		+= io.o
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index aeef960ff795..8e3c6f11b0a1 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -49,9 +49,6 @@ extern void __aeabi_ulcmp(void);
 
 extern void fpundefinstr(void);
 
-
-EXPORT_SYMBOL(__backtrace);
-
 	/* platform dependent support */
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 16baba2e4369..1429d8989fb9 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -20,6 +20,7 @@
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/procinfo.h>
+#include <asm/hardware/cache-l2x0.h>
 #include <linux/kbuild.h>
 
 /*
@@ -92,6 +93,17 @@ int main(void)
   DEFINE(S_OLD_R0,		offsetof(struct pt_regs, ARM_ORIG_r0));
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
+#ifdef CONFIG_CACHE_L2X0
+  DEFINE(L2X0_R_PHY_BASE,	offsetof(struct l2x0_regs, phy_base));
+  DEFINE(L2X0_R_AUX_CTRL,	offsetof(struct l2x0_regs, aux_ctrl));
+  DEFINE(L2X0_R_TAG_LATENCY,	offsetof(struct l2x0_regs, tag_latency));
+  DEFINE(L2X0_R_DATA_LATENCY,	offsetof(struct l2x0_regs, data_latency));
+  DEFINE(L2X0_R_FILTER_START,	offsetof(struct l2x0_regs, filter_start));
+  DEFINE(L2X0_R_FILTER_END,	offsetof(struct l2x0_regs, filter_end));
+  DEFINE(L2X0_R_PREFETCH_CTRL,	offsetof(struct l2x0_regs, prefetch_ctrl));
+  DEFINE(L2X0_R_PWR_CTRL,	offsetof(struct l2x0_regs, pwr_ctrl));
+  BLANK();
+#endif
 #ifdef CONFIG_CPU_HAS_ASID
   DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
   BLANK();
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index d6df359408f0..c0d9203fc75e 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -412,6 +412,9 @@ void pcibios_fixup_bus(struct pci_bus *bus)
 	printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n",
 		bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis");
 }
+#ifdef CONFIG_HOTPLUG
+EXPORT_SYMBOL(pcibios_fixup_bus);
+#endif
 
 /*
  * Convert from Linux-centric to bus-centric addresses for bridge devices.
@@ -431,6 +434,7 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 	region->start = res->start - offset;
 	region->end   = res->end - offset;
 }
+EXPORT_SYMBOL(pcibios_resource_to_bus);
 
 void __devinit
 pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
@@ -447,12 +451,7 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
 	res->start = region->start + offset;
 	res->end   = region->end + offset;
 }
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_fixup_bus);
-EXPORT_SYMBOL(pcibios_resource_to_bus);
 EXPORT_SYMBOL(pcibios_bus_to_resource);
-#endif
 
 /*
  * Swizzle the device pin each time we cross a bridge.
diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S
index b7685f1bb04a..204e2160cfcc 100644
--- a/arch/arm/kernel/debug.S
+++ b/arch/arm/kernel/debug.S
@@ -151,6 +151,8 @@ printhex:	adr	r2, hexbuf
 		b	printascii
 ENDPROC(printhex2)
 
+hexbuf:		.space 16
+
 		.ltorg
 
 ENTRY(printascii)
@@ -175,5 +177,3 @@ ENTRY(printch)
 		mov	r0, #0
 		b	1b
 ENDPROC(printch)
-
-hexbuf:		.space 16
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index 2c4a185f92cd..7b829d9663b1 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -23,7 +23,7 @@
 
 #include <asm/mach/dma.h>
 
-DEFINE_SPINLOCK(dma_spin_lock);
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
 EXPORT_SYMBOL(dma_spin_lock);
 
 static dma_t *dma_chan[MAX_DMA_CHANNELS];
diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c
index d16500110ee9..4dd0edab6a65 100644
--- a/arch/arm/kernel/ecard.c
+++ b/arch/arm/kernel/ecard.c
@@ -237,7 +237,7 @@ static void ecard_init_pgtables(struct mm_struct *mm)
 
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (IO_SIZE / PGDIR_SIZE));
 
-	src_pgd = pgd_offset(mm, EASI_BASE);
+	src_pgd = pgd_offset(mm, (unsigned long)EASI_BASE);
 	dst_pgd = pgd_offset(mm, EASI_START);
 
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
@@ -674,44 +674,37 @@ static int __init ecard_probeirqhw(void)
 #define ecard_probeirqhw() (0)
 #endif
 
-#ifndef IO_EC_MEMC8_BASE
-#define IO_EC_MEMC8_BASE 0
-#endif
-
-static unsigned int __ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
+static void __iomem *__ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
 {
-	unsigned long address = 0;
+	void __iomem *address = NULL;
 	int slot = ec->slot_no;
 
 	if (ec->slot_no == 8)
-		return IO_EC_MEMC8_BASE;
+		return ECARD_MEMC8_BASE;
 
 	ectcr &= ~(1 << slot);
 
 	switch (type) {
 	case ECARD_MEMC:
 		if (slot < 4)
-			address = IO_EC_MEMC_BASE + (slot << 12);
+			address = ECARD_MEMC_BASE + (slot << 14);
 		break;
 
 	case ECARD_IOC:
 		if (slot < 4)
-			address = IO_EC_IOC_BASE + (slot << 12);
-#ifdef IO_EC_IOC4_BASE
+			address = ECARD_IOC_BASE + (slot << 14);
 		else
-			address = IO_EC_IOC4_BASE + ((slot - 4) << 12);
-#endif
+			address = ECARD_IOC4_BASE + ((slot - 4) << 14);
 		if (address)
-			address +=  speed << 17;
+			address += speed << 19;
 		break;
 
-#ifdef IO_EC_EASI_BASE
 	case ECARD_EASI:
-		address = IO_EC_EASI_BASE + (slot << 22);
+		address = ECARD_EASI_BASE + (slot << 24);
 		if (speed == ECARD_FAST)
 			ectcr |= 1 << slot;
 		break;
-#endif
+
 	default:
 		break;
 	}
@@ -990,6 +983,7 @@ ecard_probe(int slot, card_type_t type)
 	ecard_t **ecp;
 	ecard_t *ec;
 	struct ex_ecid cid;
+	void __iomem *addr;
 	int i, rc;
 
 	ec = ecard_alloc_card(type, slot);
@@ -999,7 +993,7 @@ ecard_probe(int slot, card_type_t type)
 	}
 
 	rc = -ENODEV;
-	if ((ec->podaddr = __ecard_address(ec, type, ECARD_SYNC)) == 0)
+	if ((addr = __ecard_address(ec, type, ECARD_SYNC)) == NULL)
 		goto nodev;
 
 	cid.r_zero = 1;
@@ -1019,7 +1013,7 @@ ecard_probe(int slot, card_type_t type)
 	ec->cid.fiqmask = cid.r_fiqmask;
 	ec->cid.fiqoff  = ecard_gets24(cid.r_fiqoff);
 	ec->fiqaddr	=
-	ec->irqaddr	= (void __iomem *)ioaddr(ec->podaddr);
+	ec->irqaddr	= addr;
 
 	if (ec->cid.is) {
 		ec->irqmask = ec->cid.irqmask;
@@ -1048,10 +1042,8 @@ ecard_probe(int slot, card_type_t type)
 		set_irq_flags(ec->irq, IRQF_VALID);
 	}
 
-#ifdef IO_EC_MEMC8_BASE
 	if (slot == 8)
 		ec->irq = 11;
-#endif
 #ifdef CONFIG_ARCH_RPC
 	/* On RiscPC, only first two slots have DMA capability */
 	if (slot < 2)
@@ -1097,9 +1089,7 @@ static int __init ecard_init(void)
 			ecard_probe(slot, ECARD_IOC);
 	}
 
-#ifdef IO_EC_MEMC8_BASE
 	ecard_probe(8, ECARD_IOC);
-#endif
 
 	irqhw = ecard_probeirqhw();
 
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index a87cbf889ff4..9ad50c4208ae 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -24,6 +24,7 @@
 #include <asm/unwind.h>
 #include <asm/unistd.h>
 #include <asm/tls.h>
+#include <asm/system.h>
 
 #include "entry-header.S"
 #include <asm/entry-macro-multi.S>
@@ -262,8 +263,7 @@ __und_svc:
 	ldr	r0, [r4, #-4]
 #else
 	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
-	and	r9, r0, #0xf800
-	cmp	r9, #0xe800			@ 32-bit instruction if xx >= 0
+	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
 	ldrhhs	r9, [r4]			@ bottom 16 bits
 	orrhs	r0, r9, r0, lsl #16
 #endif
@@ -440,18 +440,46 @@ __und_usr:
 #endif
 	beq	call_fpe
 	@ Thumb instruction
-#if __LINUX_ARM_ARCH__ >= 7
+#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7
+/*
+ * Thumb-2 instruction handling.  Note that because pre-v6 and >= v6 platforms
+ * can never be supported in a single kernel, this code is not applicable at
+ * all when __LINUX_ARM_ARCH__ < 6.  This allows simplifying assumptions to be
+ * made about .arch directives.
+ */
+#if __LINUX_ARM_ARCH__ < 7
+/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */
+#define NEED_CPU_ARCHITECTURE
+	ldr	r5, .LCcpu_architecture
+	ldr	r5, [r5]
+	cmp	r5, #CPU_ARCH_ARMv7
+	blo	__und_usr_unknown
+/*
+ * The following code won't get run unless the running CPU really is v7, so
+ * coding round the lack of ldrht on older arches is pointless.  Temporarily
+ * override the assembler target arch with the minimum required instead:
+ */
+	.arch	armv6t2
+#endif
 2:
  ARM(	ldrht	r5, [r4], #2	)
  THUMB(	ldrht	r5, [r4]	)
  THUMB(	add	r4, r4, #2	)
-	and	r0, r5, #0xf800			@ mask bits 111x x... .... ....
-	cmp	r0, #0xe800			@ 32bit instruction if xx != 0
+	cmp	r5, #0xe800			@ 32bit instruction if xx != 0
 	blo	__und_usr_unknown
 3:	ldrht	r0, [r4]
 	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
 	orr	r0, r0, r5, lsl #16
+
+#if __LINUX_ARM_ARCH__ < 7
+/* If the target arch was overridden, change it back: */
+#ifdef CONFIG_CPU_32v6K
+	.arch	armv6k
 #else
+	.arch	armv6
+#endif
+#endif /* __LINUX_ARM_ARCH__ < 7 */
+#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */
 	b	__und_usr_unknown
 #endif
  UNWIND(.fnend		)
@@ -578,6 +606,12 @@ call_fpe:
 	movw_pc	lr				@ CP#14 (Debug)
 	movw_pc	lr				@ CP#15 (Control)
 
+#ifdef NEED_CPU_ARCHITECTURE
+	.align	2
+.LCcpu_architecture:
+	.word	__cpu_architecture
+#endif
+
 #ifdef CONFIG_NEON
 	.align	6
 
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 673c806cc106..566c54c2a1fe 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -21,6 +21,7 @@
 #include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/system.h>
+#include <asm/pgtable.h>
 
 #ifdef CONFIG_DEBUG_LL
 #include <mach/debug-macro.S>
@@ -38,11 +39,14 @@
 #error KERNEL_RAM_VADDR must start at 0xXXXX8000
 #endif
 
+#define PG_DIR_SIZE	0x4000
+#define PMD_ORDER	2
+
 	.globl	swapper_pg_dir
-	.equ	swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000
+	.equ	swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE
 
 	.macro	pgtbl, rd, phys
-	add	\rd, \phys, #TEXT_OFFSET - 0x4000
+	add	\rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE
 	.endm
 
 #ifdef CONFIG_XIP_KERNEL
@@ -148,11 +152,11 @@ __create_page_tables:
 	pgtbl	r4, r8				@ page table address
 
 	/*
-	 * Clear the 16K level 1 swapper page table
+	 * Clear the swapper page table
 	 */
 	mov	r0, r4
 	mov	r3, #0
-	add	r6, r0, #0x4000
+	add	r6, r0, #PG_DIR_SIZE
 1:	str	r3, [r0], #4
 	str	r3, [r0], #4
 	str	r3, [r0], #4
@@ -171,30 +175,30 @@ __create_page_tables:
 	sub	r0, r0, r3			@ virt->phys offset
 	add	r5, r5, r0			@ phys __enable_mmu
 	add	r6, r6, r0			@ phys __enable_mmu_end
-	mov	r5, r5, lsr #20
-	mov	r6, r6, lsr #20
+	mov	r5, r5, lsr #SECTION_SHIFT
+	mov	r6, r6, lsr #SECTION_SHIFT
 
-1:	orr	r3, r7, r5, lsl #20		@ flags + kernel base
-	str	r3, [r4, r5, lsl #2]		@ identity mapping
-	teq	r5, r6
-	addne	r5, r5, #1			@ next section
-	bne	1b
+1:	orr	r3, r7, r5, lsl #SECTION_SHIFT	@ flags + kernel base
+	str	r3, [r4, r5, lsl #PMD_ORDER]	@ identity mapping
+	cmp	r5, r6
+	addlo	r5, r5, #1			@ next section
+	blo	1b
 
 	/*
 	 * Now setup the pagetables for our kernel direct
 	 * mapped region.
 	 */
 	mov	r3, pc
-	mov	r3, r3, lsr #20
-	orr	r3, r7, r3, lsl #20
-	add	r0, r4,  #(KERNEL_START & 0xff000000) >> 18
-	str	r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]!
+	mov	r3, r3, lsr #SECTION_SHIFT
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
+	add	r0, r4,  #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
 	ldr	r6, =(KERNEL_END - 1)
-	add	r0, r0, #4
-	add	r6, r4, r6, lsr #18
+	add	r0, r0, #1 << PMD_ORDER
+	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
 1:	cmp	r0, r6
-	add	r3, r3, #1 << 20
-	strls	r3, [r0], #4
+	add	r3, r3, #1 << SECTION_SHIFT
+	strls	r3, [r0], #1 << PMD_ORDER
 	bls	1b
 
 #ifdef CONFIG_XIP_KERNEL
@@ -203,11 +207,11 @@ __create_page_tables:
 	 */
 	add	r3, r8, #TEXT_OFFSET
 	orr	r3, r3, r7
-	add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> 18
-	str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]!
+	add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]!
 	ldr	r6, =(_end - 1)
 	add	r0, r0, #4
-	add	r6, r4, r6, lsr #18
+	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
 1:	cmp	r0, r6
 	add	r3, r3, #1 << 20
 	strls	r3, [r0], #4
@@ -218,12 +222,12 @@ __create_page_tables:
 	 * Then map boot params address in r2 or
 	 * the first 1MB of ram if boot params address is not specified.
 	 */
-	mov	r0, r2, lsr #20
-	movs	r0, r0, lsl #20
+	mov	r0, r2, lsr #SECTION_SHIFT
+	movs	r0, r0, lsl #SECTION_SHIFT
 	moveq	r0, r8
 	sub	r3, r0, r8
 	add	r3, r3, #PAGE_OFFSET
-	add	r3, r4, r3, lsr #18
+	add	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
 	orr	r6, r7, r0
 	str	r6, [r3]
 
@@ -236,21 +240,21 @@ __create_page_tables:
 	 */
 	addruart r7, r3, r0
 
-	mov	r3, r3, lsr #20
-	mov	r3, r3, lsl #2
+	mov	r3, r3, lsr #SECTION_SHIFT
+	mov	r3, r3, lsl #PMD_ORDER
 
 	add	r0, r4, r3
 	rsb	r3, r3, #0x4000			@ PTRS_PER_PGD*sizeof(long)
 	cmp	r3, #0x0800			@ limit to 512MB
 	movhi	r3, #0x0800
 	add	r6, r0, r3
-	mov	r3, r7, lsr #20
+	mov	r3, r7, lsr #SECTION_SHIFT
 	ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
-	orr	r3, r7, r3, lsl #20
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
 1:	str	r3, [r0], #4
-	add	r3, r3, #1 << 20
-	teq	r0, r6
-	bne	1b
+	add	r3, r3, #1 << SECTION_SHIFT
+	cmp	r0, r6
+	blo	1b
 
 #else /* CONFIG_DEBUG_ICEDCC */
 	/* we don't need any serial debugging mappings for ICEDCC */
@@ -262,7 +266,7 @@ __create_page_tables:
 	 * If we're using the NetWinder or CATS, we also need to map
 	 * in the 16550-type serial port for the debug messages
 	 */
-	add	r0, r4, #0xff000000 >> 18
+	add	r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
 	orr	r3, r7, #0x7c000000
 	str	r3, [r0]
 #endif
@@ -272,10 +276,10 @@ __create_page_tables:
 	 * Similar reasons here - for debug.  This is
 	 * only for Acorn RiscPC architectures.
 	 */
-	add	r0, r4, #0x02000000 >> 18
+	add	r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
 	orr	r3, r7, #0x02000000
 	str	r3, [r0]
-	add	r0, r4, #0xd8000000 >> 18
+	add	r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
 	str	r3, [r0]
 #endif
 #endif
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index a927ca1f5566..814a52a9dc39 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -45,7 +45,6 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
-static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -137,10 +136,11 @@ static u8 get_debug_arch(void)
 	u32 didr;
 
 	/* Do we implement the extended CPUID interface? */
-	if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
-	    "CPUID feature registers not supported. "
-	    "Assuming v6 debug is present.\n"))
+	if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
+		pr_warning("CPUID feature registers not supported. "
+			   "Assuming v6 debug is present.\n");
 		return ARM_DEBUG_ARCH_V6;
+	}
 
 	ARM_DBG_READ(c0, 0, didr);
 	return (didr >> 16) & 0xf;
@@ -154,10 +154,21 @@ u8 arch_get_debug_arch(void)
 static int debug_arch_supported(void)
 {
 	u8 arch = get_debug_arch();
-	return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14;
+
+	/* We don't support the memory-mapped interface. */
+	return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) ||
+		arch >= ARM_DEBUG_ARCH_V7_1;
+}
+
+/* Determine number of WRP registers available. */
+static int get_num_wrp_resources(void)
+{
+	u32 didr;
+	ARM_DBG_READ(c0, 0, didr);
+	return ((didr >> 28) & 0xf) + 1;
 }
 
-/* Determine number of BRP register available. */
+/* Determine number of BRP registers available. */
 static int get_num_brp_resources(void)
 {
 	u32 didr;
@@ -176,9 +187,10 @@ static int core_has_mismatch_brps(void)
 static int get_num_wrps(void)
 {
 	/*
-	 * FIXME: When a watchpoint fires, the only way to work out which
-	 * watchpoint it was is by disassembling the faulting instruction
-	 * and working out the address of the memory access.
+	 * On debug architectures prior to 7.1, when a watchpoint fires, the
+	 * only way to work out which watchpoint it was is by disassembling
+	 * the faulting instruction and working out the address of the memory
+	 * access.
 	 *
 	 * Furthermore, we can only do this if the watchpoint was precise
 	 * since imprecise watchpoints prevent us from calculating register
@@ -192,36 +204,17 @@ static int get_num_wrps(void)
 	 * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
 	 * that it is set on some implementations].
 	 */
+	if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1)
+		return 1;
 
-#if 0
-	int wrps;
-	u32 didr;
-	ARM_DBG_READ(c0, 0, didr);
-	wrps = ((didr >> 28) & 0xf) + 1;
-#endif
-	int wrps = 1;
-
-	if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
-		wrps = get_num_brp_resources() - 1;
-
-	return wrps;
-}
-
-/* We reserve one breakpoint for each watchpoint. */
-static int get_num_reserved_brps(void)
-{
-	if (core_has_mismatch_brps())
-		return get_num_wrps();
-	return 0;
+	return get_num_wrp_resources();
 }
 
 /* Determine number of usable BRPs available. */
 static int get_num_brps(void)
 {
 	int brps = get_num_brp_resources();
-	if (core_has_mismatch_brps())
-		brps -= get_num_reserved_brps();
-	return brps;
+	return core_has_mismatch_brps() ? brps - 1 : brps;
 }
 
 /*
@@ -239,7 +232,7 @@ static int enable_monitor_mode(void)
 
 	/* Ensure that halting mode is disabled. */
 	if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN,
-			"halting debug mode enabled. Unable to access hardware resources.\n")) {
+		"halting debug mode enabled. Unable to access hardware resources.\n")) {
 		ret = -EPERM;
 		goto out;
 	}
@@ -255,6 +248,7 @@ static int enable_monitor_mode(void)
 		ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	case ARM_DEBUG_ARCH_V7_ECP14:
+	case ARM_DEBUG_ARCH_V7_1:
 		ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN));
 		break;
 	default:
@@ -346,24 +340,10 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		val_base = ARM_BASE_BVR;
 		slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
 		max_slots = core_num_brps;
-		if (info->step_ctrl.enabled) {
-			/* Override the breakpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		}
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled) {
-			/* Install into the reserved breakpoint region. */
-			ctrl_base = ARM_BASE_BCR + core_num_brps;
-			val_base = ARM_BASE_BVR + core_num_brps;
-			/* Override the watchpoint data with the step data. */
-			addr = info->trigger & ~0x3;
-			ctrl = encode_ctrl_reg(info->step_ctrl);
-		} else {
-			ctrl_base = ARM_BASE_WCR;
-			val_base = ARM_BASE_WVR;
-		}
+		ctrl_base = ARM_BASE_WCR;
+		val_base = ARM_BASE_WVR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -382,6 +362,17 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 		goto out;
 	}
 
+	/* Override the breakpoint data with the step data. */
+	if (info->step_ctrl.enabled) {
+		addr = info->trigger & ~0x3;
+		ctrl = encode_ctrl_reg(info->step_ctrl);
+		if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) {
+			i = 0;
+			ctrl_base = ARM_BASE_BCR + core_num_brps;
+			val_base = ARM_BASE_BVR + core_num_brps;
+		}
+	}
+
 	/* Setup the address register. */
 	write_wb_reg(val_base + i, addr);
 
@@ -405,10 +396,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 		max_slots = core_num_brps;
 	} else {
 		/* Watchpoint */
-		if (info->step_ctrl.enabled)
-			base = ARM_BASE_BCR + core_num_brps;
-		else
-			base = ARM_BASE_WCR;
+		base = ARM_BASE_WCR;
 		slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 		max_slots = core_num_wrps;
 	}
@@ -426,6 +414,13 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot\n"))
 		return;
 
+	/* Ensure that we disable the mismatch breakpoint. */
+	if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE &&
+	    info->step_ctrl.enabled) {
+		i = 0;
+		base = ARM_BASE_BCR + core_num_brps;
+	}
+
 	/* Reset the control register. */
 	write_wb_reg(base + i, 0);
 }
@@ -632,10 +627,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * we can use the mismatch feature as a poor-man's hardware
 	 * single-step, but this only works for per-task breakpoints.
 	 */
-	if (WARN_ONCE(!bp->overflow_handler &&
-		(arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
-		 || !bp->hw.bp_target),
-			"overflow handler required but none found\n")) {
+	if (!bp->overflow_handler && (arch_check_bp_in_kernelspace(bp) ||
+	    !core_has_mismatch_brps() || !bp->hw.bp_target)) {
+		pr_warning("overflow handler required but none found\n");
 		ret = -EINVAL;
 	}
 out:
@@ -666,34 +660,62 @@ static void disable_single_step(struct perf_event *bp)
 	arch_install_hw_breakpoint(bp);
 }
 
-static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
+static void watchpoint_handler(unsigned long addr, unsigned int fsr,
+			       struct pt_regs *regs)
 {
-	int i;
+	int i, access;
+	u32 val, ctrl_reg, alignment_mask;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	/* Without a disassembler, we can only handle 1 watchpoint. */
-	BUG_ON(core_num_wrps > 1);
-
 	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
 
-		if (wp == NULL) {
-			rcu_read_unlock();
-			continue;
-		}
+		if (wp == NULL)
+			goto unlock;
 
+		info = counter_arch_bp(wp);
 		/*
-		 * The DFAR is an unknown value. Since we only allow a
-		 * single watchpoint, we can set the trigger to the lowest
-		 * possible faulting address.
+		 * The DFAR is an unknown value on debug architectures prior
+		 * to 7.1. Since we only allow a single watchpoint on these
+		 * older CPUs, we can set the trigger to the lowest possible
+		 * faulting address.
 		 */
-		info = counter_arch_bp(wp);
-		info->trigger = wp->attr.bp_addr;
+		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
+			BUG_ON(i > 0);
+			info->trigger = wp->attr.bp_addr;
+		} else {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+
+			/* Check if the watchpoint value matches. */
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			if (val != (addr & ~alignment_mask))
+				goto unlock;
+
+			/* Possible match, check the byte address select. */
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			if (!((1 << (addr & alignment_mask)) & ctrl.len))
+				goto unlock;
+
+			/* Check that the access type matches. */
+			access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+				 HW_BREAKPOINT_R;
+			if (!(access & hw_breakpoint_type(wp)))
+				goto unlock;
+
+			/* We have a winner. */
+			info->trigger = addr;
+		}
+
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
 
@@ -705,6 +727,7 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 		if (!wp->overflow_handler)
 			enable_single_step(wp, instruction_pointer(regs));
 
+unlock:
 		rcu_read_unlock();
 	}
 }
@@ -717,7 +740,7 @@ static void watchpoint_single_step_handler(unsigned long pc)
 
 	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
-	for (i = 0; i < core_num_reserved_brps; ++i) {
+	for (i = 0; i < core_num_wrps; ++i) {
 		rcu_read_lock();
 
 		wp = slots[i];
@@ -820,7 +843,7 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 	case ARM_ENTRY_ASYNC_WATCHPOINT:
 		WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n");
 	case ARM_ENTRY_SYNC_WATCHPOINT:
-		watchpoint_handler(addr, regs);
+		watchpoint_handler(addr, fsr, regs);
 		break;
 	default:
 		ret = 1; /* Unhandled fault. */
@@ -834,11 +857,31 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
 /*
  * One-time initialisation.
  */
-static void reset_ctrl_regs(void *info)
+static cpumask_t debug_err_mask;
+
+static int debug_reg_trap(struct pt_regs *regs, unsigned int instr)
 {
-	int i, cpu = smp_processor_id();
+	int cpu = smp_processor_id();
+
+	pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		   instr, cpu);
+
+	/* Set the error flag for this CPU and skip the faulting instruction. */
+	cpumask_set_cpu(cpu, &debug_err_mask);
+	instruction_pointer(regs) += 4;
+	return 0;
+}
+
+static struct undef_hook debug_reg_hook = {
+	.instr_mask	= 0x0fe80f10,
+	.instr_val	= 0x0e000e10,
+	.fn		= debug_reg_trap,
+};
+
+static void reset_ctrl_regs(void *unused)
+{
+	int i, raw_num_brps, err = 0, cpu = smp_processor_id();
 	u32 dbg_power;
-	cpumask_t *cpumask = info;
 
 	/*
 	 * v7 debug contains save and restore registers so that debug state
@@ -848,38 +891,57 @@ static void reset_ctrl_regs(void *info)
 	 * Access Register to avoid taking undefined instruction exceptions
 	 * later on.
 	 */
-	if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+	switch (debug_arch) {
+	case ARM_DEBUG_ARCH_V6:
+	case ARM_DEBUG_ARCH_V6_1:
+		/* ARMv6 cores just need to reset the registers. */
+		goto reset_regs;
+	case ARM_DEBUG_ARCH_V7_ECP14:
 		/*
 		 * Ensure sticky power-down is clear (i.e. debug logic is
 		 * powered up).
 		 */
 		asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
-		if ((dbg_power & 0x1) == 0) {
-			pr_warning("CPU %d debug is powered down!\n", cpu);
-			cpumask_or(cpumask, cpumask, cpumask_of(cpu));
-			return;
-		}
-
+		if ((dbg_power & 0x1) == 0)
+			err = -EPERM;
+		break;
+	case ARM_DEBUG_ARCH_V7_1:
 		/*
-		 * Unconditionally clear the lock by writing a value
-		 * other than 0xC5ACCE55 to the access register.
+		 * Ensure the OS double lock is clear.
 		 */
-		asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
-		isb();
+		asm volatile("mrc p14, 0, %0, c1, c3, 4" : "=r" (dbg_power));
+		if ((dbg_power & 0x1) == 1)
+			err = -EPERM;
+		break;
+	}
 
-		/*
-		 * Clear any configured vector-catch events before
-		 * enabling monitor mode.
-		 */
-		asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
-		isb();
+	if (err) {
+		pr_warning("CPU %d debug is powered down!\n", cpu);
+		cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu));
+		return;
 	}
 
+	/*
+	 * Unconditionally clear the lock by writing a value
+	 * other than 0xC5ACCE55 to the access register.
+	 */
+	asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+	isb();
+
+	/*
+	 * Clear any configured vector-catch events before
+	 * enabling monitor mode.
+	 */
+	asm volatile("mcr p14, 0, %0, c0, c7, 0" : : "r" (0));
+	isb();
+
+reset_regs:
 	if (enable_monitor_mode())
 		return;
 
 	/* We must also reset any reserved registers. */
-	for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
+	raw_num_brps = get_num_brp_resources();
+	for (i = 0; i < raw_num_brps; ++i) {
 		write_wb_reg(ARM_BASE_BCR + i, 0UL);
 		write_wb_reg(ARM_BASE_BVR + i, 0UL);
 	}
@@ -895,6 +957,7 @@ static int __cpuinit dbg_reset_notify(struct notifier_block *self,
 {
 	if (action == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+
 	return NOTIFY_OK;
 }
 
@@ -905,7 +968,6 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
 static int __init arch_hw_breakpoint_init(void)
 {
 	u32 dscr;
-	cpumask_t cpumask = { CPU_BITS_NONE };
 
 	debug_arch = get_debug_arch();
 
@@ -916,28 +978,31 @@ static int __init arch_hw_breakpoint_init(void)
 
 	/* Determine how many BRPs/WRPs are available. */
 	core_num_brps = get_num_brps();
-	core_num_reserved_brps = get_num_reserved_brps();
 	core_num_wrps = get_num_wrps();
 
-	pr_info("found %d breakpoint and %d watchpoint registers.\n",
-		core_num_brps + core_num_reserved_brps, core_num_wrps);
-
-	if (core_num_reserved_brps)
-		pr_info("%d breakpoint(s) reserved for watchpoint "
-				"single-step.\n", core_num_reserved_brps);
+	/*
+	 * We need to tread carefully here because DBGSWENABLE may be
+	 * driven low on this core and there isn't an architected way to
+	 * determine that.
+	 */
+	register_undef_hook(&debug_reg_hook);
 
 	/*
 	 * Reset the breakpoint resources. We assume that a halting
 	 * debugger will leave the world in a nice state for us.
 	 */
-	on_each_cpu(reset_ctrl_regs, &cpumask, 1);
-	if (!cpumask_empty(&cpumask)) {
+	on_each_cpu(reset_ctrl_regs, NULL, 1);
+	unregister_undef_hook(&debug_reg_hook);
+	if (!cpumask_empty(&debug_err_mask)) {
 		core_num_brps = 0;
-		core_num_reserved_brps = 0;
 		core_num_wrps = 0;
 		return 0;
 	}
 
+	pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n",
+		core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " :
+		"", core_num_wrps);
+
 	ARM_DBG_READ(c1, 0, dscr);
 	if (dscr & ARM_DSCR_HDBGEN) {
 		max_watchpoint_len = 4;
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index de3dcab8610b..7cb29261249a 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -35,8 +35,8 @@
 #include <linux/list.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
-#include <linux/ftrace.h>
 
+#include <asm/exception.h>
 #include <asm/system.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/irq.h>
@@ -59,9 +59,6 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-#ifdef CONFIG_LOCAL_TIMERS
-	show_local_irqs(p, prec);
-#endif
 	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count);
 	return 0;
 }
diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/kernel/kprobes-arm.c
index 79203ee1d039..9fe8910308af 100644
--- a/arch/arm/kernel/kprobes-arm.c
+++ b/arch/arm/kernel/kprobes-arm.c
@@ -60,6 +60,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -971,6 +972,9 @@ const union decode_item kprobe_decode_arm_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_arm_table);
+#endif
 
 static void __kprobes arm_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c
new file mode 100644
index 000000000000..fc82de8bdcce
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-arm.c
@@ -0,0 +1,1323 @@
+/*
+ * arch/arm/kernel/kprobes-test-arm.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "32"
+
+#define TEST_ARM_TO_THUMB_INTERWORK_R(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_REG(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+#define TEST_ARM_TO_THUMB_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f)					\
+	TEST_ARG_MEM(15, 3f+1)					\
+	TEST_ARG_END("")					\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"3:	adr	lr, 2f		\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_arm_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)")
+
+#define _DATA_PROCESSING_DNM(op,s,val)						\
+	TEST_RR(  op "eq" s "	r0,  r",1, VAL1,", r",2, val, "")		\
+	TEST_RR(  op "ne" s "	r1,  r",1, VAL1,", r",2, val, ", lsl #3")	\
+	TEST_RR(  op "cs" s "	r2,  r",3, VAL1,", r",2, val, ", lsr #4")	\
+	TEST_RR(  op "cc" s "	r3,  r",3, VAL1,", r",2, val, ", asr #5")	\
+	TEST_RR(  op "mi" s "	r4,  r",5, VAL1,", r",2, N(val),", asr #6")	\
+	TEST_RR(  op "pl" s "	r5,  r",5, VAL1,", r",2, val, ", ror #7")	\
+	TEST_RR(  op "vs" s "	r6,  r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", pc, lsl #3")			\
+	TEST_R(   op "vc" s "	r6,  r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vc" s "	r6,  pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vc" s "	r6,  sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "hi" s "	r8,  r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
+	TEST_RRR( op "ls" s "	r9,  r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
+	TEST_RRR( op "ge" s "	r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
+	TEST_RRR( op "lt" s "	r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
+	TEST_RR(  op "gt" s "	r12, r13"       ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op "le" s "	r14, r",0, val, ", r13"       ", lsl r",14,8,"")\
+	TEST_RR(  op s "	r12, pc"        ", r",14,val, ", ror r",14,7,"")\
+	TEST_RR(  op s "	r14, r",0, val, ", pc"        ", lsl r",14,8,"")\
+	TEST_R(   op "eq" s "	r0,  r",11,VAL1,", #0xf5")			\
+	TEST_R(   op "ne" s "	r11, r",0, VAL1,", #0xf5000000")		\
+	TEST_R(   op s "	r7,  r",8, VAL2,", #0x000af000")		\
+	TEST(     op s "	r4,  pc"        ", #0x00005a00")
+
+#define DATA_PROCESSING_DNM(op,val)		\
+	_DATA_PROCESSING_DNM(op,"",val)		\
+	_DATA_PROCESSING_DNM(op,"s",val)
+
+#define DATA_PROCESSING_NM(op,val)						\
+	TEST_RR(  op "ne	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(  op "eq	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(  op "cc	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(  op "cs	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(  op "pl	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(  op "mi	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(  op "vc	r",7, VAL1,", r",2, val, ", rrx")		\
+	TEST_R (  op "vs	r",7, VAL1,", pc, lsl #3")			\
+	TEST_R (  op "vs	r",7, VAL1,", sp, lsr #4")			\
+	TEST_R(   op "vs	pc, r",7, VAL1,", asr #5")			\
+	TEST_R(   op "vs	sp, r",7, VAL1,", ror #6")			\
+	TEST_RRR( op "ls	r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")	\
+	TEST_RRR( op "hi	r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")	\
+	TEST_RRR( op "lt	r",11,VAL1,", r",14,val, ", asr r",7, 5,"")	\
+	TEST_RRR( op "ge	r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "le	r13"       ", r",14,val, ", ror r",14,7,"")	\
+	TEST_RR(  op "gt	r",0, val, ", r13"       ", lsl r",14,8,"")	\
+	TEST_RR(  op "	pc"        ", r",14,val, ", ror r",14,7,"")		\
+	TEST_RR(  op "	r",0, val, ", pc"        ", lsl r",14,8,"")		\
+	TEST_R(   op "eq	r",11,VAL1,", #0xf5")				\
+	TEST_R(   op "ne	r",0, VAL1,", #0xf5000000")			\
+	TEST_R(   op "	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING_DM(op,s,val)					\
+	TEST_R(   op "eq" s "	r0,  r",1, val, "")			\
+	TEST_R(   op "ne" s "	r1,  r",1, val, ", lsl #3")		\
+	TEST_R(   op "cs" s "	r2,  r",3, val, ", lsr #4")		\
+	TEST_R(   op "cc" s "	r3,  r",3, val, ", asr #5")		\
+	TEST_R(   op "mi" s "	r4,  r",5, N(val),", asr #6")		\
+	TEST_R(   op "pl" s "	r5,  r",5, val, ", ror #7")		\
+	TEST_R(   op "vs" s "	r6,  r",10,val, ", rrx")		\
+	TEST(     op "vs" s "	r7,  pc, lsl #3")			\
+	TEST(     op "vs" s "	r7,  sp, lsr #4")			\
+	TEST_RR(  op "vc" s "	r8,  r",7, val, ", lsl r",0, 3,"")	\
+	TEST_RR(  op "hi" s "	r9,  r",9, val, ", lsr r",7, 4,"")	\
+	TEST_RR(  op "ls" s "	r10, r",9, val, ", asr r",7, 5,"")	\
+	TEST_RR(  op "ge" s "	r11, r",11,N(val),", asr r",7, 6,"")	\
+	TEST_RR(  op "lt" s "	r12, r",11,val, ", ror r",14,7,"")	\
+	TEST_R(   op "gt" s "	r14, r13"       ", lsl r",14,8,"")	\
+	TEST_R(   op "le" s "	r14, pc"        ", lsl r",14,8,"")	\
+	TEST(     op "eq" s "	r0,  #0xf5")				\
+	TEST(     op "ne" s "	r11, #0xf5000000")			\
+	TEST(     op s "	r7,  #0x000af000")			\
+	TEST(     op s "	r4,  #0x00005a00")
+
+#define DATA_PROCESSING_DM(op,val)		\
+	_DATA_PROCESSING_DM(op,"",val)		\
+	_DATA_PROCESSING_DM(op,"s",val)
+
+	DATA_PROCESSING_DNM("and",0xf00f00ff)
+	DATA_PROCESSING_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING_DNM("sub",VAL2)
+	DATA_PROCESSING_DNM("rsb",VAL2)
+	DATA_PROCESSING_DNM("add",VAL2)
+	DATA_PROCESSING_DNM("adc",VAL2)
+	DATA_PROCESSING_DNM("sbc",VAL2)
+	DATA_PROCESSING_DNM("rsc",VAL2)
+	DATA_PROCESSING_NM("tst",0xf00f00ff)
+	DATA_PROCESSING_NM("teq",0xf00f00ff)
+	DATA_PROCESSING_NM("cmp",VAL2)
+	DATA_PROCESSING_NM("cmn",VAL2)
+	DATA_PROCESSING_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING_DM("mov",VAL2)
+	DATA_PROCESSING_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING_DM("mvn",VAL2)
+
+	TEST("mov	ip, sp") /* This has special case emulation code */
+
+	TEST_SUPPORTED("mov	pc, #0x1000");
+	TEST_SUPPORTED("mov	sp, #0x1000");
+	TEST_SUPPORTED("cmp	pc, #0x1000");
+	TEST_SUPPORTED("cmp	sp, #0x1000");
+
+	/* Data-processing with PC as shift*/
+	TEST_UNSUPPORTED(".word 0xe15c0f1e	@ cmp	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe1a0cf1e	@ mov	r12, r14, asl pc")
+	TEST_UNSUPPORTED(".word 0xe08caf1e	@ add	r10, r12, r14, asl pc")
+
+	/* Data-processing with PC as shift*/
+	TEST_UNSUPPORTED("movs	pc, r1")
+	TEST_UNSUPPORTED("movs	pc, r1, lsl r2")
+	TEST_UNSUPPORTED("movs	pc, #0x10000")
+	TEST_UNSUPPORTED("adds	pc, lr, r1")
+	TEST_UNSUPPORTED("adds	pc, lr, r1, lsl r2")
+	TEST_UNSUPPORTED("adds	pc, lr, #4")
+
+	/* Data-processing with SP as target */
+	TEST("add	sp, sp, #16")
+	TEST("sub	sp, sp, #8")
+	TEST("bic	sp, sp, #0x20")
+	TEST("orr	sp, sp, #0x20")
+	TEST_PR( "add	sp, r",10,0,", r",11,4,"")
+	TEST_PRR("add	sp, r",10,0,", r",11,4,", asl r",12,1,"")
+	TEST_P(  "mov	sp, r",10,0,"")
+	TEST_PR( "mov	sp, r",10,0,", asl r",12,0,"")
+
+	/* Data-processing with PC as target */
+	TEST_BF(   "add	pc, pc, #2f-1b-8")
+	TEST_BF_R ("add	pc, pc, r",14,2f-1f-8,"")
+	TEST_BF_R ("add	pc, r",14,2f-1f-8,", pc")
+	TEST_BF_R ("mov	pc, r",0,2f,"")
+	TEST_BF_RR("mov	pc, r",0,2f,", asl r",1,0,"")
+	TEST_BB(   "sub	pc, pc, #1b-2b+8")
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_BB(   "sub	pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */
+#endif
+	TEST_BB_R( "sub	pc, pc, r",14, 1f-2f+8,"")
+	TEST_BB_R( "rsb	pc, r",14,1f-2f+8,", pc")
+	TEST_RR(   "add	pc, pc, r",10,-2,", asl r",11,1,"")
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_R("add	pc, pc, r",0,3f-1f-8+1,"")
+	TEST_ARM_TO_THUMB_INTERWORK_R("sub	pc, r",0,3f+8+1,", #8")
+#endif
+	TEST_GROUP("Miscellaneous instructions")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrspl	r7, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".word 0xe10ff000	@ mrs r15, cpsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, lr")
+	TEST_UNSUPPORTED("msr	spsr, r0")
+
+	TEST_BF_R("bx	r",0,2f,"")
+	TEST_BB_R("bx	r",7,2f,"")
+	TEST_BF_R("bxeq	r",14,2f,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clzeq	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+	TEST(  "clz	r4, sp")
+	TEST_UNSUPPORTED(".word 0x016fff10	@ clz pc, r0")
+	TEST_UNSUPPORTED(".word 0x016f0f1f	@ clz r0, pc")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("bxj	r0")
+#endif
+
+	TEST_BF_R("blx	r",0,2f,"")
+	TEST_BB_R("blx	r",7,2f,"")
+	TEST_BF_R("blxeq	r",14,2f,"")
+	TEST_UNSUPPORTED(".word 0x0120003f	@ blx pc")
+
+	TEST_RR(   "qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qsub	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdaddvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdadd	lr, r",9, VAL2,", r13")
+	TEST_RR(   "qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(   "qdsubvs	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_R(    "qdsub	lr, r",9, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe101f050	@ qadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe121f050	@ qsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe141f050	@ qdadd pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe161f050	@ qdsub pc, r0, r1")
+	TEST_UNSUPPORTED(".word 0xe16f2050	@ qdsub r2, r0, pc")
+	TEST_UNSUPPORTED(".word 0xe161205f	@ qdsub r2, pc, r1")
+
+	TEST_UNSUPPORTED("bkpt	0xffff")
+	TEST_UNSUPPORTED("bkpt	0x0000")
+
+	TEST_UNSUPPORTED(".word 0xe1600070 @ smc #0")
+
+	TEST_GROUP("Halfword multiply and multiply-accumulate")
+
+	TEST_RRR(    "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f3281 @ smlabb pc, r1, r2, r3")
+	TEST_RRR(    "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlatbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32a1 @ smlatb pc, r1, r2, r3")
+	TEST_RRR(    "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlabtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlabt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32c1 @ smlabt pc, r1, r2, r3")
+	TEST_RRR(    "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlattge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlatt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe10f32e1 @ smlatt pc, r1, r2, r3")
+
+	TEST_RRR(    "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawbge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawb	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f3281 @ smlawb pc, r1, r2, r3")
+	TEST_RRR(    "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "smlawtge	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "smlawt	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f32c1 @ smlawt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe12032cf @ smlawt r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe1203fc1 @ smlawt r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe120f2c1 @ smlawt r0, r1, r2, pc")
+
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02a1 @ smulwb pc, r1, r2")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulwt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe12f02e1 @ smulwt pc, r1, r2")
+
+	TEST_RRRR(  "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f1382 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f382 @ smlalbb r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlaltble	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltb	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13a2 @ smlaltb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3a2 @ smlaltb r1, pc, r2, r3")
+	TEST_RRRR(  "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalbtle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlalbt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13c2 @ smlalbt pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe141f3c2 @ smlalbt r1, pc, r2, r3")
+	TEST_RRRR(  "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalttle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlaltt	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe14f13e2 @ smlalbb pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe140f3e2 @ smlalbb r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe14013ef @ smlalbb r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe1401fe2 @ smlalbb r0, r1, r2, pc")
+
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f0281 @ smulbb pc, r1, r2")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultbge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultb	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02a1 @ smultb pc, r1, r2")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbtge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smulbt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02c1 @ smultb pc, r1, r2")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulttge	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_R(     "smultt	lr, r",1, VAL2,", r13")
+	TEST_UNSUPPORTED(".word 0xe16f02e1 @ smultt pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe16002ef @ smultt r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe1600fe1 @ smultt r0, r1, pc")
+
+	TEST_GROUP("Multiply and multiply-accumulate")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mulls	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "mul	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe00f0291 @ mul pc, r1, r2")
+	TEST_UNSUPPORTED(".word 0xe000029f @ mul r0, pc, r2")
+	TEST_UNSUPPORTED(".word 0xe0000f91 @ mul r0, r1, pc")
+	TEST_RR(    "muls	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mullss	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_R(     "muls	lr, r",4, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe01f0291 @ muls pc, r1, r2")
+
+	TEST_RRR(    "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mla	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe02f3291 @ mla pc, r1, r2, r3")
+	TEST_RRR(    "mlas	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(    "mlahis	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(     "mlas	lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe03f3291 @ mlas pc, r1, r2, r3")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_RR(  "umaal	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umaalls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umaal	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe041f392 @ umaal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe04f0392 @ umaal r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0500090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe05fff9f @ undef")
+
+	TEST_RRR(  "mls		r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(  "mlshi	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(   "mls		lr, r",1, VAL2,", r",2, VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe06f3291 @ mls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe060329f @ mls r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0603f91 @ mls r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe060f291 @ mls r0, r1, r2, pc")
+#endif
+
+	TEST_UNSUPPORTED(".word 0xe0700090 @ undef")
+	TEST_UNSUPPORTED(".word 0xe07fff9f @ undef")
+
+	TEST_RR(  "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe081f392 @ umull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe08f1392 @ umull r1, pc, r2, r3")
+	TEST_RR(  "umulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "umulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "umulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe091f392 @ umulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe09f1392 @ umulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0af1392 @ umlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0a1f392 @ umlal r1, pc, r2, r3")
+	TEST_RRRR(  "umlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "umlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "umlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0bf1392 @ umlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0b1f392 @ umlals r1, pc, r2, r3")
+
+	TEST_RR(  "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smullls	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smull	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0c1f392 @ smull pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0cf1392 @ smull r1, pc, r2, r3")
+	TEST_RR(  "smulls	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(  "smulllss	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_R(   "smulls	lr, r12, r",11,VAL3,", r13")
+	TEST_UNSUPPORTED(".word 0xe0d1f392 @ smulls pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0df1392 @ smulls r1, pc, r2, r3")
+
+	TEST_RRRR(  "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalle	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlal	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ef1392 @ smlal pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0e1f392 @ smlal r1, pc, r2, r3")
+	TEST_RRRR(  "smlals	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR(  "smlalles	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRR(   "smlals	r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
+	TEST_UNSUPPORTED(".word 0xe0ff1392 @ smlals pc, r1, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0f392 @ smlals r0, pc, r2, r3")
+	TEST_UNSUPPORTED(".word 0xe0f0139f @ smlals r0, r1, pc, r3")
+	TEST_UNSUPPORTED(".word 0xe0f01f92 @ smlals r0, r1, r2, pc")
+
+	TEST_GROUP("Synchronization primitives")
+
+	/*
+	 * Use hard coded constants for SWP instructions to avoid warnings
+	 * about deprecated instructions.
+	 */
+	TEST_RP( ".word 0xe108e097 @ swp	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x610d0091 @ swpvs	r0, r",1,VAL1,", [sp]")
+	TEST_RP( ".word 0xe10cd09e @ swp	sp, r",14,VAL2,", [r",12,13*4,"]")
+	TEST_UNSUPPORTED(".word 0xe102f091 @ swp pc, r1, [r2]")
+	TEST_UNSUPPORTED(".word 0xe102009f @ swp r0, pc, [r2]")
+	TEST_UNSUPPORTED(".word 0xe10f0091 @ swp r0, r1, [pc]")
+	TEST_RP( ".word 0xe148e097 @ swpb	lr, r",7,VAL2,", [r",8,0,"]")
+	TEST_R(  ".word 0x614d0091 @ swpvsb	r0, r",1,VAL1,", [sp]")
+	TEST_UNSUPPORTED(".word 0xe142f091 @ swpb pc, r1, [r2]")
+
+	TEST_UNSUPPORTED(".word	0xe1100090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1200090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1300090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1500090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1600090") /* Unallocated space */
+	TEST_UNSUPPORTED(".word	0xe1700090") /* Unallocated space */
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("ldrex	r2, [sp]")
+	TEST_UNSUPPORTED("strexd	r0, r2, r3, [sp]")
+	TEST_UNSUPPORTED("ldrexd	r2, r3, [sp]")
+	TEST_UNSUPPORTED("strexb	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexb	r2, [sp]")
+	TEST_UNSUPPORTED("strexh	r0, r2, [sp]")
+	TEST_UNSUPPORTED("ldrexh	r2, [sp]")
+#endif
+	TEST_GROUP("Extra load/store instructions")
+
+	TEST_RPR(  "strh	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
+	TEST_RPR(  "streqh	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")
+	TEST_RPR(  "strh	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")
+	TEST_RPR(  "strneh	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strh	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
+	TEST_RPR(  "strh	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0ba	@ strh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0bb	@ strh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089a0bf	@ strh r10, [r9], pc")
+
+	TEST_PR(   "ldrh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrcsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrcch	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0ba	@ ldrh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0bb	@ ldrh pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe099a0bf	@ ldrh r10, [r9], pc")
+
+	TEST_RP(   "strh	r",0, VAL1,", [r",1, 24,", #-2]")
+	TEST_RP(   "strmih	r",14,VAL2,", [r",13,0, ", #2]")
+	TEST_RP(   "strh	r",1, VAL1,", [r",2, 24,", #4]!")
+	TEST_RP(   "strplh	r",12,VAL2,", [r",11,24,", #-4]!")
+	TEST_RP(   "strh	r",2, VAL1,", [r",3, 24,"], #48")
+	TEST_RP(   "strh	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3b0	@ strh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3b0	@ strh pc, [r9], #48")
+
+	TEST_P(	   "ldrh	r0, [r",0,  24,", #-2]")
+	TEST_P(	   "ldrvsh	r14, [r",13,0, ", #2]")
+	TEST_P(	   "ldrh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrvch	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3b0	@ ldrh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3b0	@ ldrh pc, [r9], #48")
+
+	TEST_PR(   "ldrsb	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrhisb	r14, [r",13,0,", r",12,  48,"]")
+	TEST_PR(   "ldrsb	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlssb	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsb	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsb	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0da	@ ldrsb r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0db	@ ldrsb pc, [r9], r11")
+
+	TEST_P(	   "ldrsb	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldrgesb	r14, [r",13,0, ", #1]")
+	TEST_P(	   "ldrsb	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrltsb	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsb	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsb	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsb	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3d0	@ ldrsb r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3d0	@ ldrsb pc, [r9], #48")
+
+	TEST_PR(   "ldrsh	r0, [r",0,  48,", -r",2, 24,"]")
+	TEST_PR(   "ldrgtsh	r14, [r",13,0, ", r",12, 48,"]")
+	TEST_PR(   "ldrsh	r1, [r",2,  24,", r",3,  48,"]!")
+	TEST_PR(   "ldrlesh	r12, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrsh	r2, [r",3,  24,"], r",4, 48,"")
+	TEST_PR(   "ldrsh	r10, [r",9, 48,"], -r",11,24,"")
+	TEST_UNSUPPORTED(".word 0xe1bfc0fa	@ ldrsh r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe099f0fb	@ ldrsh pc, [r9], r11")
+
+	TEST_P(	   "ldrsh	r0, [r",0,  24,", #-1]")
+	TEST_P(	   "ldreqsh	r14, [r",13,0 ,", #1]")
+	TEST_P(	   "ldrsh	r1, [r",2,  24,", #4]!")
+	TEST_P(	   "ldrnesh	r12, [r",11,24,", #-4]!")
+	TEST_P(	   "ldrsh	r2, [r",3,  24,"], #48")
+	TEST_P(	   "ldrsh	r10, [r",9, 64,"], #-48")
+	TEST(      "ldrsh	r0, [pc, #0]")
+	TEST_UNSUPPORTED(".word 0xe1ffc3f0	@ ldrsh r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0d9f3f0	@ ldrsh pc, [r9], #48")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("strht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], r3")
+	TEST_UNSUPPORTED("strht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrht	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsbt	r1, [r2], #48")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], r3")
+	TEST_UNSUPPORTED("ldrsht	r1, [r2], #48")
+#endif
+
+	TEST_RPR(  "strd	r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
+	TEST_RPR(  "strccd	r",8, VAL2,", [r",13,0, ", r",12,48,"]")
+	TEST_RPR(  "strd	r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
+	TEST_RPR(  "strcsd	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+	TEST_RPR(  "strd	r",2, VAL1,", [r",3, 24,"], r",4,48,"")
+	TEST_RPR(  "strd	r",10,VAL2,", [r",9, 48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0fa	@ strd r12, [pc, r10]!")
+
+	TEST_PR(   "ldrd	r0, [r",0, 48,", -r",2,24,"]")
+	TEST_PR(   "ldrmid	r8, [r",13,0, ", r",12,48,"]")
+	TEST_PR(   "ldrd	r4, [r",2, 24,", r",3, 48,"]!")
+	TEST_PR(   "ldrpld	r6, [r",11,48,", -r",10,24,"]!")
+	TEST_PR(   "ldrd	r2, [r",5, 24,"], r",4,48,"")
+	TEST_PR(   "ldrd	r10, [r",9,48,"], -r",7,24,"")
+	TEST_UNSUPPORTED(".word 0xe1afc0da	@ ldrd r12, [pc, r10]!")
+	TEST_UNSUPPORTED(".word 0xe089f0db	@ ldrd pc, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089e0db	@ ldrd lr, [r9], r11")
+	TEST_UNSUPPORTED(".word 0xe089c0df	@ ldrd r12, [r9], pc")
+
+	TEST_RP(   "strd	r",0, VAL1,", [r",1, 24,", #-8]")
+	TEST_RP(   "strvsd	r",8, VAL2,", [r",13,0, ", #8]")
+	TEST_RP(   "strd	r",4, VAL1,", [r",2, 24,", #16]!")
+	TEST_RP(   "strvcd	r",12,VAL2,", [r",11,24,", #-16]!")
+	TEST_RP(   "strd	r",2, VAL1,", [r",4, 24,"], #48")
+	TEST_RP(   "strd	r",10,VAL2,", [r",9, 64,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3f0	@ strd r12, [pc, #48]!")
+
+	TEST_P(	   "ldrd	r0, [r",0, 24,", #-8]")
+	TEST_P(	   "ldrhid	r8, [r",13,0, ", #8]")
+	TEST_P(	   "ldrd	r4, [r",2, 24,", #16]!")
+	TEST_P(	   "ldrlsd	r6, [r",11,24,", #-16]!")
+	TEST_P(	   "ldrd	r2, [r",5, 24,"], #48")
+	TEST_P(	   "ldrd	r10, [r",9,6,"], #-48")
+	TEST_UNSUPPORTED(".word 0xe1efc3d0	@ ldrd r12, [pc, #48]!")
+	TEST_UNSUPPORTED(".word 0xe0c9f3d0	@ ldrd pc, [r9], #48")
+	TEST_UNSUPPORTED(".word 0xe0c9e3d0	@ ldrd lr, [r9], #48")
+
+	TEST_GROUP("Miscellaneous")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".word 0xe300f000	@ movw pc, #0")
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".word 0xe340f000	@ movt pc, #0")
+#endif
+
+	TEST_UNSUPPORTED("msr	cpsr, 0x13")
+	TEST_UNSUPPORTED("msr	cpsr_f, 0xf0000000")
+	TEST_UNSUPPORTED("msr	spsr, 0x13")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED("dbg #0")
+#endif
+
+	TEST_GROUP("Load/store word and unsigned byte")
+
+#define LOAD_STORE(byte)							\
+	TEST_RP( "str"byte"	r",0, VAL1,", [r",1, 24,", #-2]")		\
+	TEST_RP( "str"byte"	r",14,VAL2,", [r",13,0, ", #2]")		\
+	TEST_RP( "str"byte"	r",1, VAL1,", [r",2, 24,", #4]!")		\
+	TEST_RP( "str"byte"	r",12,VAL2,", [r",11,24,", #-4]!")		\
+	TEST_RP( "str"byte"	r",2, VAL1,", [r",3, 24,"], #48")		\
+	TEST_RP( "str"byte"	r",10,VAL2,", [r",9, 64,"], #-48")		\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")	\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 48,"]")	\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  48,"]!")	\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,48,", -r",10,24,"]!")	\
+	TEST_RPR("str"byte"	r",2, VAL1,", [r",3, 24,"], r",4, 48,"")	\
+	TEST_RPR("str"byte"	r",10,VAL2,", [r",9, 48,"], -r",11,24,"")	\
+	TEST_RPR("str"byte"	r",0, VAL1,", [r",1, 24,", r",2,  32,", asl #1]")\
+	TEST_RPR("str"byte"	r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\
+	TEST_RPR("str"byte"	r",1, VAL1,", [r",2, 24,", r",3,  32,", asr #3]!")\
+	TEST_RPR("str"byte"	r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\
+	TEST_P(  "ldr"byte"	r0, [r",0,  24,", #-2]")			\
+	TEST_P(  "ldr"byte"	r14, [r",13,0, ", #2]")				\
+	TEST_P(  "ldr"byte"	r1, [r",2,  24,", #4]!")			\
+	TEST_P(  "ldr"byte"	r12, [r",11,24,", #-4]!")			\
+	TEST_P(  "ldr"byte"	r2, [r",3,  24,"], #48")			\
+	TEST_P(  "ldr"byte"	r10, [r",9, 64,"], #-48")			\
+	TEST_PR( "ldr"byte"	r0, [r",0,  48,", -r",2, 24,"]")		\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 48,"]")		\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3, 48,"]!")		\
+	TEST_PR( "ldr"byte"	r12, [r",11,48,", -r",10,24,"]!")		\
+	TEST_PR( "ldr"byte"	r2, [r",3,  24,"], r",4, 48,"")			\
+	TEST_PR( "ldr"byte"	r10, [r",9, 48,"], -r",11,24,"")		\
+	TEST_PR( "ldr"byte"	r0, [r",0,  24,", r",2,  32,", asl #1]")	\
+	TEST_PR( "ldr"byte"	r14, [r",13,0, ", r",12, 32,", lsr #2]")	\
+	TEST_PR( "ldr"byte"	r1, [r",2,  24,", r",3,  32,", asr #3]!")	\
+	TEST_PR( "ldr"byte"	r12, [r",11,24,", r",10, 4,", ror #31]!")	\
+	TEST(    "ldr"byte"	r0, [pc, #0]")					\
+	TEST_R(  "ldr"byte"	r12, [pc, r",14,0,"]")
+
+	LOAD_STORE("")
+	TEST_P(   "str	pc, [r",0,0,", #15*4]")
+	TEST_R(   "str	pc, [sp, r",2,15*4,"]")
+	TEST_BF(  "ldr	pc, [sp, #15*4]")
+	TEST_BF_R("ldr	pc, [sp, r",2,15*4,"]")
+
+	TEST_P(   "str	sp, [r",0,0,", #13*4]")
+	TEST_R(   "str	sp, [sp, r",2,13*4,"]")
+	TEST_BF(  "ldr	sp, [sp, #13*4]")
+	TEST_BF_R("ldr	sp, [sp, r",2,13*4,"]")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+#endif
+	TEST_UNSUPPORTED(".word 0xe5af6008	@ str r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7af6008	@ str r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5bf6008	@ ldr r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7bf6008	@ ldr r6, [pc, r8]!")
+	TEST_UNSUPPORTED(".word 0xe788600f	@ str r6, [r8, pc]")
+	TEST_UNSUPPORTED(".word 0xe798600f	@ ldr r6, [r8, pc]")
+
+	LOAD_STORE("b")
+	TEST_UNSUPPORTED(".word 0xe5f7f008	@ ldrb pc, [r7, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7f7f008	@ ldrb pc, [r7, r8]!")
+	TEST_UNSUPPORTED(".word 0xe5ef6008	@ strb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ef6008	@ strb r6, [pc, r3]!")
+	TEST_UNSUPPORTED(".word 0xe5ff6008	@ ldrb r6, [pc, #8]!")
+	TEST_UNSUPPORTED(".word 0xe7ff6008	@ ldrb r6, [pc, r3]!")
+
+	TEST_UNSUPPORTED("ldrt	r0, [r1], #4")
+	TEST_UNSUPPORTED("ldrt	r1, [r2], r3")
+	TEST_UNSUPPORTED("strt	r2, [r3], #4")
+	TEST_UNSUPPORTED("strt	r3, [r4], r5")
+	TEST_UNSUPPORTED("ldrbt	r4, [r5], #4")
+	TEST_UNSUPPORTED("ldrbt	r5, [r6], r7")
+	TEST_UNSUPPORTED("strbt	r6, [r7], #4")
+	TEST_UNSUPPORTED("strbt	r7, [r8], r9")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	TEST_UNSUPPORTED(".word 0xe6000010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe60fffff") /* Unallocated space */
+
+	TEST_RR(    "sadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff1a	@ sadd16	pc, r12, r10")
+	TEST_RR(    "sasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff3a	@ sasx	pc, r12, r10")
+	TEST_RR(    "ssax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff5a	@ ssax	pc, r12, r10")
+	TEST_RR(    "ssub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff7a	@ ssub16	pc, r12, r10")
+	TEST_RR(    "sadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cff9a	@ sadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe61000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe61fffdf") /* Unallocated space */
+	TEST_RR(    "ssub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "ssub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe61cfffa	@ ssub8	pc, r12, r10")
+
+	TEST_RR(    "qadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff1a	@ qadd16	pc, r12, r10")
+	TEST_RR(    "qasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff3a	@ qasx	pc, r12, r10")
+	TEST_RR(    "qsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff5a	@ qsax	pc, r12, r10")
+	TEST_RR(    "qsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff7a	@ qsub16	pc, r12, r10")
+	TEST_RR(    "qadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cff9a	@ qadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe62000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe62fffdf") /* Unallocated space */
+	TEST_RR(    "qsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "qsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe62cfffa	@ qsub8	pc, r12, r10")
+
+	TEST_RR(    "shadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff1a	@ shadd16	pc, r12, r10")
+	TEST_RR(    "shasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff3a	@ shasx	pc, r12, r10")
+	TEST_RR(    "shsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff5a	@ shsax	pc, r12, r10")
+	TEST_RR(    "shsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff7a	@ shsub16	pc, r12, r10")
+	TEST_RR(    "shadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cff9a	@ shadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe63000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe63fffdf") /* Unallocated space */
+	TEST_RR(    "shsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "shsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe63cfffa	@ shsub8	pc, r12, r10")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	TEST_UNSUPPORTED(".word 0xe6400010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe64fffff") /* Unallocated space */
+
+	TEST_RR(    "uadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff1a	@ uadd16	pc, r12, r10")
+	TEST_RR(    "uasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff3a	@ uasx	pc, r12, r10")
+	TEST_RR(    "usax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff5a	@ usax	pc, r12, r10")
+	TEST_RR(    "usub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff7a	@ usub16	pc, r12, r10")
+	TEST_RR(    "uadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cff9a	@ uadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe65000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe65fffdf") /* Unallocated space */
+	TEST_RR(    "usub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "usub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe65cfffa	@ usub8	pc, r12, r10")
+
+	TEST_RR(    "uqadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff1a	@ uqadd16	pc, r12, r10")
+	TEST_RR(    "uqasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff3a	@ uqasx	pc, r12, r10")
+	TEST_RR(    "uqsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff5a	@ uqsax	pc, r12, r10")
+	TEST_RR(    "uqsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff7a	@ uqsub16	pc, r12, r10")
+	TEST_RR(    "uqadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cff9a	@ uqadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe66000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe66fffdf") /* Unallocated space */
+	TEST_RR(    "uqsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uqsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe66cfffa	@ uqsub8	pc, r12, r10")
+
+	TEST_RR(    "uhadd16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff1a	@ uhadd16	pc, r12, r10")
+	TEST_RR(    "uhasx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhasx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff3a	@ uhasx	pc, r12, r10")
+	TEST_RR(    "uhsax	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsax	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff5a	@ uhsax	pc, r12, r10")
+	TEST_RR(    "uhsub16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub16	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff7a	@ uhsub16	pc, r12, r10")
+	TEST_RR(    "uhadd8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhadd8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cff9a	@ uhadd8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67000b0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffbf") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67000d0") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe67fffdf") /* Unallocated space */
+	TEST_RR(    "uhsub8	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uhsub8	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe67cfffa	@ uhsub8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe67feffa	@ uhsub8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe67cefff	@ uhsub8	r14, r12, pc")
+#endif /* __LINUX_ARM_ARCH__ >= 7 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Packing, unpacking, saturation, and reversal")
+
+	TEST_RR(    "pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_UNSUPPORTED(".word 0xe68cf11a	@ pkhbt	pc, r12, r10, lsl #2")
+	TEST_RR(    "pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+	TEST_UNSUPPORTED(".word 0xe68cf15a	@ pkhtb	pc, r12, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68fe15a	@ pkhtb	r14, pc, r10, asr #2")
+	TEST_UNSUPPORTED(".word 0xe68ce15f	@ pkhtb	r14, r12, pc, asr #2")
+	TEST_UNSUPPORTED(".word 0xe6900010") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fffdf") /* Unallocated space */
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6b7f01c	@ ssat	pc, #24, r12")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".word 0xe6f7f01c	@ usat	pc, #24, r12")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe68cf47a	@ sxtab16	pc,r12, r10, ror #8")
+
+	TEST_RR(    "sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "sel	r14, r",12,VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe68cffba	@ sel	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe68fefba	@ sel	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe68cefbf	@ sel	r14, r12, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6abff3c	@ ssat16	pc, #12, r12")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6acf47a	@ sxtab	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev	r0, r",0,   VAL1,"")
+	TEST_R(     "rev	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfff3c	@ rev	pc, r12")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6bcf47a	@ sxtah	pc,r12, r10, ror #8")
+
+	TEST_R(     "rev16	r0, r",0,   VAL1,"")
+	TEST_R(     "rev16	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6bfffbc	@ rev16	pc, r12")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ccf47a	@ uxtab16	pc,r12, r10, ror #8")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".word 0xe6ecff3c	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".word 0xe6ecef3f	@ usat16	r14, #12, pc")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6ecf47a	@ uxtab	pc,r12, r10, ror #8")
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_R(     "rbit	r0, r",0,   VAL1,"")
+	TEST_R(     "rbit	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ rbit	pc, r12")
+#endif
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+	TEST_UNSUPPORTED(".word 0xe6fff077	@ uxth	pc, r7")
+	TEST_UNSUPPORTED(".word 0xe6ff807f	@ uxth	r8, pc")
+	TEST_UNSUPPORTED(".word 0xe6fcf47a	@ uxtah	pc, r12, r10, ror #8")
+	TEST_UNSUPPORTED(".word 0xe6fce47f	@ uxtah	r14, r12, pc, ror #8")
+
+	TEST_R(     "revsh	r0, r",0,   VAL1,"")
+	TEST_R(     "revsh	r14, r",12, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe6ffff3c	@ revsh	pc, r12")
+	TEST_UNSUPPORTED(".word 0xe6ffef3f	@ revsh	r14, pc")
+
+	TEST_UNSUPPORTED(".word 0xe6900070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe69fff7f") /* Unallocated space */
+
+	TEST_UNSUPPORTED(".word 0xe6d00070") /* Unallocated space */
+	TEST_UNSUPPORTED(".word 0xe6dfff7f") /* Unallocated space */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_GROUP("Signed multiplies")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a1c	@ smlad	pc, r12, r10, r8")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a3c	@ smladx	pc, r12, r10, r8")
+
+	TEST_RR(   "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa1c	@ smuad	pc, r12, r10")
+	TEST_RR(   "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa3c	@ smuadx	pc, r12, r10")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a5c	@ smlsd	pc, r12, r10, r8")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe70f8a7c	@ smlsdx	pc, r12, r10, r8")
+
+	TEST_RR(   "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa5c	@ smusd	pc, r12, r10")
+	TEST_RR(   "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(   "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_UNSUPPORTED(".word 0xe70ffa7c	@ smusdx	pc, r12, r10")
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af819	@ smlald	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb819	@ smlald	r11, pc, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74ab81f	@ smlald	r11, r10, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe74abf19	@ smlald	r11, r10, r9, pc")
+
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_UNSUPPORTED(".word 0xe74af839	@ smlaldx	pc, r10, r9, r8")
+	TEST_UNSUPPORTED(".word 0xe74fb839	@ smlaldx	r11, pc, r9, r8")
+
+	TEST_RRR(  "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a1c	@ smmla	pc, r12, r10, r8")
+	TEST_RRR(  "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8a3c	@ smmlar	pc, r12, r10, r8")
+
+	TEST_RR(   "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ smmul	pc, r12, r10")
+	TEST_RR(   "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa3c	@ smmulr	pc, r12, r10")
+
+	TEST_RRR(  "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8adc	@ smmls	pc, r12, r10, r8")
+	TEST_RRR(  "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(  "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_UNSUPPORTED(".word 0xe75f8afc	@ smmlsr	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8aff	@ smmlsr	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe75e8ffc	@ smmlsr	r14, r12, pc, r8")
+	TEST_UNSUPPORTED(".word 0xe75efafc	@ smmlsr	r14, r12, r10, pc")
+
+	TEST_RR(   "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(   "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_UNSUPPORTED(".word 0xe75ffa1c	@ usad8	pc, r12, r10")
+	TEST_UNSUPPORTED(".word 0xe75efa1f	@ usad8	r14, pc, r10")
+	TEST_UNSUPPORTED(".word 0xe75eff1c	@ usad8	r14, r12, pc")
+
+	TEST_RRR(  "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(  "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_UNSUPPORTED(".word 0xe78f8a1c	@ usada8	pc, r12, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8a1f	@ usada8	r14, pc, r10, r8")
+	TEST_UNSUPPORTED(".word 0xe78e8f1c	@ usada8	r14, r12, pc, r8")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_GROUP("Bit Field")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfxeq	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7aff45c	@ sbfx	pc, r12, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfxcs	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".word 0xe7eff45c	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".word 0xe7efc45f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfcvs	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".word 0xe7def01f	@ bfc	pc, #0, #31");
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfipl	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".word 0xe7d7f21e	@ bfi	pc, r14, #4, #20")
+
+	TEST_UNSUPPORTED(".word 0x07f000f0")  /* Permanently UNDEFINED */
+	TEST_UNSUPPORTED(".word 0x07ffffff")  /* Permanently UNDEFINED */
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	TEST_GROUP("Branch, branch with link, and block data transfer")
+
+	TEST_P(   "stmda	r",0, 16*4,", {r0}")
+	TEST_P(   "stmeqda	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmneda	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmda	r",13,0,   "!, {pc}")
+
+	TEST_P(   "ldmda	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmcsda	r",4, 15*4,", {r0-r15}")
+	TEST_BF_P("ldmccda	r",7, 15*4,"!, {r8-r15}")
+	TEST_P(   "ldmda	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmda	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0}")
+	TEST_P(   "stmmiia	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmplia	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmia	r",14,0,   "!, {pc}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmvsia	r",4, 0,   ", {r0-r15}")
+	TEST_BF_P("ldmvcia	r",7, 8*4, "!, {r8-r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,15*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0}")
+	TEST_P(   "stmhidb	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmlsdb	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmdb	r",13,4,   "!, {pc}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmgedb	r",4, 16*4,", {r0-r15}")
+	TEST_BF_P("ldmltdb	r",7, 16*4,"!, {r8-r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {pc}")
+
+	TEST_P(   "stmib	r",0, 16*4,", {r0}")
+	TEST_P(   "stmgtib	r",4, 16*4,", {r0-r15}")
+	TEST_P(   "stmleib	r",8, 16*4,"!, {r8-r15}")
+	TEST_P(   "stmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_P(   "stmib	r",13,-4,  "!, {pc}")
+
+	TEST_P(   "ldmib	r",0, 16*4,", {r0}")
+	TEST_BF_P("ldmeqib	r",4, -4,", {r0-r15}")
+	TEST_BF_P("ldmneib	r",7, 7*4,"!, {r8-r15}")
+	TEST_P(   "ldmib	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmib	r",14,14*4,"!, {pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmeqdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmnedb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmccia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmcsia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+#ifdef CONFIG_THUMB2_KERNEL
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmplia	r",0,15*4,", {pc}")
+	TEST_ARM_TO_THUMB_INTERWORK_P("ldmmiia	r",13,0,", {r0-r15}")
+#endif
+	TEST_BF("b	2f")
+	TEST_BF("bl	2f")
+	TEST_BB("b	2b")
+	TEST_BB("bl	2b")
+
+	TEST_BF("beq	2f")
+	TEST_BF("bleq	2f")
+	TEST_BB("bne	2b")
+	TEST_BB("blne	2b")
+
+	TEST_BF("bgt	2f")
+	TEST_BF("blgt	2f")
+	TEST_BB("blt	2b")
+	TEST_BB("bllt	2b")
+
+	TEST_GROUP("Supervisor Call, and coprocessor instructions")
+
+	/*
+	 * We can't really test these by executing them, so all
+	 * we can do is check that probes are, or are not allowed.
+	 * At the moment none are allowed...
+	 */
+#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code)
+
+#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc)					\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("stc"two"l	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"	0, cr0, [r13], {1}")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13, #-4]!")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], #-4")			\
+	TEST_COPROCESSOR("ldc"two"l	0, cr0, [r13], {1}")			\
+										\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"daf0001	@ stc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d2f0001	@ stc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"caf0001	@ stc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c2f0001	@ stc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"def0001	@ stc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d6f0001	@ stc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cef0001	@ stc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c6f0001	@ stc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "stc"two"l	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dbf0001	@ ldc"two"	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d3f0001	@ ldc"two"	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cbf0001	@ ldc"two"	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c3f0001	@ ldc"two"	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"	0, cr0, [r15], {1}")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #4]")			\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15, #-4]")			\
+	TEST_UNSUPPORTED(".word 0x"cc"dff0001	@ ldc"two"l	0, cr0, [r15, #4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"d7f0001	@ ldc"two"l	0, cr0, [r15, #-4]!")	\
+	TEST_UNSUPPORTED(".word 0x"cc"cff0001	@ ldc"two"l	0, cr0, [r15], #4")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c7f0001	@ ldc"two"l	0, cr0, [r15], #-4")	\
+	TEST_COPROCESSOR( "ldc"two"l	0, cr0, [r15], {1}")
+
+#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc)					\
+										\
+	TEST_COPROCESSOR( "mcrr"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mcrr"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c4f00f0	@ mcrr"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c40ff0f	@ mcrr"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "mrrc"two"	0, 15, r0, r14, cr0")			\
+	TEST_COPROCESSOR( "mrrc"two"	15, 0, r14, r0, cr15")			\
+	TEST_UNSUPPORTED(".word 0x"cc"c5f00f0	@ mrrc"two"	0, 15, r0, r15, cr0")	\
+	TEST_UNSUPPORTED(".word 0x"cc"c50ff0f	@ mrrc"two"	15, 0, r15, r0, cr15")	\
+	TEST_COPROCESSOR( "cdp"two"	15, 15, cr15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "cdp"two"	0, 0, cr0, cr0, cr0, 0")		\
+	TEST_COPROCESSOR( "mcr"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mcr"two"	0, 0, r0, cr0, cr0, 0")			\
+	TEST_COPROCESSOR( "mrc"two"	15, 7, r15, cr15, cr15, 7")		\
+	TEST_COPROCESSOR( "mrc"two"	0, 0, r0, cr0, cr0, 0")
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("","e")
+	COPROCESSOR_INSTRUCTIONS_MC_MR("","e")
+	TEST_UNSUPPORTED("svc	0")
+	TEST_UNSUPPORTED("svc	0xffffff")
+
+	TEST_UNSUPPORTED("svc	0")
+
+	TEST_GROUP("Unconditional instruction")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("srsda	sp, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp, 0x13")
+	TEST_UNSUPPORTED("srsia	sp, 0x13")
+	TEST_UNSUPPORTED("srsib	sp, 0x13")
+	TEST_UNSUPPORTED("srsda	sp!, 0x13")
+	TEST_UNSUPPORTED("srsdb	sp!, 0x13")
+	TEST_UNSUPPORTED("srsia	sp!, 0x13")
+	TEST_UNSUPPORTED("srsib	sp!, 0x13")
+
+	TEST_UNSUPPORTED("rfeda	sp")
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfeib	sp")
+	TEST_UNSUPPORTED("rfeda	sp!")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+	TEST_UNSUPPORTED("rfeib	sp!")
+	TEST_UNSUPPORTED(".word 0xf81d0a00	@ rfeda	pc")
+	TEST_UNSUPPORTED(".word 0xf91d0a00	@ rfedb	pc")
+	TEST_UNSUPPORTED(".word 0xf89d0a00	@ rfeia	pc")
+	TEST_UNSUPPORTED(".word 0xf99d0a00	@ rfeib	pc")
+	TEST_UNSUPPORTED(".word 0xf83d0a00	@ rfeda	pc!")
+	TEST_UNSUPPORTED(".word 0xf93d0a00	@ rfedb	pc!")
+	TEST_UNSUPPORTED(".word 0xf8bd0a00	@ rfeia	pc!")
+	TEST_UNSUPPORTED(".word 0xf9bd0a00	@ rfeib	pc!")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_X(	"blx	__dummy_thumb_subroutine_even",
+		".thumb				\n\t"
+		".space 4			\n\t"
+		".type __dummy_thumb_subroutine_even, %%function \n\t"
+		"__dummy_thumb_subroutine_even:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_even")
+
+	TEST_X(	"blx	__dummy_thumb_subroutine_odd",
+		".thumb				\n\t"
+		".space 2			\n\t"
+		".type __dummy_thumb_subroutine_odd, %%function	\n\t"
+		"__dummy_thumb_subroutine_odd:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".arm				\n\t"
+	)
+	TEST(	"blx	__dummy_thumb_subroutine_odd")
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+	COPROCESSOR_INSTRUCTIONS_ST_LD("2","f")
+#if __LINUX_ARM_ARCH__ >= 6
+	COPROCESSOR_INSTRUCTIONS_MC_MR("2","f")
+#endif
+
+	TEST_GROUP("Miscellaneous instructions, memory hints, and Advanced SIMD instructions")
+
+#if __LINUX_ARM_ARCH__ >= 6
+	TEST_UNSUPPORTED("cps	0x13")
+	TEST_UNSUPPORTED("cpsie	i")
+	TEST_UNSUPPORTED("cpsid	i")
+	TEST_UNSUPPORTED("cpsie	i,0x13")
+	TEST_UNSUPPORTED("cpsid	i,0x13")
+	TEST_UNSUPPORTED("setend	le")
+	TEST_UNSUPPORTED("setend	be")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_P("pli	[r",0,0b,", #16]")
+	TEST(  "pli	[pc, #0]")
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 5
+	TEST_P("pld	[r",0,32,", #-16]")
+	TEST(  "pld	[pc, #0]")
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", -r",12,16,", lsl #4]")
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_SUPPORTED(  ".word 0xf590f000	@ pldw [r0, #0]")
+	TEST_SUPPORTED(  ".word 0xf797f000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(  ".word 0xf798f18c	@ pldw	[r8, r12, lsl #3]");
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+#endif
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/kernel/kprobes-test-thumb.c
new file mode 100644
index 000000000000..5e726c31c45a
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test-thumb.c
@@ -0,0 +1,1187 @@
+/*
+ * arch/arm/kernel/kprobes-test-thumb.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "kprobes-test.h"
+
+
+#define TEST_ISA "16"
+
+#define DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_flags |= TEST_FLAG_NO_ITBLOCK;	\
+	tests						\
+	kprobe_test_flags &= ~TEST_FLAG_NO_ITBLOCK;
+
+#define CONDITION_INSTRUCTIONS(cc_pos, tests)		\
+	kprobe_test_cc_position = cc_pos;		\
+	DONT_TEST_IN_ITBLOCK(tests)			\
+	kprobe_test_cc_position = 0;
+
+#define TEST_ITBLOCK(code)				\
+	kprobe_test_flags |= TEST_FLAG_FULL_ITBLOCK;	\
+	TESTCASE_START(code)				\
+	TEST_ARG_END("")				\
+	"50:	nop			\n\t"		\
+	"1:	"code"			\n\t"		\
+	"	mov r1, #0x11		\n\t"		\
+	"	mov r2, #0x22		\n\t"		\
+	"	mov r3, #0x33		\n\t"		\
+	"2:	nop			\n\t"		\
+	TESTCASE_END					\
+	kprobe_test_flags &= ~TEST_FLAG_FULL_ITBLOCK;
+
+#define TEST_THUMB_TO_ARM_INTERWORK_P(code1, reg, val, code2)	\
+	TESTCASE_START(code1 #reg code2)			\
+	TEST_ARG_PTR(reg, val)					\
+	TEST_ARG_REG(14, 99f+1)					\
+	TEST_ARG_MEM(15, 3f)					\
+	TEST_ARG_END("")					\
+	"	nop			\n\t" /* To align 1f */	\
+	"50:	nop			\n\t"			\
+	"1:	"code1 #reg code2"	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".arm				\n\t"			\
+	"3:	adr	lr, 2f+1	\n\t"			\
+	"	bx	lr		\n\t"			\
+	".thumb				\n\t"			\
+	"2:	nop			\n\t"			\
+	TESTCASE_END
+
+
+void kprobe_thumb16_test_cases(void)
+{
+	kprobe_test_flags = TEST_FLAG_NARROW_INSTR;
+
+	TEST_GROUP("Shift (immediate), add, subtract, move, and compare")
+
+	TEST_R(    "lsls	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsls	r0, r",7,VAL2,", #11")
+	TEST_R(    "lsrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "lsrs	r0, r",7,VAL2,", #11")
+	TEST_R(    "asrs	r7, r",0,VAL1,", #5")
+	TEST_R(    "asrs	r0, r",7,VAL2,", #11")
+	TEST_RR(   "adds	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "adds	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_RR(   "subs	r2, r",0,VAL1,", r",7,VAL2,"")
+	TEST_RR(   "subs	r5, r",7,VAL2,", r",0,VAL2,"")
+	TEST_R(    "adds	r7, r",0,VAL1,", #5")
+	TEST_R(    "adds	r0, r",7,VAL2,", #2")
+	TEST_R(    "subs	r7, r",0,VAL1,", #5")
+	TEST_R(    "subs	r0, r",7,VAL2,", #2")
+	TEST(      "movs.n	r0, #0x5f")
+	TEST(      "movs.n	r7, #0xa0")
+	TEST_R(    "cmp.n	r",0,0x5e, ", #0x5f")
+	TEST_R(    "cmp.n	r",5,0x15f,", #0x5f")
+	TEST_R(    "cmp.n	r",7,0xa0, ", #0xa0")
+	TEST_R(    "adds.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "adds.n	r",7,VAL2,", #0xa0")
+	TEST_R(    "subs.n	r",0,VAL1,", #0x5f")
+	TEST_R(    "subs.n	r",7,VAL2,", #0xa0")
+
+	TEST_GROUP("16-bit Thumb data-processing instructions")
+
+#define DATA_PROCESSING16(op,val)			\
+	TEST_RR(   op"	r",0,VAL1,", r",7,val,"")	\
+	TEST_RR(   op"	r",7,VAL2,", r",0,val,"")
+
+	DATA_PROCESSING16("ands",0xf00f00ff)
+	DATA_PROCESSING16("eors",0xf00f00ff)
+	DATA_PROCESSING16("lsls",11)
+	DATA_PROCESSING16("lsrs",11)
+	DATA_PROCESSING16("asrs",11)
+	DATA_PROCESSING16("adcs",VAL2)
+	DATA_PROCESSING16("sbcs",VAL2)
+	DATA_PROCESSING16("rors",11)
+	DATA_PROCESSING16("tst",0xf00f00ff)
+	TEST_R("rsbs	r",0,VAL1,", #0")
+	TEST_R("rsbs	r",7,VAL2,", #0")
+	DATA_PROCESSING16("cmp",0xf00f00ff)
+	DATA_PROCESSING16("cmn",0xf00f00ff)
+	DATA_PROCESSING16("orrs",0xf00f00ff)
+	DATA_PROCESSING16("muls",VAL2)
+	DATA_PROCESSING16("bics",0xf00f00ff)
+	DATA_PROCESSING16("mvns",VAL2)
+
+	TEST_GROUP("Special data instructions and branch and exchange")
+
+	TEST_RR(  "add	r",0, VAL1,", r",7,VAL2,"")
+	TEST_RR(  "add	r",3, VAL2,", r",8,VAL3,"")
+	TEST_RR(  "add	r",8, VAL3,", r",0,VAL1,"")
+	TEST_R(   "add	sp"        ", r",8,-8,  "")
+	TEST_R(   "add	r",14,VAL1,", pc")
+	TEST_BF_R("add	pc"        ", r",0,2f-1f-8,"")
+	TEST_UNSUPPORTED(".short 0x44ff	@ add pc, pc")
+
+	TEST_RR(  "cmp	r",3,VAL1,", r",8,VAL2,"")
+	TEST_RR(  "cmp	r",8,VAL2,", r",0,VAL1,"")
+	TEST_R(   "cmp	sp"       ", r",8,-8,  "")
+
+	TEST_R(   "mov	r0, r",7,VAL2,"")
+	TEST_R(   "mov	r3, r",8,VAL3,"")
+	TEST_R(   "mov	r8, r",0,VAL1,"")
+	TEST_P(   "mov	sp, r",8,-8,  "")
+	TEST(     "mov	lr, pc")
+	TEST_BF_R("mov	pc, r",0,2f,  "")
+
+	TEST_BF_R("bx	r",0, 2f+1,"")
+	TEST_BF_R("bx	r",14,2f+1,"")
+	TESTCASE_START("bx	pc")
+		TEST_ARG_REG(14, 99f+1)
+		TEST_ARG_END("")
+		"	nop			\n\t" /* To align the bx pc*/
+		"50:	nop			\n\t"
+		"1:	bx	pc		\n\t"
+		"	bx	lr		\n\t"
+		".arm				\n\t"
+		"	adr	lr, 2f+1	\n\t"
+		"	bx	lr		\n\t"
+		".thumb				\n\t"
+		"2:	nop			\n\t"
+	TESTCASE_END
+
+	TEST_BF_R("blx	r",0, 2f+1,"")
+	TEST_BB_R("blx	r",14,2f+1,"")
+	TEST_UNSUPPORTED(".short 0x47f8	@ blx pc")
+
+	TEST_GROUP("Load from Literal Pool")
+
+	TEST_X( "ldr	r0, 3f",
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL1))
+	TEST_X( "ldr	r7, 3f",
+		".space 128				\n\t"
+		".align					\n\t"
+		"3:	.word	"__stringify(VAL2))
+
+	TEST_GROUP("16-bit Thumb Load/store instructions")
+
+	TEST_RPR("str	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("str	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strh	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strh	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_RPR("strb	r",0, VAL1,", [r",1, 24,", r",2,  48,"]")
+	TEST_RPR("strb	r",7, VAL2,", [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrsb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldr	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldr	r7, [r",6, 24,", r",5,  48,"]")
+	TEST_PR( "ldrh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrh	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrb	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrb	r7, [r",6, 24,", r",5,  50,"]")
+	TEST_PR( "ldrsh	r0, [r",1, 24,", r",2,  48,"]")
+	TEST_PR( "ldrsh	r7, [r",6, 24,", r",5,  50,"]")
+
+	TEST_RP("str	r",0, VAL1,", [r",1, 24,", #120]")
+	TEST_RP("str	r",7, VAL2,", [r",6, 24,", #120]")
+	TEST_P( "ldr	r0, [r",1, 24,", #120]")
+	TEST_P( "ldr	r7, [r",6, 24,", #120]")
+	TEST_RP("strb	r",0, VAL1,", [r",1, 24,", #30]")
+	TEST_RP("strb	r",7, VAL2,", [r",6, 24,", #30]")
+	TEST_P( "ldrb	r0, [r",1, 24,", #30]")
+	TEST_P( "ldrb	r7, [r",6, 24,", #30]")
+	TEST_RP("strh	r",0, VAL1,", [r",1, 24,", #60]")
+	TEST_RP("strh	r",7, VAL2,", [r",6, 24,", #60]")
+	TEST_P( "ldrh	r0, [r",1, 24,", #60]")
+	TEST_P( "ldrh	r7, [r",6, 24,", #60]")
+
+	TEST_R( "str	r",0, VAL1,", [sp, #0]")
+	TEST_R( "str	r",7, VAL2,", [sp, #160]")
+	TEST(   "ldr	r0, [sp, #0]")
+	TEST(   "ldr	r7, [sp, #160]")
+
+	TEST_RP("str	r",0, VAL1,", [r",0, 24,"]")
+	TEST_P( "ldr	r0, [r",0, 24,"]")
+
+	TEST_GROUP("Generate PC-/SP-relative address")
+
+	TEST("add	r0, pc, #4")
+	TEST("add	r7, pc, #1020")
+	TEST("add	r0, sp, #4")
+	TEST("add	r7, sp, #1020")
+
+	TEST_GROUP("Miscellaneous 16-bit instructions")
+
+	TEST_UNSUPPORTED( "cpsie	i")
+	TEST_UNSUPPORTED( "cpsid	i")
+	TEST_UNSUPPORTED( "setend	le")
+	TEST_UNSUPPORTED( "setend	be")
+
+	TEST("add	sp, #"__stringify(TEST_MEMORY_SIZE)) /* Assumes TEST_MEMORY_SIZE < 0x400 */
+	TEST("sub	sp, #0x7f*4")
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_BF_R(  "cbnz	r",0,0, ", 2f")
+	TEST_BF_R(  "cbz	r",2,-1,", 2f")
+	TEST_BF_RX( "cbnz	r",4,1, ", 2f",0x20)
+	TEST_BF_RX( "cbz	r",7,0, ", 2f",0x40)
+)
+	TEST_R("sxth	r0, r",7, HH1,"")
+	TEST_R("sxth	r7, r",0, HH2,"")
+	TEST_R("sxtb	r0, r",7, HH1,"")
+	TEST_R("sxtb	r7, r",0, HH2,"")
+	TEST_R("uxth	r0, r",7, HH1,"")
+	TEST_R("uxth	r7, r",0, HH2,"")
+	TEST_R("uxtb	r0, r",7, HH1,"")
+	TEST_R("uxtb	r7, r",0, HH2,"")
+	TEST_R("rev	r0, r",7, VAL1,"")
+	TEST_R("rev	r7, r",0, VAL2,"")
+	TEST_R("rev16	r0, r",7, VAL1,"")
+	TEST_R("rev16	r7, r",0, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xba80")
+	TEST_UNSUPPORTED(".short 0xbabf")
+	TEST_R("revsh	r0, r",7, VAL1,"")
+	TEST_R("revsh	r7, r",0, VAL2,"")
+
+#define TEST_POPPC(code, offset)	\
+	TESTCASE_START(code)		\
+	TEST_ARG_PTR(13, offset)	\
+	TEST_ARG_END("")		\
+	TEST_BRANCH_F(code,0)		\
+	TESTCASE_END
+
+	TEST("push	{r0}")
+	TEST("push	{r7}")
+	TEST("push	{r14}")
+	TEST("push	{r0-r7,r14}")
+	TEST("push	{r0,r2,r4,r6,r14}")
+	TEST("push	{r1,r3,r5,r7}")
+	TEST("pop	{r0}")
+	TEST("pop	{r7}")
+	TEST("pop	{r0,r2,r4,r6}")
+	TEST_POPPC("pop	{pc}",15*4)
+	TEST_POPPC("pop	{r0-r7,pc}",7*4)
+	TEST_POPPC("pop	{r1,r3,r5,r7,pc}",11*4)
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{pc}	@ ",13,15*4,"")
+	TEST_THUMB_TO_ARM_INTERWORK_P("pop	{r0-r7,pc}	@ ",13,7*4,"")
+
+	TEST_UNSUPPORTED("bkpt.n	0")
+	TEST_UNSUPPORTED("bkpt.n	255")
+
+	TEST_SUPPORTED("yield")
+	TEST("sev")
+	TEST("nop")
+	TEST("wfi")
+	TEST_SUPPORTED("wfe")
+	TEST_UNSUPPORTED(".short 0xbf50") /* Unassigned hints */
+	TEST_UNSUPPORTED(".short 0xbff0") /* Unassigned hints */
+
+#define TEST_IT(code, code2)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	"50:	nop			\n\t"	\
+	"1:	"code"			\n\t"	\
+	"	"code2"			\n\t"	\
+	"2:	nop			\n\t"	\
+	TESTCASE_END
+
+DONT_TEST_IN_ITBLOCK(
+	TEST_IT("it	eq","moveq r0,#0")
+	TEST_IT("it	vc","movvc r0,#0")
+	TEST_IT("it	le","movle r0,#0")
+	TEST_IT("ite	eq","moveq r0,#0\n\t  movne r1,#1")
+	TEST_IT("itet	vc","movvc r0,#0\n\t  movvs r1,#1\n\t  movvc r2,#2")
+	TEST_IT("itete	le","movle r0,#0\n\t  movgt r1,#1\n\t  movle r2,#2\n\t  movgt r3,#3")
+	TEST_IT("itttt	le","movle r0,#0\n\t  movle r1,#1\n\t  movle r2,#2\n\t  movle r3,#3")
+	TEST_IT("iteee	le","movle r0,#0\n\t  movgt r1,#1\n\t  movgt r2,#2\n\t  movgt r3,#3")
+)
+
+	TEST_GROUP("Load and store multiple")
+
+	TEST_P("ldmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("ldmia	r",7, 16*4,"!, {r0-r6}")
+	TEST_P("stmia	r",4, 16*4,"!, {r0,r7}")
+	TEST_P("stmia	r",0, 16*4,"!, {r0-r7}")
+
+	TEST_GROUP("Conditional branch and Supervisor Call instructions")
+
+CONDITION_INSTRUCTIONS(8,
+	TEST_BF("beq	2f")
+	TEST_BB("bne	2b")
+	TEST_BF("bgt	2f")
+	TEST_BB("blt	2b")
+)
+	TEST_UNSUPPORTED(".short 0xde00")
+	TEST_UNSUPPORTED(".short 0xdeff")
+	TEST_UNSUPPORTED("svc	#0x00")
+	TEST_UNSUPPORTED("svc	#0xff")
+
+	TEST_GROUP("Unconditional branch")
+
+	TEST_BF(  "b	2f")
+	TEST_BB(  "b	2b")
+	TEST_BF_X("b	2f", 0x400)
+	TEST_BB_X("b	2b", 0x400)
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("subs.n r0, r0")
+
+	verbose("\n");
+}
+
+
+void kprobe_thumb32_test_cases(void)
+{
+	kprobe_test_flags = 0;
+
+	TEST_GROUP("Load/store multiple")
+
+	TEST_UNSUPPORTED("rfedb	sp")
+	TEST_UNSUPPORTED("rfeia	sp")
+	TEST_UNSUPPORTED("rfedb	sp!")
+	TEST_UNSUPPORTED("rfeia	sp!")
+
+	TEST_P(   "stmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmia	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmia	r",7, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmia	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmia	r",4, 0,   ", {r0-r12,r14}")
+	TEST_BF_P("ldmia	r",5, 8*4, "!, {r6-r12,r15}")
+	TEST_P(   "ldmia	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmia	r",14,14*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "stmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_P(   "stmdb	r",5, 16*4,"!, {r8-r12,r14}")
+	TEST_P(   "stmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+
+	TEST_P(   "ldmdb	r",0, 16*4,", {r0,r8}")
+	TEST_P(   "ldmdb	r",4, 16*4,", {r0-r12,r14}")
+	TEST_BF_P("ldmdb	r",5, 16*4,"!, {r6-r12,r15}")
+	TEST_P(   "ldmdb	r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
+	TEST_BF_P("ldmdb	r",14,16*4,"!, {r4,pc}")
+
+	TEST_P(   "stmdb	r",13,16*4,"!, {r3-r12,lr}")
+	TEST_P(	  "stmdb	r",13,16*4,"!, {r3-r12}")
+	TEST_P(   "stmdb	r",2, 16*4,", {r3-r12,lr}")
+	TEST_P(   "stmdb	r",13,16*4,"!, {r2-r12,lr}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "stmdb	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_BF_P("ldmia	r",13,5*4, "!, {r3-r12,pc}")
+	TEST_P(	  "ldmia	r",13,5*4, "!, {r3-r12}")
+	TEST_BF_P("ldmia	r",2, 5*4, "!, {r3-r12,pc}")
+	TEST_BF_P("ldmia	r",13,4*4, "!, {r2-r12,pc}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12}")
+	TEST_P(   "ldmia	r",0, 16*4,", {r0-r12,lr}")
+
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",0,14*4,", {r12,pc}")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldmia	r",13,2*4,", {r0-r12,pc}")
+
+	TEST_UNSUPPORTED(".short 0xe88f,0x0101	@ stmia	pc, {r0,r8}")
+	TEST_UNSUPPORTED(".short 0xe92f,0x5f00	@ stmdb	pc!, {r8-r12,r14}")
+	TEST_UNSUPPORTED(".short 0xe8bd,0xc000	@ ldmia	r13!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0xc000	@ ldmdb	r14!, {r14,pc}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x3f00	@ stmia	r7!, {r8-r12,sp}")
+	TEST_UNSUPPORTED(".short 0xe8a7,0x9f00	@ stmia	r7!, {r8-r12,pc}")
+	TEST_UNSUPPORTED(".short 0xe93e,0x2010	@ ldmdb	r14!, {r4,sp}")
+
+	TEST_GROUP("Load/store double or exclusive, table branch")
+
+	TEST_P(  "ldrd	r0, r1, [r",1, 24,", #-16]")
+	TEST(    "ldrd	r12, r14, [sp, #16]")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,", #-16]!")
+	TEST(    "ldrd	r14, r12, [sp, #16]!")
+	TEST_P(  "ldrd	r1, r0, [r",7, 24,"], #16")
+	TEST(    "ldrd	r7, r8, [sp], #-16")
+
+	TEST_X( "ldrd	r12, r14, 3f",
+		".align 3				\n\t"
+		"3:	.word	"__stringify(VAL1)"	\n\t"
+		"	.word	"__stringify(VAL2))
+
+	TEST_UNSUPPORTED(".short 0xe9ff,0xec04	@ ldrd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ff,0xec04	@ ldrd	r14, r12, [pc], #16")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xd800	@ ldrd	sp, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0xf800	@ ldrd	pc, r8, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7d00	@ ldrd	r7, sp, [r4]")
+	TEST_UNSUPPORTED(".short 0xe9d4,0x7f00	@ ldrd	r7, pc, [r4]")
+
+	TEST_RRP("strd	r",0, VAL1,", r",1, VAL2,", [r",1, 24,", #-16]")
+	TEST_RR( "strd	r",12,VAL2,", r",14,VAL1,", [sp, #16]")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,", #-16]!")
+	TEST_RR( "strd	r",14,VAL2,", r",12,VAL1,", [sp, #16]!")
+	TEST_RRP("strd	r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16")
+	TEST_RR( "strd	r",7, VAL2,", r",8, VAL1,", [sp], #-16")
+	TEST_UNSUPPORTED(".short 0xe9ef,0xec04	@ strd	r14, r12, [pc, #16]!")
+	TEST_UNSUPPORTED(".short 0xe8ef,0xec04	@ strd	r14, r12, [pc], #16")
+
+	TEST_RX("tbb	[pc, r",0, (9f-(1f+4)),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbb	[pc, r",4, (9f-(1f+4)+1),"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbb	[r",1,9f,", r",2,0,"]",
+		"9:			\n\t"
+		".byte	(2f-1b-4)>>1	\n\t"
+		".byte	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",7, (9f-(1f+4))>>1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RX("tbh	[pc, r",12, ((9f-(1f+4))>>1)+1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_RRX("tbh	[r",1,9f, ", r",14,1,"]",
+		"9:			\n\t"
+		".short	(2f-1b-4)>>1	\n\t"
+		".short	(3f-1b-4)>>1	\n\t"
+		"3:	mvn	r0, r0	\n\t"
+		"2:	nop		\n\t")
+
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01f	@ tbh [r1, pc]")
+	TEST_UNSUPPORTED(".short 0xe8d1,0xf01d	@ tbh [r1, sp]")
+	TEST_UNSUPPORTED(".short 0xe8dd,0xf012	@ tbh [sp, r2]")
+
+	TEST_UNSUPPORTED("strexb	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexh	r0, r1, [r2]")
+	TEST_UNSUPPORTED("strexd	r0, r1, [r2]")
+	TEST_UNSUPPORTED("ldrexb	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexh	r0, [r1]")
+	TEST_UNSUPPORTED("ldrexd	r0, [r1]")
+
+	TEST_GROUP("Data-processing (shifted register) and (modified immediate)")
+
+#define _DATA_PROCESSING32_DNM(op,s,val)					\
+	TEST_RR(op s".w	r0,  r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op s"	r1,  r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op s"	r2,  r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op s"	r3,  r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op s"	r4,  r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op s"	r5,  r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op s"	r8,  r",9, VAL1,", r",10,val, ", rrx")			\
+	TEST_R( op s"	r0,  r",11,VAL1,", #0x00010001")			\
+	TEST_R( op s"	r11, r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op s"	r7,  r",8, VAL2,", #0x000af000")
+
+#define DATA_PROCESSING32_DNM(op,val)		\
+	_DATA_PROCESSING32_DNM(op,"",val)	\
+	_DATA_PROCESSING32_DNM(op,"s",val)
+
+#define DATA_PROCESSING32_NM(op,val)					\
+	TEST_RR(op".w	r",1, VAL1,", r",2, val, "")			\
+	TEST_RR(op"	r",1, VAL1,", r",2, val, ", lsl #3")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", lsr #4")		\
+	TEST_RR(op"	r",3, VAL1,", r",2, val, ", asr #5")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, N(val),", asr #6")		\
+	TEST_RR(op"	r",5, VAL1,", r",2, val, ", ror #7")		\
+	TEST_RR(op"	r",9, VAL1,", r",10,val, ", rrx")		\
+	TEST_R( op"	r",11,VAL1,", #0x00010001")			\
+	TEST_R( op"	r",0, VAL1,", #0xf5000000")			\
+	TEST_R( op"	r",8, VAL2,", #0x000af000")
+
+#define _DATA_PROCESSING32_DM(op,s,val)				\
+	TEST_R( op s".w	r0,  r",14, val, "")			\
+	TEST_R( op s"	r1,  r",12, val, ", lsl #3")		\
+	TEST_R( op s"	r2,  r",11, val, ", lsr #4")		\
+	TEST_R( op s"	r3,  r",10, val, ", asr #5")		\
+	TEST_R( op s"	r4,  r",9, N(val),", asr #6")		\
+	TEST_R( op s"	r5,  r",8, val, ", ror #7")		\
+	TEST_R( op s"	r8,  r",7,val, ", rrx")			\
+	TEST(   op s"	r0,  #0x00010001")			\
+	TEST(   op s"	r11, #0xf5000000")			\
+	TEST(   op s"	r7,  #0x000af000")			\
+	TEST(   op s"	r4,  #0x00005a00")
+
+#define DATA_PROCESSING32_DM(op,val)		\
+	_DATA_PROCESSING32_DM(op,"",val)	\
+	_DATA_PROCESSING32_DM(op,"s",val)
+
+	DATA_PROCESSING32_DNM("and",0xf00f00ff)
+	DATA_PROCESSING32_NM("tst",0xf00f00ff)
+	DATA_PROCESSING32_DNM("bic",0xf00f00ff)
+	DATA_PROCESSING32_DNM("orr",0xf00f00ff)
+	DATA_PROCESSING32_DM("mov",VAL2)
+	DATA_PROCESSING32_DNM("orn",0xf00f00ff)
+	DATA_PROCESSING32_DM("mvn",VAL2)
+	DATA_PROCESSING32_DNM("eor",0xf00f00ff)
+	DATA_PROCESSING32_NM("teq",0xf00f00ff)
+	DATA_PROCESSING32_DNM("add",VAL2)
+	DATA_PROCESSING32_NM("cmn",VAL2)
+	DATA_PROCESSING32_DNM("adc",VAL2)
+	DATA_PROCESSING32_DNM("sbc",VAL2)
+	DATA_PROCESSING32_DNM("sub",VAL2)
+	DATA_PROCESSING32_NM("cmp",VAL2)
+	DATA_PROCESSING32_DNM("rsb",VAL2)
+
+	TEST_RR("pkhbt	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhbt	r14,r",12, HH1,", r",10,HH2,", lsl #2")
+	TEST_RR("pkhtb	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR("pkhtb	r14,r",12, HH1,", r",10,HH2,", asr #2")
+
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0d	@ tst.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea17,0x0f0f	@ tst.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea1d,0x0f07	@ tst.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea1f,0x0f07	@ tst.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf01d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf01f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0d	@ teq.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea97,0x0f0f	@ teq.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea9d,0x0f07	@ teq.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea9f,0x0f07	@ teq.w pc, r7")
+	TEST_UNSUPPORTED(".short 0xf09d,0x1f08	@ tst sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf09f,0x1f08	@ tst pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0d	@ cmn.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xeb17,0x0f0f	@ cmn.w r7, pc")
+	TEST_P("cmn.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xeb1f,0x0f07	@ cmn.w pc, r7")
+	TEST(  "cmn	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf11f,0x1f08	@ cmn pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0d	@ cmp.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xebb7,0x0f0f	@ cmp.w r7, pc")
+	TEST_P("cmp.w	sp, r",7,0,"")
+	TEST_UNSUPPORTED(".short 0xebbf,0x0f07	@ cmp.w pc, r7")
+	TEST(  "cmp	sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf1bf,0x1f08	@ cmp pc, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xea5f,0x070d	@ movs.w r7, sp")
+	TEST_UNSUPPORTED(".short 0xea5f,0x070f	@ movs.w r7, pc")
+	TEST_UNSUPPORTED(".short 0xea5f,0x0d07	@ movs.w sp, r7")
+	TEST_UNSUPPORTED(".short 0xea4f,0x0f07	@ mov.w  pc, r7")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1d08	@ mov sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf04f,0x1f08	@ mov pc, #0x00080008")
+
+	TEST_R("add.w	r0, sp, r",1, 4,"")
+	TEST_R("adds	r0, sp, r",1, 4,", asl #3")
+	TEST_R("add	r0, sp, r",1, 4,", asl #4")
+	TEST_R("add	r0, sp, r",1, 16,", ror #1")
+	TEST_R("add.w	sp, sp, r",1, 4,"")
+	TEST_R("add	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x1d01	@ add sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d71	@ add sp, sp, r1, ror #1")
+	TEST(  "add.w	r0, sp, #24")
+	TEST(  "add.w	sp, sp, #24")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0f01	@ add pc, sp, r1")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000f	@ add r0, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x000d	@ add r0, sp, sp")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0f	@ add sp, sp, pc")
+	TEST_UNSUPPORTED(".short 0xeb0d,0x0d0d	@ add sp, sp, sp")
+
+	TEST_R("sub.w	r0, sp, r",1, 4,"")
+	TEST_R("subs	r0, sp, r",1, 4,", asl #3")
+	TEST_R("sub	r0, sp, r",1, 4,", asl #4")
+	TEST_R("sub	r0, sp, r",1, 16,", ror #1")
+	TEST_R("sub.w	sp, sp, r",1, 4,"")
+	TEST_R("sub	sp, sp, r",1, 4,", asl #3")
+	TEST_UNSUPPORTED(".short 0xebad,0x1d01	@ sub sp, sp, r1, asl #4")
+	TEST_UNSUPPORTED(".short 0xebad,0x0d71	@ sub sp, sp, r1, ror #1")
+	TEST_UNSUPPORTED(".short 0xebad,0x0f01	@ sub pc, sp, r1")
+	TEST(  "sub.w	r0, sp, #24")
+	TEST(  "sub.w	sp, sp, #24")
+
+	TEST_UNSUPPORTED(".short 0xea02,0x010f	@ and r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xea0f,0x0103	@ and r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0f03	@ and pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x010d	@ and r1, r2, sp")
+	TEST_UNSUPPORTED(".short 0xea0d,0x0103	@ and r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xea02,0x0d03	@ and sp, r2, r3")
+	TEST_UNSUPPORTED(".short 0xf00d,0x1108	@ and r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf00f,0x1108	@ and r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1d08	@ and sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf002,0x1f08	@ and pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeb02,0x010f	@ add r1, r2, pc")
+	TEST_UNSUPPORTED(".short 0xeb0f,0x0103	@ add r1, pc, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0f03	@ add pc, r2, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x010d	@ add r1, r2, sp")
+	TEST_SUPPORTED(  ".short 0xeb0d,0x0103	@ add r1, sp, r3")
+	TEST_UNSUPPORTED(".short 0xeb02,0x0d03	@ add sp, r2, r3")
+	TEST_SUPPORTED(  ".short 0xf10d,0x1108	@ add r1, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10d,0x1f08	@ add pc, sp, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf10f,0x1108	@ add r1, pc, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1d08	@ add sp, r8, #0x00080008")
+	TEST_UNSUPPORTED(".short 0xf102,0x1f08	@ add pc, r8, #0x00080008")
+
+	TEST_UNSUPPORTED(".short 0xeaa0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeaf0,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb20,0x0000")
+	TEST_UNSUPPORTED(".short 0xeb80,0x0000")
+	TEST_UNSUPPORTED(".short 0xebe0,0x0000")
+
+	TEST_UNSUPPORTED(".short 0xf0a0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0c0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf0f0,0x0000")
+	TEST_UNSUPPORTED(".short 0xf120,0x0000")
+	TEST_UNSUPPORTED(".short 0xf180,0x0000")
+	TEST_UNSUPPORTED(".short 0xf1e0,0x0000")
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xec00,0x0000")
+	TEST_UNSUPPORTED(".short 0xeff0,0x0000")
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xfff0,0x0000")
+
+	TEST_GROUP("Data-processing (plain binary immediate)")
+
+	TEST_R("addw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "addw	r14, sp, #0xf5a")
+	TEST(  "addw	sp, sp, #0x20")
+	TEST(  "addw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1f20	@ addw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20d,0x1f20	@ addw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf20f,0x1d20	@ addw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf200,0x1d20	@ addw sp, r0, #0x120")
+
+	TEST_R("subw	r0,  r",1, VAL1,", #0x123")
+	TEST(  "subw	r14, sp, #0xf5a")
+	TEST(  "subw	sp, sp, #0x20")
+	TEST(  "subw	r7,  pc, #0x888")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1f20	@ subw pc, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2ad,0x1f20	@ subw pc, sp, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2af,0x1d20	@ subw sp, pc, #0x120")
+	TEST_UNSUPPORTED(".short 0xf2a0,0x1d20	@ subw sp, r0, #0x120")
+
+	TEST("movw	r0, #0")
+	TEST("movw	r0, #0xffff")
+	TEST("movw	lr, #0xffff")
+	TEST_UNSUPPORTED(".short 0xf240,0x0d00	@ movw sp, #0")
+	TEST_UNSUPPORTED(".short 0xf240,0x0f00	@ movw pc, #0")
+
+	TEST_R("movt	r",0, VAL1,", #0")
+	TEST_R("movt	r",0, VAL2,", #0xffff")
+	TEST_R("movt	r",14,VAL1,", #0xffff")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0d00	@ movt sp, #0")
+	TEST_UNSUPPORTED(".short 0xf2c0,0x0f00	@ movt pc, #0")
+
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "ssat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "ssat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0d17	@ ssat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30c,0x0f17	@ ssat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf30d,0x0c17	@ ssat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf30f,0x0c17	@ ssat	r12, #24, pc")
+
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,"")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,"")
+	TEST_R(     "usat	r0, #24, r",0,   VAL1,", lsl #8")
+	TEST_R(     "usat	r14, #24, r",12, VAL2,", asr #8")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0d17	@ usat	sp, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38c,0x0f17	@ usat	pc, #24, r12")
+	TEST_UNSUPPORTED(".short 0xf38d,0x0c17	@ usat	r12, #24, sp")
+	TEST_UNSUPPORTED(".short 0xf38f,0x0c17	@ usat	r12, #24, pc")
+
+	TEST_R(     "ssat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "ssat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0d0b	@ ssat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32c,0x0f0b	@ ssat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf32d,0x0c0b	@ ssat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf32f,0x0c0b	@ ssat16	r12, #12, pc")
+
+	TEST_R(     "usat16	r0, #12, r",0,   HH1,"")
+	TEST_R(     "usat16	r14, #12, r",12, HH2,"")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0d0b	@ usat16	sp, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ac,0x0f0b	@ usat16	pc, #12, r12")
+	TEST_UNSUPPORTED(".short 0xf3ad,0x0c0b	@ usat16	r12, #12, sp")
+	TEST_UNSUPPORTED(".short 0xf3af,0x0c0b	@ usat16	r12, #12, pc")
+
+	TEST_R(     "sbfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "sbfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "sbfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2d0f	@ sbfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34c,0x2f0f	@ sbfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34d,0x2c0f	@ sbfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf34f,0x2c0f	@ sbfx	r12, pc, #8, #16")
+
+	TEST_R(     "ubfx	r0, r",0  , VAL1,", #0, #31")
+	TEST_R(     "ubfx	r14, r",12, VAL2,", #8, #16")
+	TEST_R(     "ubfx	r4, r",10,  VAL1,", #16, #15")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2d0f	@ ubfx	sp, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cc,0x2f0f	@ ubfx	pc, r12, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cd,0x2c0f	@ ubfx	r12, sp, #8, #16")
+	TEST_UNSUPPORTED(".short 0xf3cf,0x2c0f	@ ubfx	r12, pc, #8, #16")
+
+	TEST_R(     "bfc	r",0, VAL1,", #4, #20")
+	TEST_R(     "bfc	r",14,VAL2,", #4, #20")
+	TEST_R(     "bfc	r",7, VAL1,", #0, #31")
+	TEST_R(     "bfc	r",8, VAL2,", #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0d1e	@ bfc	sp, #0, #31")
+	TEST_UNSUPPORTED(".short 0xf36f,0x0f1e	@ bfc	pc, #0, #31")
+
+	TEST_RR(    "bfi	r",0, VAL1,", r",0  , VAL2,", #0, #31")
+	TEST_RR(    "bfi	r",12,VAL1,", r",14 , VAL2,", #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1d17	@ bfi	sp, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36e,0x1f17	@ bfi	pc, r14, #4, #20")
+	TEST_UNSUPPORTED(".short 0xf36d,0x1e17	@ bfi	r14, sp, #4, #20")
+
+	TEST_GROUP("Branches and miscellaneous control")
+
+CONDITION_INSTRUCTIONS(22,
+	TEST_BF("beq.w	2f")
+	TEST_BB("bne.w	2b")
+	TEST_BF("bgt.w	2f")
+	TEST_BB("blt.w	2b")
+	TEST_BF_X("bpl.w	2f",0x1000)
+)
+
+	TEST_UNSUPPORTED("msr	cpsr, r0")
+	TEST_UNSUPPORTED("msr	cpsr_f, r1")
+	TEST_UNSUPPORTED("msr	spsr, r2")
+
+	TEST_UNSUPPORTED("cpsie.w	i")
+	TEST_UNSUPPORTED("cpsid.w	i")
+	TEST_UNSUPPORTED("cps	0x13")
+
+	TEST_SUPPORTED("yield.w")
+	TEST("sev.w")
+	TEST("nop.w")
+	TEST("wfi.w")
+	TEST_SUPPORTED("wfe.w")
+	TEST_UNSUPPORTED("dbg.w	#0")
+
+	TEST_UNSUPPORTED("clrex")
+	TEST_UNSUPPORTED("dsb")
+	TEST_UNSUPPORTED("dmb")
+	TEST_UNSUPPORTED("isb")
+
+	TEST_UNSUPPORTED("bxj	r0")
+
+	TEST_UNSUPPORTED("subs	pc, lr, #4")
+
+	TEST("mrs	r0, cpsr")
+	TEST("mrs	r14, cpsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8d00	@ mrs	sp, spsr")
+	TEST_UNSUPPORTED(".short 0xf3ef,0x8f00	@ mrs	pc, spsr")
+	TEST_UNSUPPORTED("mrs	r0, spsr")
+	TEST_UNSUPPORTED("mrs	lr, spsr")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0x8000 @ smc #0")
+
+	TEST_UNSUPPORTED(".short 0xf7f0,0xa000 @ undefeined")
+
+	TEST_BF(  "b.w	2f")
+	TEST_BB(  "b.w	2b")
+	TEST_BF_X("b.w	2f", 0x1000)
+
+	TEST_BF(  "bl.w	2f")
+	TEST_BB(  "bl.w	2b")
+	TEST_BB_X("bl.w	2b", 0x1000)
+
+	TEST_X(	"blx	__dummy_arm_subroutine",
+		".arm				\n\t"
+		".align				\n\t"
+		".type __dummy_arm_subroutine, %%function \n\t"
+		"__dummy_arm_subroutine:	\n\t"
+		"mov	r0, pc			\n\t"
+		"bx	lr			\n\t"
+		".thumb				\n\t"
+	)
+	TEST(	"blx	__dummy_arm_subroutine")
+
+	TEST_GROUP("Store single data item")
+
+#define SINGLE_STORE(size)							\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,-1024,", #1024]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, -1024,", #1080]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,  "], #-120")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,  "], #-128")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,24,   ", #120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 24,   ", #128]!")		\
+	TEST_RP( "str"size"	r",0, VAL1,", [r",11,256,  ", #-120]!")		\
+	TEST_RP( "str"size"	r",14,VAL2,", [r",1, 256,  ", #-128]!")		\
+	TEST_RPR("str"size".w	r",0, VAL1,", [r",1, 0,", r",2, 4,"]")		\
+	TEST_RPR("str"size"	r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_R(  "str"size".w	r",7, VAL1,", [sp, #24]")			\
+	TEST_RP( "str"size".w	r",0, VAL2,", [r",0,0, "]")			\
+	TEST_UNSUPPORTED("str"size"t	r0, [r1, #4]")
+
+	SINGLE_STORE("b")
+	SINGLE_STORE("h")
+	SINGLE_STORE("")
+
+	TEST("str	sp, [sp]")
+	TEST_UNSUPPORTED(".short 0xf8cf,0xe000	@ str	r14, [pc]")
+	TEST_UNSUPPORTED(".short 0xf8ce,0xf000	@ str	pc, [r14]")
+
+	TEST_GROUP("Advanced SIMD element or structure load/store instructions")
+
+	TEST_UNSUPPORTED(".short 0xf900,0x0000")
+	TEST_UNSUPPORTED(".short 0xf92f,0xffff")
+	TEST_UNSUPPORTED(".short 0xf980,0x0000")
+	TEST_UNSUPPORTED(".short 0xf9ef,0xffff")
+
+	TEST_GROUP("Load single data item and memory hints")
+
+#define SINGLE_LOAD(size)						\
+	TEST_P( "ldr"size"	r0, [r",11,-1024, ", #1024]")		\
+	TEST_P( "ldr"size"	r14, [r",1, -1024,", #1080]")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #120")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,  "], #128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,   "], #-120")		\
+	TEST_P( "ldr"size"	r14, [r",1,24,   "], #-128")		\
+	TEST_P( "ldr"size"	r0, [r",11,24,    ", #120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 24,   ", #128]!")		\
+	TEST_P( "ldr"size"	r0, [r",11,256,   ", #-120]!")		\
+	TEST_P( "ldr"size"	r14, [r",1, 256,  ", #-128]!")		\
+	TEST_PR("ldr"size".w	r0, [r",1, 0,", r",2, 4,"]")		\
+	TEST_PR("ldr"size"	r14, [r",10,0,", r",11,4,", lsl #1]")	\
+	TEST_X( "ldr"size".w	r0, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL1))			\
+	TEST_X( "ldr"size".w	r14, 3f",				\
+		".align 3				\n\t"		\
+		"3:	.word	"__stringify(VAL2))			\
+	TEST(   "ldr"size".w	r7, 3b")				\
+	TEST(   "ldr"size".w	r7, [sp, #24]")				\
+	TEST_P( "ldr"size".w	r0, [r",0,0, "]")			\
+	TEST_UNSUPPORTED("ldr"size"t	r0, [r1, #4]")
+
+	SINGLE_LOAD("b")
+	SINGLE_LOAD("sb")
+	SINGLE_LOAD("h")
+	SINGLE_LOAD("sh")
+	SINGLE_LOAD("")
+
+	TEST_BF_P("ldr	pc, [r",14, 15*4,"]")
+	TEST_P(   "ldr	sp, [r",14, 13*4,"]")
+	TEST_BF_R("ldr	pc, [sp, r",14, 15*4,"]")
+	TEST_R(   "ldr	sp, [sp, r",14, 13*4,"]")
+	TEST_THUMB_TO_ARM_INTERWORK_P("ldr	pc, [r",0,0,", #15*4]")
+	TEST_SUPPORTED("ldr	sp, 99f")
+	TEST_SUPPORTED("ldr	pc, 99f")
+
+	TEST_UNSUPPORTED(".short 0xf854,0x700d	@ ldr	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf854,0x700f	@ ldr	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700d	@ ldrb	r7, [r4, sp]")
+	TEST_UNSUPPORTED(".short 0xf814,0x700f	@ ldrb	r7, [r4, pc]")
+	TEST_UNSUPPORTED(".short 0xf89f,0xd004	@ ldrb	sp, 99f")
+	TEST_UNSUPPORTED(".short 0xf814,0xd008	@ ldrb	sp, [r4, r8]")
+	TEST_UNSUPPORTED(".short 0xf894,0xd000	@ ldrb	sp, [r4]")
+
+	TEST_UNSUPPORTED(".short 0xf860,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf9ff,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf950,0x0000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf95f,0xffff") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf800,0x0800") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xf97f,0xfaff") /* Unallocated space */
+
+	TEST(   "pli	[pc, #4]")
+	TEST(   "pli	[pc, #-4]")
+	TEST(   "pld	[pc, #4]")
+	TEST(   "pld	[pc, #-4]")
+
+	TEST_P( "pld	[r",0,-1024,", #1024]")
+	TEST(   ".short 0xf8b0,0xf400	@ pldw	[r0, #1024]")
+	TEST_P( "pli	[r",4, 0b,", #1024]")
+	TEST_P( "pld	[r",7, 120,", #-120]")
+	TEST(   ".short 0xf837,0xfc78	@ pldw	[r7, #-120]")
+	TEST_P( "pli	[r",11,120,", #-120]")
+	TEST(   "pld	[sp, #0]")
+
+	TEST_PR("pld	[r",7, 24, ", r",0, 16,"]")
+	TEST_PR("pld	[r",8, 24, ", r",12,16,", lsl #3]")
+	TEST_SUPPORTED(".short 0xf837,0xf000	@ pldw	[r7, r0]")
+	TEST_SUPPORTED(".short 0xf838,0xf03c	@ pldw	[r8, r12, lsl #3]");
+	TEST_RR("pli	[r",12,0b,", r",0, 16,"]")
+	TEST_RR("pli	[r",0, 0b,", r",12,16,", lsl #3]")
+	TEST_R( "pld	[sp, r",1, 16,"]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00d  @pld	[r7, sp]")
+	TEST_UNSUPPORTED(".short 0xf817,0xf00f  @pld	[r7, pc]")
+
+	TEST_GROUP("Data-processing (register)")
+
+#define SHIFTS32(op)					\
+	TEST_RR(op"	r0,  r",1, VAL1,", r",2, 3, "")	\
+	TEST_RR(op"	r14, r",12,VAL2,", r",11,10,"")
+
+	SHIFTS32("lsl")
+	SHIFTS32("lsls")
+	SHIFTS32("lsr")
+	SHIFTS32("lsrs")
+	SHIFTS32("asr")
+	SHIFTS32("asrs")
+	SHIFTS32("ror")
+	SHIFTS32("rors")
+
+	TEST_UNSUPPORTED(".short 0xfa01,0xff02	@ lsl	pc, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xfd02	@ lsl	sp, r1, r2")
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf002	@ lsl	r0, pc, r2")
+	TEST_UNSUPPORTED(".short 0xfa0d,0xf002	@ lsl	r0, sp, r2")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00f	@ lsl	r0, r1, pc")
+	TEST_UNSUPPORTED(".short 0xfa01,0xf00d	@ lsl	r0, r1, sp")
+
+	TEST_RR(    "sxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxth	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa0f,0xff87	@ sxth	pc, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xfd87	@ sxth	sp, r7");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88f	@ sxth	r8, pc");
+	TEST_UNSUPPORTED(".short 0xfa0f,0xf88d	@ sxth	r8, sp");
+
+	TEST_RR(    "uxtah	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtah	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxth	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab16	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab16	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb16	r8, r",7,  HH1,"")
+
+	TEST_RR(    "sxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "sxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "sxtb	r8, r",7,  HH1,"")
+
+	TEST_RR(    "uxtab	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "uxtab	r14,r",12, HH2,", r",10,HH1,", ror #8")
+	TEST_R(     "uxtb	r8, r",7,  HH1,"")
+
+	TEST_UNSUPPORTED(".short 0xfa60,0x00f0")
+	TEST_UNSUPPORTED(".short 0xfa7f,0xffff")
+
+#define PARALLEL_ADD_SUB(op)					\
+	TEST_RR(  op"add16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"asx	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"asx	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sax	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sax	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub16	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub16	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"add8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"add8	r14, r",12,HH2,", r",10,HH1,"")	\
+	TEST_RR(  op"sub8	r0, r",0,  HH1,", r",1, HH2,"")	\
+	TEST_RR(  op"sub8	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_GROUP("Parallel addition and subtraction, signed")
+
+	PARALLEL_ADD_SUB("s")
+	PARALLEL_ADD_SUB("q")
+	PARALLEL_ADD_SUB("sh")
+
+	TEST_GROUP("Parallel addition and subtraction, unsigned")
+
+	PARALLEL_ADD_SUB("u")
+	PARALLEL_ADD_SUB("uq")
+	PARALLEL_ADD_SUB("uh")
+
+	TEST_GROUP("Miscellaneous operations")
+
+	TEST_RR("qadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qsub	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdadd	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdadd	lr, r",9, VAL2,", r",8, VAL1,"")
+	TEST_RR("qdsub	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR("qdsub	lr, r",9, VAL2,", r",8, VAL1,"")
+
+	TEST_R("rev.w	r0, r",0,   VAL1,"")
+	TEST_R("rev	r14, r",12, VAL2,"")
+	TEST_R("rev16.w	r0, r",0,   VAL1,"")
+	TEST_R("rev16	r14, r",12, VAL2,"")
+	TEST_R("rbit	r0, r",0,   VAL1,"")
+	TEST_R("rbit	r14, r",12, VAL2,"")
+	TEST_R("revsh.w	r0, r",0,   VAL1,"")
+	TEST_R("revsh	r14, r",12, VAL2,"")
+
+	TEST_UNSUPPORTED(".short 0xfa9c,0xff8c	@ rev	pc, r12");
+	TEST_UNSUPPORTED(".short 0xfa9c,0xfd8c	@ rev	sp, r12");
+	TEST_UNSUPPORTED(".short 0xfa9f,0xfe8f	@ rev	r14, pc");
+	TEST_UNSUPPORTED(".short 0xfa9d,0xfe8d	@ rev	r14, sp");
+
+	TEST_RR("sel	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR("sel	r14, r",12,VAL1,", r",10, VAL2,"")
+
+	TEST_R("clz	r0, r",0, 0x0,"")
+	TEST_R("clz	r7, r",14,0x1,"")
+	TEST_R("clz	lr, r",7, 0xffffffff,"")
+
+	TEST_UNSUPPORTED(".short 0xfa80,0xf030") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfab0,0xf000") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfaff,0xff7f") /* Unallocated space */
+
+	TEST_GROUP("Multiply, multiply accumulate, and absolute difference operations")
+
+	TEST_RR(    "mul	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "mul	r7, r",8, VAL2,", r",9, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xff09	@ mul	pc, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xfd09	@ mul	sp, r8, r9")
+	TEST_UNSUPPORTED(".short 0xfb0f,0xf709	@ mul	r7, pc, r9")
+	TEST_UNSUPPORTED(".short 0xfb0d,0xf709	@ mul	r7, sp, r9")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70f	@ mul	r7, r8, pc")
+	TEST_UNSUPPORTED(".short 0xfb08,0xf70d	@ mul	r7, r8, sp")
+
+	TEST_RRR(   "mla	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mla	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_UNSUPPORTED(".short 0xfb08,0xaf09	@ mla	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xad09	@ mla	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0f,0xa709	@ mla	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb0d,0xa709	@ mla	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70f	@ mla	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xa70d	@ mla	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb08,0xd709	@ mla	r7, r8, r9, sp");
+
+	TEST_RRR(   "mls	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "mls	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+
+	TEST_RRR(   "smlabb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlabt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlabt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlatt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlatt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulbb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulbt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulbt	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smultt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smultt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlad	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlad	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smladx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smladx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smuad	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuad	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smuadx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smuadx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smlawb	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawb	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RRR(   "smlawt	r0, r",1, VAL1,", r",2, VAL2,", r",3,  VAL3,"")
+	TEST_RRR(   "smlawt	r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+	TEST_RR(    "smulwb	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwb	r7, r",8, VAL3,", r",9, VAL1,"")
+	TEST_RR(    "smulwt	r0, r",1, VAL1,", r",2, VAL2,"")
+	TEST_RR(    "smulwt	r7, r",8, VAL3,", r",9, VAL1,"")
+
+	TEST_RRR(   "smlsd	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsd	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RRR(   "smlsdx	r0, r",0,  HH1,", r",1, HH2,", r",2, VAL1,"")
+	TEST_RRR(   "smlsdx	r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"")
+	TEST_RR(    "smusd	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusd	r14, r",12,HH2,", r",10,HH1,"")
+	TEST_RR(    "smusdx	r0, r",0,  HH1,", r",1, HH2,"")
+	TEST_RR(    "smusdx	r14, r",12,HH2,", r",10,HH1,"")
+
+	TEST_RRR(   "smmla	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmla	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlar	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlar	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RR(    "smmul	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmul	r14, r",12,VAL2,", r",10,VAL1,"")
+	TEST_RR(    "smmulr	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "smmulr	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_RRR(   "smmls	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmls	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+	TEST_RRR(   "smmlsr	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL1,"")
+	TEST_RRR(   "smmlsr	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"")
+
+	TEST_RRR(   "usada8	r0, r",0,  VAL1,", r",1, VAL2,", r",2, VAL3,"")
+	TEST_RRR(   "usada8	r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"")
+	TEST_RR(    "usad8	r0, r",0,  VAL1,", r",1, VAL2,"")
+	TEST_RR(    "usad8	r14, r",12,VAL2,", r",10,VAL1,"")
+
+	TEST_UNSUPPORTED(".short 0xfb00,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb0f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0xf010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb70,0x0010") /* Unallocated space */
+	TEST_UNSUPPORTED(".short 0xfb7f,0xff1f") /* Unallocated space */
+
+	TEST_GROUP("Long multiply, long multiply accumulate, and divide")
+
+	TEST_RR(   "smull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "smull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+	TEST_UNSUPPORTED(".short 0xfb89,0xf80a	@ smull	pc, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0xd80a	@ smull	sp, r8, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7f0a	@ smull	r7, pc, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x7d0a	@ smull	r7, sp, r9, r10");
+	TEST_UNSUPPORTED(".short 0xfb8f,0x780a	@ smull	r7, r8, pc, r10");
+	TEST_UNSUPPORTED(".short 0xfb8d,0x780a	@ smull	r7, r8, sp, r10");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780f	@ smull	r7, r8, r9, pc");
+	TEST_UNSUPPORTED(".short 0xfb89,0x780d	@ smull	r7, r8, r9, sp");
+
+	TEST_RR(   "umull	r0, r1, r",2, VAL1,", r",3, VAL2,"")
+	TEST_RR(   "umull	r7, r8, r",9, VAL2,", r",10, VAL1,"")
+
+	TEST_RRRR( "smlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlalbb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlalbt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlalbt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltb	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltb	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "smlaltt	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "smlaltt	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_RRRR( "smlald	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlald	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlaldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlaldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "smlsld	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsld	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+	TEST_RRRR( "smlsldx	r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2)
+	TEST_RRRR( "smlsldx	r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1)
+
+	TEST_RRRR( "umlal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umlal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+	TEST_RRRR( "umaal	r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
+	TEST_RRRR( "umaal	r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+
+	TEST_GROUP("Coprocessor instructions")
+
+	TEST_UNSUPPORTED(".short 0xfc00,0x0000")
+	TEST_UNSUPPORTED(".short 0xffff,0xffff")
+
+	TEST_GROUP("Testing instructions in IT blocks")
+
+	TEST_ITBLOCK("sub.w	r0, r0")
+
+	verbose("\n");
+}
+
diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c
new file mode 100644
index 000000000000..e17cdd6d90d8
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.c
@@ -0,0 +1,1748 @@
+/*
+ * arch/arm/kernel/kprobes-test.c
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * This file contains test code for ARM kprobes.
+ *
+ * The top level function run_all_tests() executes tests for all of the
+ * supported instruction sets: ARM, 16-bit Thumb, and 32-bit Thumb. These tests
+ * fall into two categories; run_api_tests() checks basic functionality of the
+ * kprobes API, and run_test_cases() is a comprehensive test for kprobes
+ * instruction decoding and simulation.
+ *
+ * run_test_cases() first checks the kprobes decoding table for self consistency
+ * (using table_test()) then executes a series of test cases for each of the CPU
+ * instruction forms. coverage_start() and coverage_end() are used to verify
+ * that these test cases cover all of the possible combinations of instructions
+ * described by the kprobes decoding tables.
+ *
+ * The individual test cases are in kprobes-test-arm.c and kprobes-test-thumb.c
+ * which use the macros defined in kprobes-test.h. The rest of this
+ * documentation will describe the operation of the framework used by these
+ * test cases.
+ */
+
+/*
+ * TESTING METHODOLOGY
+ * -------------------
+ *
+ * The methodology used to test an ARM instruction 'test_insn' is to use
+ * inline assembler like:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *
+ * When the test case is run a kprobe is placed of each nop. The
+ * post-handler of the test_before probe is used to modify the saved CPU
+ * register context to that which we require for the test case. The
+ * pre-handler of the of the test_after probe saves a copy of the CPU
+ * register context. In this way we can execute test_insn with a specific
+ * register context and see the results afterwards.
+ *
+ * To actually test the kprobes instruction emulation we perform the above
+ * step a second time but with an additional kprobe on the test_case
+ * instruction itself. If the emulation is accurate then the results seen
+ * by the test_after probe will be identical to the first run which didn't
+ * have a probe on test_case.
+ *
+ * Each test case is run several times with a variety of variations in the
+ * flags value of stored in CPSR, and for Thumb code, different ITState.
+ *
+ * For instructions which can modify PC, a second test_after probe is used
+ * like this:
+ *
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ *		b test_done
+ * test_after2: nop
+ * test_done:
+ *
+ * The test case is constructed such that test_insn branches to
+ * test_after2, or, if testing a conditional instruction, it may just
+ * continue to test_after. The probes inserted at both locations let us
+ * determine which happened. A similar approach is used for testing
+ * backwards branches...
+ *
+ *		b test_before
+ *		b test_done  @ helps to cope with off by 1 branches
+ * test_after2: nop
+ *		b test_done
+ * test_before: nop
+ * test_case:	test_insn
+ * test_after:	nop
+ * test_done:
+ *
+ * The macros used to generate the assembler instructions describe above
+ * are TEST_INSTRUCTION, TEST_BRANCH_F (branch forwards) and TEST_BRANCH_B
+ * (branch backwards). In these, the local variables numbered 1, 50, 2 and
+ * 99 represent: test_before, test_case, test_after2 and test_done.
+ *
+ * FRAMEWORK
+ * ---------
+ *
+ * Each test case is wrapped between the pair of macros TESTCASE_START and
+ * TESTCASE_END. As well as performing the inline assembler boilerplate,
+ * these call out to the kprobes_test_case_start() and
+ * kprobes_test_case_end() functions which drive the execution of the test
+ * case. The specific arguments to use for each test case are stored as
+ * inline data constructed using the various TEST_ARG_* macros. Putting
+ * this all together, a simple test case may look like:
+ *
+ *	TESTCASE_START("Testing mov r0, r7")
+ *	TEST_ARG_REG(7, 0x12345678) // Set r7=0x12345678
+ *	TEST_ARG_END("")
+ *	TEST_INSTRUCTION("mov r0, r7")
+ *	TESTCASE_END
+ *
+ * Note, in practice the single convenience macro TEST_R would be used for this
+ * instead.
+ *
+ * The above would expand to assembler looking something like:
+ *
+ *	@ TESTCASE_START
+ *	bl	__kprobes_test_case_start
+ *	@ start of inline data...
+ *	.ascii "mov r0, r7"	@ text title for test case
+ *	.byte	0
+ *	.align	2
+ *
+ *	@ TEST_ARG_REG
+ *	.byte	ARG_TYPE_REG
+ *	.byte	7
+ *	.short	0
+ *	.word	0x1234567
+ *
+ *	@ TEST_ARG_END
+ *	.byte	ARG_TYPE_END
+ *	.byte	TEST_ISA	@ flags, including ISA being tested
+ *	.short	50f-0f		@ offset of 'test_before'
+ *	.short	2f-0f		@ offset of 'test_after2' (if relevent)
+ *	.short	99f-0f		@ offset of 'test_done'
+ *	@ start of test case code...
+ *	0:
+ *	.code	TEST_ISA	@ switch to ISA being tested
+ *
+ *	@ TEST_INSTRUCTION
+ *	50:	nop		@ location for 'test_before' probe
+ *	1:	mov r0, r7	@ the test case instruction 'test_insn'
+ *		nop		@ location for 'test_after' probe
+ *
+ *	// TESTCASE_END
+ *	2:
+ *	99:	bl __kprobes_test_case_end_##TEST_ISA
+ *	.code	NONMAL_ISA
+ *
+ * When the above is execute the following happens...
+ *
+ * __kprobes_test_case_start() is an assembler wrapper which sets up space
+ * for a stack buffer and calls the C function kprobes_test_case_start().
+ * This C function will do some initial processing of the inline data and
+ * setup some global state. It then inserts the test_before and test_after
+ * kprobes and returns a value which causes the assembler wrapper to jump
+ * to the start of the test case code, (local label '0').
+ *
+ * When the test case code executes, the test_before probe will be hit and
+ * test_before_post_handler will call setup_test_context(). This fills the
+ * stack buffer and CPU registers with a test pattern and then processes
+ * the test case arguments. In our example there is one TEST_ARG_REG which
+ * indicates that R7 should be loaded with the value 0x12345678.
+ *
+ * When the test_before probe ends, the test case continues and executes
+ * the "mov r0, r7" instruction. It then hits the test_after probe and the
+ * pre-handler for this (test_after_pre_handler) will save a copy of the
+ * CPU register context. This should now have R0 holding the same value as
+ * R7.
+ *
+ * Finally we get to the call to __kprobes_test_case_end_{32,16}. This is
+ * an assembler wrapper which switches back to the ISA used by the test
+ * code and calls the C function kprobes_test_case_end().
+ *
+ * For each run through the test case, test_case_run_count is incremented
+ * by one. For even runs, kprobes_test_case_end() saves a copy of the
+ * register and stack buffer contents from the test case just run. It then
+ * inserts a kprobe on the test case instruction 'test_insn' and returns a
+ * value to cause the test case code to be re-run.
+ *
+ * For odd numbered runs, kprobes_test_case_end() compares the register and
+ * stack buffer contents to those that were saved on the previous even
+ * numbered run (the one without the kprobe on test_insn). These should be
+ * the same if the kprobe instruction simulation routine is correct.
+ *
+ * The pair of test case runs is repeated with different combinations of
+ * flag values in CPSR and, for Thumb, different ITState. This is
+ * controlled by test_context_cpsr().
+ *
+ * BUILDING TEST CASES
+ * -------------------
+ *
+ *
+ * As an aid to building test cases, the stack buffer is initialised with
+ * some special values:
+ *
+ *   [SP+13*4]	Contains SP+120. This can be used to test instructions
+ *		which load a value into SP.
+ *
+ *   [SP+15*4]	When testing branching instructions using TEST_BRANCH_{F,B},
+ *		this holds the target address of the branch, 'test_after2'.
+ *		This can be used to test instructions which load a PC value
+ *		from memory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kprobes.h>
+
+#include "kprobes.h"
+#include "kprobes-test.h"
+
+
+#define BENCHMARKING	1
+
+
+/*
+ * Test basic API
+ */
+
+static bool test_regs_ok;
+static int test_func_instance;
+static int pre_handler_called;
+static int post_handler_called;
+static int jprobe_func_called;
+static int kretprobe_handler_called;
+
+#define FUNC_ARG1 0x12345678
+#define FUNC_ARG2 0xabcdef
+
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+long arm_func(long r0, long r1);
+
+static void __used __naked __arm_kprobes_test_func(void)
+{
+	__asm__ __volatile__ (
+		".arm					\n\t"
+		".type arm_func, %%function		\n\t"
+		"arm_func:				\n\t"
+		"adds	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+		".code "NORMAL_ISA	 /* Back to Thumb if necessary */
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+long thumb16_func(long r0, long r1);
+long thumb32even_func(long r0, long r1);
+long thumb32odd_func(long r0, long r1);
+
+static void __used __naked __thumb_kprobes_test_funcs(void)
+{
+	__asm__ __volatile__ (
+		".type thumb16_func, %%function		\n\t"
+		"thumb16_func:				\n\t"
+		"adds.n	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		".type thumb32even_func, %%function	\n\t"
+		"thumb32even_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		".align					\n\t"
+		"nop.n					\n\t"
+		".type thumb32odd_func, %%function	\n\t"
+		"thumb32odd_func:			\n\t"
+		"adds.w	r0, r0, r1			\n\t"
+		"bx	lr				\n\t"
+
+		: : : "r0", "r1", "cc"
+	);
+}
+
+#endif /* CONFIG_THUMB2_KERNEL */
+
+
+static int call_test_func(long (*func)(long, long), bool check_test_regs)
+{
+	long ret;
+
+	++test_func_instance;
+	test_regs_ok = false;
+
+	ret = (*func)(FUNC_ARG1, FUNC_ARG2);
+	if (ret != FUNC_ARG1 + FUNC_ARG2) {
+		pr_err("FAIL: call_test_func: func returned %lx\n", ret);
+		return false;
+	}
+
+	if (check_test_regs && !test_regs_ok) {
+		pr_err("FAIL: test regs not OK\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int __kprobes pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	pre_handler_called = test_func_instance;
+	if (regs->ARM_r0 == FUNC_ARG1 && regs->ARM_r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs,
+				unsigned long flags)
+{
+	post_handler_called = test_func_instance;
+	if (regs->ARM_r0 != FUNC_ARG1 + FUNC_ARG2 || regs->ARM_r1 != FUNC_ARG2)
+		test_regs_ok = false;
+}
+
+static struct kprobe the_kprobe = {
+	.addr		= 0,
+	.pre_handler	= pre_handler,
+	.post_handler	= post_handler
+};
+
+static int test_kprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kprobe.addr = (kprobe_opcode_t *)func;
+	ret = register_kprobe(&the_kprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kprobe(&the_kprobe);
+	the_kprobe.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (pre_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe pre_handler not called\n");
+		return -EINVAL;
+	}
+	if (post_handler_called != test_func_instance) {
+		pr_err("FAIL: kprobe post_handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (pre_handler_called == test_func_instance ||
+				post_handler_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void __kprobes jprobe_func(long r0, long r1)
+{
+	jprobe_func_called = test_func_instance;
+	if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2)
+		test_regs_ok = true;
+	jprobe_return();
+}
+
+static struct jprobe the_jprobe = {
+	.entry		= jprobe_func,
+};
+
+static int test_jprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_jprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_jprobe(&the_jprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_jprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_jprobe(&the_jprobe);
+	the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (jprobe_func_called != test_func_instance) {
+		pr_err("FAIL: jprobe handler function not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: probe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __kprobes
+kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	kretprobe_handler_called = test_func_instance;
+	if (regs_return_value(regs) == FUNC_ARG1 + FUNC_ARG2)
+		test_regs_ok = true;
+	return 0;
+}
+
+static struct kretprobe the_kretprobe = {
+	.handler	= kretprobe_handler,
+};
+
+static int test_kretprobe(long (*func)(long, long))
+{
+	int ret;
+
+	the_kretprobe.kp.addr = (kprobe_opcode_t *)func;
+	ret = register_kretprobe(&the_kretprobe);
+	if (ret < 0) {
+		pr_err("FAIL: register_kretprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = call_test_func(func, true);
+
+	unregister_kretprobe(&the_kretprobe);
+	the_kretprobe.kp.flags = 0; /* Clear disable flag to allow reuse */
+
+	if (!ret)
+		return -EINVAL;
+	if (kretprobe_handler_called != test_func_instance) {
+		pr_err("FAIL: kretprobe handler not called\n");
+		return -EINVAL;
+	}
+	if (!call_test_func(func, false))
+		return -EINVAL;
+	if (jprobe_func_called == test_func_instance) {
+		pr_err("FAIL: kretprobe called after unregistering\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int run_api_tests(long (*func)(long, long))
+{
+	int ret;
+
+	pr_info("    kprobe\n");
+	ret = test_kprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    jprobe\n");
+	ret = test_jprobe(func);
+	if (ret < 0)
+		return ret;
+
+	pr_info("    kretprobe\n");
+	ret = test_kretprobe(func);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+
+/*
+ * Benchmarking
+ */
+
+#if BENCHMARKING
+
+static void __naked benchmark_nop(void)
+{
+	__asm__ __volatile__ (
+		"nop		\n\t"
+		"bx	lr"
+	);
+}
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define wide ".w"
+#else
+#define wide
+#endif
+
+static void __naked benchmark_pushpop1(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r3-r11,lr}  \n\t"
+		"ldmia"wide"	sp!, {r3-r11,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop2(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0-r8,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0-r8,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop3(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r4,lr}  \n\t"
+		"ldmia"wide"	sp!, {r4,pc}"
+	);
+}
+
+static void __naked benchmark_pushpop4(void)
+{
+	__asm__ __volatile__ (
+		"stmdb"wide"	sp!, {r0,lr}  \n\t"
+		"ldmia"wide"	sp!, {r0,pc}"
+	);
+}
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+static void __naked benchmark_pushpop_thumb(void)
+{
+	__asm__ __volatile__ (
+		"push.n	{r0-r7,lr}  \n\t"
+		"pop.n	{r0-r7,pc}"
+	);
+}
+
+#endif
+
+static int __kprobes
+benchmark_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	return 0;
+}
+
+static int benchmark(void(*fn)(void))
+{
+	unsigned n, i, t, t0;
+
+	for (n = 1000; ; n *= 2) {
+		t0 = sched_clock();
+		for (i = n; i > 0; --i)
+			fn();
+		t = sched_clock() - t0;
+		if (t >= 250000000)
+			break; /* Stop once we took more than 0.25 seconds */
+	}
+	return t / n; /* Time for one iteration in nanoseconds */
+};
+
+static int kprobe_benchmark(void(*fn)(void), unsigned offset)
+{
+	struct kprobe k = {
+		.addr		= (kprobe_opcode_t *)((uintptr_t)fn + offset),
+		.pre_handler	= benchmark_pre_handler,
+	};
+
+	int ret = register_kprobe(&k);
+	if (ret < 0) {
+		pr_err("FAIL: register_kprobe failed with %d\n", ret);
+		return ret;
+	}
+
+	ret = benchmark(fn);
+
+	unregister_kprobe(&k);
+	return ret;
+};
+
+struct benchmarks {
+	void		(*fn)(void);
+	unsigned	offset;
+	const char	*title;
+};
+
+static int run_benchmarks(void)
+{
+	int ret;
+	struct benchmarks list[] = {
+		{&benchmark_nop, 0, "nop"},
+		/*
+		 * benchmark_pushpop{1,3} will have the optimised
+		 * instruction emulation, whilst benchmark_pushpop{2,4} will
+		 * be the equivalent unoptimised instructions.
+		 */
+		{&benchmark_pushpop1, 0, "stmdb	sp!, {r3-r11,lr}"},
+		{&benchmark_pushpop1, 4, "ldmia	sp!, {r3-r11,pc}"},
+		{&benchmark_pushpop2, 0, "stmdb	sp!, {r0-r8,lr}"},
+		{&benchmark_pushpop2, 4, "ldmia	sp!, {r0-r8,pc}"},
+		{&benchmark_pushpop3, 0, "stmdb	sp!, {r4,lr}"},
+		{&benchmark_pushpop3, 4, "ldmia	sp!, {r4,pc}"},
+		{&benchmark_pushpop4, 0, "stmdb	sp!, {r0,lr}"},
+		{&benchmark_pushpop4, 4, "ldmia	sp!, {r0,pc}"},
+#ifdef CONFIG_THUMB2_KERNEL
+		{&benchmark_pushpop_thumb, 0, "push.n	{r0-r7,lr}"},
+		{&benchmark_pushpop_thumb, 2, "pop.n	{r0-r7,pc}"},
+#endif
+		{0}
+	};
+
+	struct benchmarks *b;
+	for (b = list; b->fn; ++b) {
+		ret = kprobe_benchmark(b->fn, b->offset);
+		if (ret < 0)
+			return ret;
+		pr_info("    %dns for kprobe %s\n", ret, b->title);
+	}
+
+	pr_info("\n");
+	return 0;
+}
+
+#endif /* BENCHMARKING */
+
+
+/*
+ * Decoding table self-consistency tests
+ */
+
+static const int decode_struct_sizes[NUM_DECODE_TYPES] = {
+	[DECODE_TYPE_TABLE]	= sizeof(struct decode_table),
+	[DECODE_TYPE_CUSTOM]	= sizeof(struct decode_custom),
+	[DECODE_TYPE_SIMULATE]	= sizeof(struct decode_simulate),
+	[DECODE_TYPE_EMULATE]	= sizeof(struct decode_emulate),
+	[DECODE_TYPE_OR]	= sizeof(struct decode_or),
+	[DECODE_TYPE_REJECT]	= sizeof(struct decode_reject)
+};
+
+static int table_iter(const union decode_item *table,
+			int (*fn)(const struct decode_header *, void *),
+			void *args)
+{
+	const struct decode_header *h = (struct decode_header *)table;
+	int result;
+
+	for (;;) {
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (type == DECODE_TYPE_END)
+			return 0;
+
+		result = fn(h, args);
+		if (result)
+			return result;
+
+		h = (struct decode_header *)
+			((uintptr_t)h + decode_struct_sizes[type]);
+
+	}
+}
+
+static int table_test_fail(const struct decode_header *h, const char* message)
+{
+
+	pr_err("FAIL: kprobes test failure \"%s\" (mask %08x, value %08x)\n",
+					message, h->mask.bits, h->value.bits);
+	return -EINVAL;
+}
+
+struct table_test_args {
+	const union decode_item *root_table;
+	u32			parent_mask;
+	u32			parent_value;
+};
+
+static int table_test_fn(const struct decode_header *h, void *args)
+{
+	struct table_test_args *a = (struct table_test_args *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+	if (h->value.bits & ~h->mask.bits)
+		return table_test_fail(h, "Match value has bits not in mask");
+
+	if ((h->mask.bits & a->parent_mask) != a->parent_mask)
+		return table_test_fail(h, "Mask has bits not in parent mask");
+
+	if ((h->value.bits ^ a->parent_value) & a->parent_mask)
+		return table_test_fail(h, "Value is inconsistent with parent");
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		struct table_test_args args2 = *a;
+		args2.parent_mask = h->mask.bits;
+		args2.parent_value = h->value.bits;
+		return table_iter(d->table.table, table_test_fn, &args2);
+	}
+
+	return 0;
+}
+
+static int table_test(const union decode_item *table)
+{
+	struct table_test_args args = {
+		.root_table	= table,
+		.parent_mask	= 0,
+		.parent_value	= 0
+	};
+	return table_iter(args.root_table, table_test_fn, &args);
+}
+
+
+/*
+ * Decoding table test coverage analysis
+ *
+ * coverage_start() builds a coverage_table which contains a list of
+ * coverage_entry's to match each entry in the specified kprobes instruction
+ * decoding table.
+ *
+ * When test cases are run, coverage_add() is called to process each case.
+ * This looks up the corresponding entry in the coverage_table and sets it as
+ * being matched, as well as clearing the regs flag appropriate for the test.
+ *
+ * After all test cases have been run, coverage_end() is called to check that
+ * all entries in coverage_table have been matched and that all regs flags are
+ * cleared. I.e. that all possible combinations of instructions described by
+ * the kprobes decoding tables have had a test case executed for them.
+ */
+
+bool coverage_fail;
+
+#define MAX_COVERAGE_ENTRIES 256
+
+struct coverage_entry {
+	const struct decode_header	*header;
+	unsigned			regs;
+	unsigned			nesting;
+	char				matched;
+};
+
+struct coverage_table {
+	struct coverage_entry	*base;
+	unsigned		num_entries;
+	unsigned		nesting;
+};
+
+struct coverage_table coverage;
+
+#define COVERAGE_ANY_REG	(1<<0)
+#define COVERAGE_SP		(1<<1)
+#define COVERAGE_PC		(1<<2)
+#define COVERAGE_PCWB		(1<<3)
+
+static const char coverage_register_lookup[16] = {
+	[REG_TYPE_ANY]		= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_SAMEAS16]	= COVERAGE_ANY_REG,
+	[REG_TYPE_SP]		= COVERAGE_SP,
+	[REG_TYPE_PC]		= COVERAGE_PC,
+	[REG_TYPE_NOSP]		= COVERAGE_ANY_REG | COVERAGE_SP,
+	[REG_TYPE_NOSPPC]	= COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC,
+	[REG_TYPE_NOPC]		= COVERAGE_ANY_REG | COVERAGE_PC,
+	[REG_TYPE_NOPCWB]	= COVERAGE_ANY_REG | COVERAGE_PC | COVERAGE_PCWB,
+	[REG_TYPE_NOPCX]	= COVERAGE_ANY_REG,
+	[REG_TYPE_NOSPPCX]	= COVERAGE_ANY_REG | COVERAGE_SP,
+};
+
+unsigned coverage_start_registers(const struct decode_header *h)
+{
+	unsigned regs = 0;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		int r = (h->type_regs.bits >> (DECODE_TYPE_BITS + i)) & 0xf;
+		regs |= coverage_register_lookup[r] << i;
+	}
+	return regs;
+}
+
+static int coverage_start_fn(const struct decode_header *h, void *args)
+{
+	struct coverage_table *coverage = (struct coverage_table *)args;
+	enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+	struct coverage_entry *entry = coverage->base + coverage->num_entries;
+
+	if (coverage->num_entries == MAX_COVERAGE_ENTRIES - 1) {
+		pr_err("FAIL: Out of space for test coverage data");
+		return -ENOMEM;
+	}
+
+	++coverage->num_entries;
+
+	entry->header = h;
+	entry->regs = coverage_start_registers(h);
+	entry->nesting = coverage->nesting;
+	entry->matched = false;
+
+	if (type == DECODE_TYPE_TABLE) {
+		struct decode_table *d = (struct decode_table *)h;
+		int ret;
+		++coverage->nesting;
+		ret = table_iter(d->table.table, coverage_start_fn, coverage);
+		--coverage->nesting;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int coverage_start(const union decode_item *table)
+{
+	coverage.base = kmalloc(MAX_COVERAGE_ENTRIES *
+				sizeof(struct coverage_entry), GFP_KERNEL);
+	coverage.num_entries = 0;
+	coverage.nesting = 0;
+	return table_iter(table, coverage_start_fn, &coverage);
+}
+
+static void
+coverage_add_registers(struct coverage_entry *entry, kprobe_opcode_t insn)
+{
+	int regs = entry->header->type_regs.bits >> DECODE_TYPE_BITS;
+	int i;
+	for (i = 0; i < 20; i += 4) {
+		enum decode_reg_type reg_type = (regs >> i) & 0xf;
+		int reg = (insn >> i) & 0xf;
+		int flag;
+
+		if (!reg_type)
+			continue;
+
+		if (reg == 13)
+			flag = COVERAGE_SP;
+		else if (reg == 15)
+			flag = COVERAGE_PC;
+		else
+			flag = COVERAGE_ANY_REG;
+		entry->regs &= ~(flag << i);
+
+		switch (reg_type) {
+
+		case REG_TYPE_NONE:
+		case REG_TYPE_ANY:
+		case REG_TYPE_SAMEAS16:
+			break;
+
+		case REG_TYPE_SP:
+			if (reg != 13)
+				return;
+			break;
+
+		case REG_TYPE_PC:
+			if (reg != 15)
+				return;
+			break;
+
+		case REG_TYPE_NOSP:
+			if (reg == 13)
+				return;
+			break;
+
+		case REG_TYPE_NOSPPC:
+		case REG_TYPE_NOSPPCX:
+			if (reg == 13 || reg == 15)
+				return;
+			break;
+
+		case REG_TYPE_NOPCWB:
+			if (!is_writeback(insn))
+				break;
+			if (reg == 15) {
+				entry->regs &= ~(COVERAGE_PCWB << i);
+				return;
+			}
+			break;
+
+		case REG_TYPE_NOPC:
+		case REG_TYPE_NOPCX:
+			if (reg == 15)
+				return;
+			break;
+		}
+
+	}
+}
+
+static void coverage_add(kprobe_opcode_t insn)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+	bool matched = false;
+	unsigned nesting = 0;
+
+	for (; entry < end; ++entry) {
+		const struct decode_header *h = entry->header;
+		enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK;
+
+		if (entry->nesting > nesting)
+			continue; /* Skip sub-table we didn't match */
+
+		if (entry->nesting < nesting)
+			break; /* End of sub-table we were scanning */
+
+		if (!matched) {
+			if ((insn & h->mask.bits) != h->value.bits)
+				continue;
+			entry->matched = true;
+		}
+
+		switch (type) {
+
+		case DECODE_TYPE_TABLE:
+			++nesting;
+			break;
+
+		case DECODE_TYPE_CUSTOM:
+		case DECODE_TYPE_SIMULATE:
+		case DECODE_TYPE_EMULATE:
+			coverage_add_registers(entry, insn);
+			return;
+
+		case DECODE_TYPE_OR:
+			matched = true;
+			break;
+
+		case DECODE_TYPE_REJECT:
+		default:
+			return;
+		}
+
+	}
+}
+
+static void coverage_end(void)
+{
+	struct coverage_entry *entry = coverage.base;
+	struct coverage_entry *end = coverage.base + coverage.num_entries;
+
+	for (; entry < end; ++entry) {
+		u32 mask = entry->header->mask.bits;
+		u32 value = entry->header->value.bits;
+
+		if (entry->regs) {
+			pr_err("FAIL: Register test coverage missing for %08x %08x (%05x)\n",
+				mask, value, entry->regs);
+			coverage_fail = true;
+		}
+		if (!entry->matched) {
+			pr_err("FAIL: Test coverage entry missing for %08x %08x\n",
+				mask, value);
+			coverage_fail = true;
+		}
+	}
+
+	kfree(coverage.base);
+}
+
+
+/*
+ * Framework for instruction set test cases
+ */
+
+void __naked __kprobes_test_case_start(void)
+{
+	__asm__ __volatile__ (
+		"stmdb	sp!, {r4-r11}				\n\t"
+		"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"bic	r0, lr, #1  @ r0 = inline title string	\n\t"
+		"mov	r1, sp					\n\t"
+		"bl	kprobes_test_case_start			\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"movne	pc, r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"mov	pc, r0					\n\t"
+	);
+}
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+void __naked __kprobes_test_case_end_16(void)
+{
+	__asm__ __volatile__ (
+		"mov	r4, lr					\n\t"
+		"bl	kprobes_test_case_end			\n\t"
+		"cmp	r0, #0					\n\t"
+		"bxne	r0					\n\t"
+		"mov	r0, r4					\n\t"
+		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
+		"ldmia	sp!, {r4-r11}				\n\t"
+		"bx	r0					\n\t"
+	);
+}
+
+void __naked __kprobes_test_case_end_32(void)
+{
+	__asm__ __volatile__ (
+		".arm						\n\t"
+		"orr	lr, lr, #1  @ will return to Thumb code	\n\t"
+		"ldr	pc, 1f					\n\t"
+		"1:						\n\t"
+		".word	__kprobes_test_case_end_16		\n\t"
+	);
+}
+
+#endif
+
+
+int kprobe_test_flags;
+int kprobe_test_cc_position;
+
+static int test_try_count;
+static int test_pass_count;
+static int test_fail_count;
+
+static struct pt_regs initial_regs;
+static struct pt_regs expected_regs;
+static struct pt_regs result_regs;
+
+static u32 expected_memory[TEST_MEMORY_SIZE/sizeof(u32)];
+
+static const char *current_title;
+static struct test_arg *current_args;
+static u32 *current_stack;
+static uintptr_t current_branch_target;
+
+static uintptr_t current_code_start;
+static kprobe_opcode_t current_instruction;
+
+
+#define TEST_CASE_PASSED -1
+#define TEST_CASE_FAILED -2
+
+static int test_case_run_count;
+static bool test_case_is_thumb;
+static int test_instance;
+
+/*
+ * We ignore the state of the imprecise abort disable flag (CPSR.A) because this
+ * can change randomly as the kernel doesn't take care to preserve or initialise
+ * this across context switches. Also, with Security Extentions, the flag may
+ * not be under control of the kernel; for this reason we ignore the state of
+ * the FIQ disable flag CPSR.F as well.
+ */
+#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT)
+
+static unsigned long test_check_cc(int cc, unsigned long cpsr)
+{
+	unsigned long temp;
+
+	switch (cc) {
+	case 0x0: /* eq */
+		return cpsr & PSR_Z_BIT;
+
+	case 0x1: /* ne */
+		return (~cpsr) & PSR_Z_BIT;
+
+	case 0x2: /* cs */
+		return cpsr & PSR_C_BIT;
+
+	case 0x3: /* cc */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0x4: /* mi */
+		return cpsr & PSR_N_BIT;
+
+	case 0x5: /* pl */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0x6: /* vs */
+		return cpsr & PSR_V_BIT;
+
+	case 0x7: /* vc */
+		return (~cpsr) & PSR_V_BIT;
+
+	case 0x8: /* hi */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return cpsr & PSR_C_BIT;
+
+	case 0x9: /* ls */
+		cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+		return (~cpsr) & PSR_C_BIT;
+
+	case 0xa: /* ge */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return (~cpsr) & PSR_N_BIT;
+
+	case 0xb: /* lt */
+		cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		return cpsr & PSR_N_BIT;
+
+	case 0xc: /* gt */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return (~temp) & PSR_N_BIT;
+
+	case 0xd: /* le */
+		temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+		temp |= (cpsr << 1);	   /* PSR_N_BIT |= PSR_Z_BIT */
+		return temp & PSR_N_BIT;
+
+	case 0xe: /* al */
+	case 0xf: /* unconditional */
+		return true;
+	}
+	BUG();
+	return false;
+}
+
+static int is_last_scenario;
+static int probe_should_run; /* 0 = no, 1 = yes, -1 = unknown */
+static int memory_needs_checking;
+
+static unsigned long test_context_cpsr(int scenario)
+{
+	unsigned long cpsr;
+
+	probe_should_run = 1;
+
+	/* Default case is that we cycle through 16 combinations of flags */
+	cpsr  = (scenario & 0xf) << 28; /* N,Z,C,V flags */
+	cpsr |= (scenario & 0xf) << 16; /* GE flags */
+	cpsr |= (scenario & 0x1) << 27; /* Toggle Q flag */
+
+	if (!test_case_is_thumb) {
+		/* Testing ARM code */
+		probe_should_run = test_check_cc(current_instruction >> 28, cpsr) != 0;
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_NO_ITBLOCK) {
+		/* Testing Thumb code without setting ITSTATE */
+		if (kprobe_test_cc_position) {
+			int cc = (current_instruction >> kprobe_test_cc_position) & 0xf;
+			probe_should_run = test_check_cc(cc, cpsr) != 0;
+		}
+
+		if (scenario == 15)
+			is_last_scenario = true;
+
+	} else if (kprobe_test_flags & TEST_FLAG_FULL_ITBLOCK) {
+		/* Testing Thumb code with all combinations of ITSTATE */
+		unsigned x = (scenario >> 4);
+		unsigned cond_base = x % 7; /* ITSTATE<7:5> */
+		unsigned mask = x / 7 + 2;  /* ITSTATE<4:0>, bits reversed */
+
+		if (mask > 0x1f) {
+			/* Finish by testing state from instruction 'itt al' */
+			cond_base = 7;
+			mask = 0x4;
+			if ((scenario & 0xf) == 0xf)
+				is_last_scenario = true;
+		}
+
+		cpsr |= cond_base << 13;	/* ITSTATE<7:5> */
+		cpsr |= (mask & 0x1) << 12;	/* ITSTATE<4> */
+		cpsr |= (mask & 0x2) << 10;	/* ITSTATE<3> */
+		cpsr |= (mask & 0x4) << 8;	/* ITSTATE<2> */
+		cpsr |= (mask & 0x8) << 23;	/* ITSTATE<1> */
+		cpsr |= (mask & 0x10) << 21;	/* ITSTATE<0> */
+
+		probe_should_run = test_check_cc((cpsr >> 12) & 0xf, cpsr) != 0;
+
+	} else {
+		/* Testing Thumb code with several combinations of ITSTATE */
+		switch (scenario) {
+		case 16: /* Clear NZCV flags and 'it eq' state (false as Z=0) */
+			cpsr = 0x00000800;
+			probe_should_run = 0;
+			break;
+		case 17: /* Set NZCV flags and 'it vc' state (false as V=1) */
+			cpsr = 0xf0007800;
+			probe_should_run = 0;
+			break;
+		case 18: /* Clear NZCV flags and 'it ls' state (true as C=0) */
+			cpsr = 0x00009800;
+			break;
+		case 19: /* Set NZCV flags and 'it cs' state (true as C=1) */
+			cpsr = 0xf0002800;
+			is_last_scenario = true;
+			break;
+		}
+	}
+
+	return cpsr;
+}
+
+static void setup_test_context(struct pt_regs *regs)
+{
+	int scenario = test_case_run_count>>1;
+	unsigned long val;
+	struct test_arg *args;
+	int i;
+
+	is_last_scenario = false;
+	memory_needs_checking = false;
+
+	/* Initialise test memory on stack */
+	val = (scenario & 1) ? VALM : ~VALM;
+	for (i = 0; i < TEST_MEMORY_SIZE / sizeof(current_stack[0]); ++i)
+		current_stack[i] = val + (i << 8);
+	/* Put target of branch on stack for tests which load PC from memory */
+	if (current_branch_target)
+		current_stack[15] = current_branch_target;
+	/* Put a value for SP on stack for tests which load SP from memory */
+	current_stack[13] = (u32)current_stack + 120;
+
+	/* Initialise register values to their default state */
+	val = (scenario & 2) ? VALR : ~VALR;
+	for (i = 0; i < 13; ++i)
+		regs->uregs[i] = val ^ (i << 8);
+	regs->ARM_lr = val ^ (14 << 8);
+	regs->ARM_cpsr &= ~(APSR_MASK | PSR_IT_MASK);
+	regs->ARM_cpsr |= test_context_cpsr(scenario);
+
+	/* Perform testcase specific register setup  */
+	args = current_args;
+	for (; args[0].type != ARG_TYPE_END; ++args)
+		switch (args[0].type) {
+		case ARG_TYPE_REG: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] = arg->val;
+			break;
+		}
+		case ARG_TYPE_PTR: {
+			struct test_arg_regptr *arg =
+				(struct test_arg_regptr *)args;
+			regs->uregs[arg->reg] =
+				(unsigned long)current_stack + arg->val;
+			memory_needs_checking = true;
+			break;
+		}
+		case ARG_TYPE_MEM: {
+			struct test_arg_mem *arg = (struct test_arg_mem *)args;
+			current_stack[arg->index] = arg->val;
+			break;
+		}
+		default:
+			break;
+		}
+}
+
+struct test_probe {
+	struct kprobe	kprobe;
+	bool		registered;
+	int		hit;
+};
+
+static void unregister_test_probe(struct test_probe *probe)
+{
+	if (probe->registered) {
+		unregister_kprobe(&probe->kprobe);
+		probe->kprobe.flags = 0; /* Clear disable flag to allow reuse */
+	}
+	probe->registered = false;
+}
+
+static int register_test_probe(struct test_probe *probe)
+{
+	int ret;
+
+	if (probe->registered)
+		BUG();
+
+	ret = register_kprobe(&probe->kprobe);
+	if (ret >= 0) {
+		probe->registered = true;
+		probe->hit = -1;
+	}
+	return ret;
+}
+
+static int __kprobes
+test_before_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static void __kprobes
+test_before_post_handler(struct kprobe *p, struct pt_regs *regs,
+							unsigned long flags)
+{
+	setup_test_context(regs);
+	initial_regs = *regs;
+	initial_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+}
+
+static int __kprobes
+test_case_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static int __kprobes
+test_after_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	if (container_of(p, struct test_probe, kprobe)->hit == test_instance)
+		return 0; /* Already run for this test instance */
+
+	result_regs = *regs;
+	result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS;
+
+	/* Undo any changes done to SP by the test case */
+	regs->ARM_sp = (unsigned long)current_stack;
+
+	container_of(p, struct test_probe, kprobe)->hit = test_instance;
+	return 0;
+}
+
+static struct test_probe test_before_probe = {
+	.kprobe.pre_handler	= test_before_pre_handler,
+	.kprobe.post_handler	= test_before_post_handler,
+};
+
+static struct test_probe test_case_probe = {
+	.kprobe.pre_handler	= test_case_pre_handler,
+};
+
+static struct test_probe test_after_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static struct test_probe test_after2_probe = {
+	.kprobe.pre_handler	= test_after_pre_handler,
+};
+
+static void test_case_cleanup(void)
+{
+	unregister_test_probe(&test_before_probe);
+	unregister_test_probe(&test_case_probe);
+	unregister_test_probe(&test_after_probe);
+	unregister_test_probe(&test_after2_probe);
+}
+
+static void print_registers(struct pt_regs *regs)
+{
+	pr_err("r0  %08lx | r1  %08lx | r2  %08lx | r3  %08lx\n",
+		regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
+	pr_err("r4  %08lx | r5  %08lx | r6  %08lx | r7  %08lx\n",
+		regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7);
+	pr_err("r8  %08lx | r9  %08lx | r10 %08lx | r11 %08lx\n",
+		regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp);
+	pr_err("r12 %08lx | sp  %08lx | lr  %08lx | pc  %08lx\n",
+		regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc);
+	pr_err("cpsr %08lx\n", regs->ARM_cpsr);
+}
+
+static void print_memory(u32 *mem, size_t size)
+{
+	int i;
+	for (i = 0; i < size / sizeof(u32); i += 4)
+		pr_err("%08x %08x %08x %08x\n", mem[i], mem[i+1],
+						mem[i+2], mem[i+3]);
+}
+
+static size_t expected_memory_size(u32 *sp)
+{
+	size_t size = sizeof(expected_memory);
+	int offset = (uintptr_t)sp - (uintptr_t)current_stack;
+	if (offset > 0)
+		size -= offset;
+	return size;
+}
+
+static void test_case_failed(const char *message)
+{
+	test_case_cleanup();
+
+	pr_err("FAIL: %s\n", message);
+	pr_err("FAIL: Test %s\n", current_title);
+	pr_err("FAIL: Scenario %d\n", test_case_run_count >> 1);
+}
+
+static unsigned long next_instruction(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	if ((pc & 1) && !is_wide_instruction(*(u16 *)(pc - 1)))
+		return pc + 2;
+	else
+#endif
+	return pc + 4;
+}
+
+static uintptr_t __used kprobes_test_case_start(const char *title, void *stack)
+{
+	struct test_arg *args;
+	struct test_arg_end *end_arg;
+	unsigned long test_code;
+
+	args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4);
+
+	current_title = title;
+	current_args = args;
+	current_stack = stack;
+
+	++test_try_count;
+
+	while (args->type != ARG_TYPE_END)
+		++args;
+	end_arg = (struct test_arg_end *)args;
+
+	test_code = (unsigned long)(args + 1); /* Code starts after args */
+
+	test_case_is_thumb = end_arg->flags & ARG_FLAG_THUMB;
+	if (test_case_is_thumb)
+		test_code |= 1;
+
+	current_code_start = test_code;
+
+	current_branch_target = 0;
+	if (end_arg->branch_offset != end_arg->end_offset)
+		current_branch_target = test_code + end_arg->branch_offset;
+
+	test_code += end_arg->code_offset;
+	test_before_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	test_code = next_instruction(test_code);
+	test_case_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (test_case_is_thumb) {
+		u16 *p = (u16 *)(test_code & ~1);
+		current_instruction = p[0];
+		if (is_wide_instruction(current_instruction)) {
+			current_instruction <<= 16;
+			current_instruction |= p[1];
+		}
+	} else {
+		current_instruction = *(u32 *)test_code;
+	}
+
+	if (current_title[0] == '.')
+		verbose("%s\n", current_title);
+	else
+		verbose("%s\t@ %0*x\n", current_title,
+					test_case_is_thumb ? 4 : 8,
+					current_instruction);
+
+	test_code = next_instruction(test_code);
+	test_after_probe.kprobe.addr = (kprobe_opcode_t *)test_code;
+
+	if (kprobe_test_flags & TEST_FLAG_NARROW_INSTR) {
+		if (!test_case_is_thumb ||
+			is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 16-bit instruction");
+				goto fail;
+		}
+	} else {
+		if (test_case_is_thumb &&
+			!is_wide_instruction(current_instruction)) {
+				test_case_failed("expected 32-bit instruction");
+				goto fail;
+		}
+	}
+
+	coverage_add(current_instruction);
+
+	if (end_arg->flags & ARG_FLAG_UNSUPPORTED) {
+		if (register_test_probe(&test_case_probe) < 0)
+			goto pass;
+		test_case_failed("registered probe for unsupported instruction");
+		goto fail;
+	}
+
+	if (end_arg->flags & ARG_FLAG_SUPPORTED) {
+		if (register_test_probe(&test_case_probe) >= 0)
+			goto pass;
+		test_case_failed("couldn't register probe for supported instruction");
+		goto fail;
+	}
+
+	if (register_test_probe(&test_before_probe) < 0) {
+		test_case_failed("register test_before_probe failed");
+		goto fail;
+	}
+	if (register_test_probe(&test_after_probe) < 0) {
+		test_case_failed("register test_after_probe failed");
+		goto fail;
+	}
+	if (current_branch_target) {
+		test_after2_probe.kprobe.addr =
+				(kprobe_opcode_t *)current_branch_target;
+		if (register_test_probe(&test_after2_probe) < 0) {
+			test_case_failed("register test_after2_probe failed");
+			goto fail;
+		}
+	}
+
+	/* Start first run of test case */
+	test_case_run_count = 0;
+	++test_instance;
+	return current_code_start;
+pass:
+	test_case_run_count = TEST_CASE_PASSED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+fail:
+	test_case_run_count = TEST_CASE_FAILED;
+	return (uintptr_t)test_after_probe.kprobe.addr;
+}
+
+static bool check_test_results(void)
+{
+	size_t mem_size = 0;
+	u32 *mem = 0;
+
+	if (memcmp(&expected_regs, &result_regs, sizeof(expected_regs))) {
+		test_case_failed("registers differ");
+		goto fail;
+	}
+
+	if (memory_needs_checking) {
+		mem = (u32 *)result_regs.ARM_sp;
+		mem_size = expected_memory_size(mem);
+		if (memcmp(expected_memory, mem, mem_size)) {
+			test_case_failed("test memory differs");
+			goto fail;
+		}
+	}
+
+	return true;
+
+fail:
+	pr_err("initial_regs:\n");
+	print_registers(&initial_regs);
+	pr_err("expected_regs:\n");
+	print_registers(&expected_regs);
+	pr_err("result_regs:\n");
+	print_registers(&result_regs);
+
+	if (mem) {
+		pr_err("current_stack=%p\n", current_stack);
+		pr_err("expected_memory:\n");
+		print_memory(expected_memory, mem_size);
+		pr_err("result_memory:\n");
+		print_memory(mem, mem_size);
+	}
+
+	return false;
+}
+
+static uintptr_t __used kprobes_test_case_end(void)
+{
+	if (test_case_run_count < 0) {
+		if (test_case_run_count == TEST_CASE_PASSED)
+			/* kprobes_test_case_start did all the needed testing */
+			goto pass;
+		else
+			/* kprobes_test_case_start failed */
+			goto fail;
+	}
+
+	if (test_before_probe.hit != test_instance) {
+		test_case_failed("test_before_handler not run");
+		goto fail;
+	}
+
+	if (test_after_probe.hit != test_instance &&
+				test_after2_probe.hit != test_instance) {
+		test_case_failed("test_after_handler not run");
+		goto fail;
+	}
+
+	/*
+	 * Even numbered test runs ran without a probe on the test case so
+	 * we can gather reference results. The subsequent odd numbered run
+	 * will have the probe inserted.
+	*/
+	if ((test_case_run_count & 1) == 0) {
+		/* Save results from run without probe */
+		u32 *mem = (u32 *)result_regs.ARM_sp;
+		expected_regs = result_regs;
+		memcpy(expected_memory, mem, expected_memory_size(mem));
+
+		/* Insert probe onto test case instruction */
+		if (register_test_probe(&test_case_probe) < 0) {
+			test_case_failed("register test_case_probe failed");
+			goto fail;
+		}
+	} else {
+		/* Check probe ran as expected */
+		if (probe_should_run == 1) {
+			if (test_case_probe.hit != test_instance) {
+				test_case_failed("test_case_handler not run");
+				goto fail;
+			}
+		} else if (probe_should_run == 0) {
+			if (test_case_probe.hit == test_instance) {
+				test_case_failed("test_case_handler ran");
+				goto fail;
+			}
+		}
+
+		/* Remove probe for any subsequent reference run */
+		unregister_test_probe(&test_case_probe);
+
+		if (!check_test_results())
+			goto fail;
+
+		if (is_last_scenario)
+			goto pass;
+	}
+
+	/* Do next test run */
+	++test_case_run_count;
+	++test_instance;
+	return current_code_start;
+fail:
+	++test_fail_count;
+	goto end;
+pass:
+	++test_pass_count;
+end:
+	test_case_cleanup();
+	return 0;
+}
+
+
+/*
+ * Top level test functions
+ */
+
+static int run_test_cases(void (*tests)(void), const union decode_item *table)
+{
+	int ret;
+
+	pr_info("    Check decoding tables\n");
+	ret = table_test(table);
+	if (ret)
+		return ret;
+
+	pr_info("    Run test cases\n");
+	ret = coverage_start(table);
+	if (ret)
+		return ret;
+
+	tests();
+
+	coverage_end();
+	return 0;
+}
+
+
+static int __init run_all_tests(void)
+{
+	int ret = 0;
+
+	pr_info("Begining kprobe tests...\n");
+
+#ifndef CONFIG_THUMB2_KERNEL
+
+	pr_info("Probe ARM code\n");
+	ret = run_api_tests(arm_func);
+	if (ret)
+		goto out;
+
+	pr_info("ARM instruction simulation\n");
+	ret = run_test_cases(kprobe_arm_test_cases, kprobe_decode_arm_table);
+	if (ret)
+		goto out;
+
+#else /* CONFIG_THUMB2_KERNEL */
+
+	pr_info("Probe 16-bit Thumb code\n");
+	ret = run_api_tests(thumb16_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, even halfword\n");
+	ret = run_api_tests(thumb32even_func);
+	if (ret)
+		goto out;
+
+	pr_info("Probe 32-bit Thumb code, odd halfword\n");
+	ret = run_api_tests(thumb32odd_func);
+	if (ret)
+		goto out;
+
+	pr_info("16-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb16_test_cases,
+				kprobe_decode_thumb16_table);
+	if (ret)
+		goto out;
+
+	pr_info("32-bit Thumb instruction simulation\n");
+	ret = run_test_cases(kprobe_thumb32_test_cases,
+				kprobe_decode_thumb32_table);
+	if (ret)
+		goto out;
+#endif
+
+	pr_info("Total instruction simulation tests=%d, pass=%d fail=%d\n",
+		test_try_count, test_pass_count, test_fail_count);
+	if (test_fail_count) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+#if BENCHMARKING
+	pr_info("Benchmarks\n");
+	ret = run_benchmarks();
+	if (ret)
+		goto out;
+#endif
+
+#if __LINUX_ARM_ARCH__ >= 7
+	/* We are able to run all test cases so coverage should be complete */
+	if (coverage_fail) {
+		pr_err("FAIL: Test coverage checks failed\n");
+		ret = -EINVAL;
+		goto out;
+	}
+#endif
+
+out:
+	if (ret == 0)
+		pr_info("Finished kprobe tests OK\n");
+	else
+		pr_err("kprobe tests failed\n");
+
+	return ret;
+}
+
+
+/*
+ * Module setup
+ */
+
+#ifdef MODULE
+
+static void __exit kprobe_test_exit(void)
+{
+}
+
+module_init(run_all_tests)
+module_exit(kprobe_test_exit)
+MODULE_LICENSE("GPL");
+
+#else /* !MODULE */
+
+late_initcall(run_all_tests);
+
+#endif
diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/kernel/kprobes-test.h
new file mode 100644
index 000000000000..0dc5d77b9356
--- /dev/null
+++ b/arch/arm/kernel/kprobes-test.h
@@ -0,0 +1,392 @@
+/*
+ * arch/arm/kernel/kprobes-test.h
+ *
+ * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define VERBOSE 0 /* Set to '1' for more logging of test cases */
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define NORMAL_ISA "16"
+#else
+#define NORMAL_ISA "32"
+#endif
+
+
+/* Flags used in kprobe_test_flags */
+#define TEST_FLAG_NO_ITBLOCK	(1<<0)
+#define TEST_FLAG_FULL_ITBLOCK	(1<<1)
+#define TEST_FLAG_NARROW_INSTR	(1<<2)
+
+extern int kprobe_test_flags;
+extern int kprobe_test_cc_position;
+
+
+#define TEST_MEMORY_SIZE 256
+
+
+/*
+ * Test case structures.
+ *
+ * The arguments given to test cases can be one of three types.
+ *
+ *   ARG_TYPE_REG
+ *	Load a register with the given value.
+ *
+ *   ARG_TYPE_PTR
+ *	Load a register with a pointer into the stack buffer (SP + given value).
+ *
+ *   ARG_TYPE_MEM
+ *	Store the given value into the stack buffer at [SP+index].
+ *
+ */
+
+#define	ARG_TYPE_END	0
+#define	ARG_TYPE_REG	1
+#define	ARG_TYPE_PTR	2
+#define	ARG_TYPE_MEM	3
+
+#define ARG_FLAG_UNSUPPORTED	0x01
+#define ARG_FLAG_SUPPORTED	0x02
+#define ARG_FLAG_THUMB		0x10	/* Must be 16 so TEST_ISA can be used */
+#define ARG_FLAG_ARM		0x20	/* Must be 32 so TEST_ISA can be used */
+
+struct test_arg {
+	u8	type;		/* ARG_TYPE_x */
+	u8	_padding[7];
+};
+
+struct test_arg_regptr {
+	u8	type;		/* ARG_TYPE_REG or ARG_TYPE_PTR */
+	u8	reg;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_mem {
+	u8	type;		/* ARG_TYPE_MEM */
+	u8	index;
+	u8	_padding[2];
+	u32	val;
+};
+
+struct test_arg_end {
+	u8	type;		/* ARG_TYPE_END */
+	u8	flags;		/* ARG_FLAG_x */
+	u16	code_offset;
+	u16	branch_offset;
+	u16	end_offset;
+};
+
+
+/*
+ * Building blocks for test cases.
+ *
+ * Each test case is wrapped between TESTCASE_START and TESTCASE_END.
+ *
+ * To specify arguments for a test case the TEST_ARG_{REG,PTR,MEM} macros are
+ * used followed by a terminating TEST_ARG_END.
+ *
+ * After this, the instruction to be tested is defined with TEST_INSTRUCTION.
+ * Or for branches, TEST_BRANCH_B and TEST_BRANCH_F (branch forwards/backwards).
+ *
+ * Some specific test cases may make use of other custom constructs.
+ */
+
+#if VERBOSE
+#define verbose(fmt, ...) pr_info(fmt, ##__VA_ARGS__)
+#else
+#define verbose(fmt, ...)
+#endif
+
+#define TEST_GROUP(title)					\
+	verbose("\n");						\
+	verbose(title"\n");					\
+	verbose("---------------------------------------------------------\n");
+
+#define TESTCASE_START(title)					\
+	__asm__ __volatile__ (					\
+	"bl	__kprobes_test_case_start		\n\t"	\
+	/* don't use .asciz here as 'title' may be */		\
+	/* multiple strings to be concatenated.  */		\
+	".ascii "#title"				\n\t"	\
+	".byte	0					\n\t"	\
+	".align	2					\n\t"
+
+#define	TEST_ARG_REG(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_REG)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_PTR(reg, val)					\
+	".byte	"__stringify(ARG_TYPE_PTR)"		\n\t"	\
+	".byte	"#reg"					\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_MEM(index, val)				\
+	".byte	"__stringify(ARG_TYPE_MEM)"		\n\t"	\
+	".byte	"#index"				\n\t"	\
+	".short	0					\n\t"	\
+	".word	"#val"					\n\t"
+
+#define	TEST_ARG_END(flags)					\
+	".byte	"__stringify(ARG_TYPE_END)"		\n\t"	\
+	".byte	"TEST_ISA flags"			\n\t"	\
+	".short	50f-0f					\n\t"	\
+	".short	2f-0f					\n\t"	\
+	".short	99f-0f					\n\t"	\
+	".code "TEST_ISA"				\n\t"	\
+	"0:						\n\t"
+
+#define TEST_INSTRUCTION(instruction)				\
+	"50:	nop					\n\t"	\
+	"1:	"instruction"				\n\t"	\
+	"	nop					\n\t"
+
+#define TEST_BRANCH_F(instruction, xtra_dist)			\
+	TEST_INSTRUCTION(instruction)				\
+	".if "#xtra_dist"				\n\t"	\
+	"	b	99f				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"
+
+#define TEST_BRANCH_B(instruction, xtra_dist)			\
+	"	b	50f				\n\t"	\
+	"	b	99f				\n\t"	\
+	"2:	nop					\n\t"	\
+	"	b	99f				\n\t"	\
+	".if "#xtra_dist"				\n\t"	\
+	".space "#xtra_dist"				\n\t"	\
+	".endif						\n\t"	\
+	TEST_INSTRUCTION(instruction)
+
+#define TESTCASE_END						\
+	"2:						\n\t"	\
+	"99:						\n\t"	\
+	"	bl __kprobes_test_case_end_"TEST_ISA"	\n\t"	\
+	".code "NORMAL_ISA"				\n\t"	\
+	: :							\
+	: "r0", "r1", "r2", "r3", "ip", "lr", "memory", "cc"	\
+	);
+
+
+/*
+ * Macros to define test cases.
+ *
+ * Those of the form TEST_{R,P,M}* can be used to define test cases
+ * which take combinations of the three basic types of arguments. E.g.
+ *
+ *   TEST_R	One register argument
+ *   TEST_RR	Two register arguments
+ *   TEST_RPR	A register, a pointer, then a register argument
+ *
+ * For testing instructions which may branch, there are macros TEST_BF_*
+ * and TEST_BB_* for branching forwards and backwards.
+ *
+ * TEST_SUPPORTED and TEST_UNSUPPORTED don't cause the code to be executed,
+ * the just verify that a kprobe is or is not allowed on the given instruction.
+ */
+
+#define TEST(code)				\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	TESTCASE_END
+
+#define TEST_UNSUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_UNSUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_SUPPORTED(code)					\
+	TESTCASE_START(code)					\
+	TEST_ARG_END("|"__stringify(ARG_FLAG_SUPPORTED))	\
+	TEST_INSTRUCTION(code)					\
+	TESTCASE_END
+
+#define TEST_R(code1, reg, val, code2)			\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 #reg code2)		\
+	TESTCASE_END
+
+#define TEST_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4, reg4, val4)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_REG(reg4, val4)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4)	\
+	TESTCASE_END
+
+#define TEST_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code1 #reg1 code2)	\
+	TESTCASE_END
+
+#define TEST_PR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_PTR(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_RP(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_PTR(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3)		\
+	TESTCASE_END
+
+#define TEST_PRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_PTR(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RPR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_PTR(reg2, val2)						\
+	TEST_ARG_REG(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_RRP(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_PTR(reg3, val3)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4)		\
+	TESTCASE_END
+
+#define TEST_BF_P(code1, reg1, val1, code2)	\
+	TESTCASE_START(code1 #reg1 code2)	\
+	TEST_ARG_PTR(reg1, val1)		\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code1 #reg1 code2, 0)	\
+	TESTCASE_END
+
+#define TEST_BF_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_F(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BB_X(code, xtra_dist)		\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_BRANCH_B(code, xtra_dist)		\
+	TESTCASE_END
+
+#define TEST_BF_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_F(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BB_RX(code1, reg, val, code2, xtra_dist)	\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_BRANCH_B(code1 #reg code2, xtra_dist)	\
+	TESTCASE_END
+
+#define TEST_BF(code)	TEST_BF_X(code, 0)
+#define TEST_BB(code)	TEST_BB_X(code, 0)
+
+#define TEST_BF_R(code1, reg, val, code2) TEST_BF_RX(code1, reg, val, code2, 0)
+#define TEST_BB_R(code1, reg, val, code2) TEST_BB_RX(code1, reg, val, code2, 0)
+
+#define TEST_BF_RR(code1, reg1, val1, code2, reg2, val2, code3)	\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)		\
+	TEST_ARG_REG(reg1, val1)				\
+	TEST_ARG_REG(reg2, val2)				\
+	TEST_ARG_END("")					\
+	TEST_BRANCH_F(code1 #reg1 code2 #reg2 code3, 0)		\
+	TESTCASE_END
+
+#define TEST_X(code, codex)			\
+	TESTCASE_START(code)			\
+	TEST_ARG_END("")			\
+	TEST_INSTRUCTION(code)			\
+	"	b	99f		\n\t"	\
+	"	"codex"			\n\t"	\
+	TESTCASE_END
+
+#define TEST_RX(code1, reg, val, code2, codex)		\
+	TESTCASE_START(code1 #reg code2)		\
+	TEST_ARG_REG(reg, val)				\
+	TEST_ARG_END("")				\
+	TEST_INSTRUCTION(code1 __stringify(reg) code2)	\
+	"	b	99f		\n\t"		\
+	"	"codex"			\n\t"		\
+	TESTCASE_END
+
+#define TEST_RRX(code1, reg1, val1, code2, reg2, val2, code3, codex)		\
+	TESTCASE_START(code1 #reg1 code2 #reg2 code3)				\
+	TEST_ARG_REG(reg1, val1)						\
+	TEST_ARG_REG(reg2, val2)						\
+	TEST_ARG_END("")							\
+	TEST_INSTRUCTION(code1 __stringify(reg1) code2 __stringify(reg2) code3)	\
+	"	b	99f		\n\t"					\
+	"	"codex"			\n\t"					\
+	TESTCASE_END
+
+
+/* Various values used in test cases... */
+#define N(val)	(val ^ 0xffffffff)
+#define VAL1	0x12345678
+#define VAL2	N(VAL1)
+#define VAL3	0xa5f801
+#define VAL4	N(VAL3)
+#define VALM	0x456789ab
+#define VALR	0xdeaddead
+#define HH1	0x0123fecb
+#define HH2	0xa9874567
+
+
+#ifdef CONFIG_THUMB2_KERNEL
+void kprobe_thumb16_test_cases(void);
+void kprobe_thumb32_test_cases(void);
+#else
+void kprobe_arm_test_cases(void);
+#endif
diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c
index 902ca59e8b11..8f96ec778e8d 100644
--- a/arch/arm/kernel/kprobes-thumb.c
+++ b/arch/arm/kernel/kprobes-thumb.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
+#include <linux/module.h>
 
 #include "kprobes.h"
 
@@ -943,6 +944,9 @@ const union decode_item kprobe_decode_thumb32_table[] = {
 	 */
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb32_table);
+#endif
 
 static void __kprobes
 t16_simulate_bxblx(struct kprobe *p, struct pt_regs *regs)
@@ -1423,6 +1427,9 @@ const union decode_item kprobe_decode_thumb16_table[] = {
 
 	DECODE_END
 };
+#ifdef CONFIG_ARM_KPROBES_TEST_MODULE
+EXPORT_SYMBOL_GPL(kprobe_decode_thumb16_table);
+#endif
 
 static unsigned long __kprobes thumb_check_cc(unsigned long cpsr)
 {
diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/kernel/kprobes.h
index a6aeda0a6c7f..38945f78f9f1 100644
--- a/arch/arm/kernel/kprobes.h
+++ b/arch/arm/kernel/kprobes.h
@@ -413,6 +413,14 @@ struct decode_reject {
 	DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0)
 
 
+#ifdef CONFIG_THUMB2_KERNEL
+extern const union decode_item kprobe_decode_thumb16_table[];
+extern const union decode_item kprobe_decode_thumb32_table[];
+#else
+extern const union decode_item kprobe_decode_arm_table[];
+#endif
+
+
 int kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi,
 			const union decode_item *table, bool thumb16);
 
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index e59bbd496c39..c1b4463dcc83 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -32,6 +32,24 @@ static atomic_t waiting_for_crash_ipi;
 
 int machine_kexec_prepare(struct kimage *image)
 {
+	unsigned long page_list;
+	void *reboot_code_buffer;
+	page_list = image->head & PAGE_MASK;
+
+	reboot_code_buffer = page_address(image->control_code_page);
+
+	/* Prepare parameters for reboot_code_buffer*/
+	kexec_start_address = image->start;
+	kexec_indirection_page = page_list;
+	kexec_mach_type = machine_arch_type;
+	kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET;
+
+	/* copy our kernel relocation code to the control code page */
+	memcpy(reboot_code_buffer,
+	       relocate_new_kernel, relocate_new_kernel_size);
+
+	flush_icache_range((unsigned long) reboot_code_buffer,
+			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	return 0;
 }
 
@@ -82,31 +100,14 @@ void (*kexec_reinit)(void);
 
 void machine_kexec(struct kimage *image)
 {
-	unsigned long page_list;
 	unsigned long reboot_code_buffer_phys;
 	void *reboot_code_buffer;
 
-
-	page_list = image->head & PAGE_MASK;
-
 	/* we need both effective and real address here */
 	reboot_code_buffer_phys =
 	    page_to_pfn(image->control_code_page) << PAGE_SHIFT;
 	reboot_code_buffer = page_address(image->control_code_page);
 
-	/* Prepare parameters for reboot_code_buffer*/
-	kexec_start_address = image->start;
-	kexec_indirection_page = page_list;
-	kexec_mach_type = machine_arch_type;
-	kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET;
-
-	/* copy our kernel relocation code to the control code page */
-	memcpy(reboot_code_buffer,
-	       relocate_new_kernel, relocate_new_kernel_size);
-
-
-	flush_icache_range((unsigned long) reboot_code_buffer,
-			   (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
 	if (kexec_reinit)
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index cc2020c2c709..1e9be5d25e56 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -33,7 +33,7 @@
  * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
  */
 #undef MODULES_VADDR
-#define MODULES_VADDR	(((unsigned long)_etext + ~PGDIR_MASK) & PGDIR_MASK)
+#define MODULES_VADDR	(((unsigned long)_etext + ~PMD_MASK) & PMD_MASK)
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 53c9c2610cbc..e6e5d7c84f1a 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -26,16 +27,8 @@
 #include <asm/pmu.h>
 #include <asm/stacktrace.h>
 
-static struct platform_device *pmu_device;
-
-/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
-static DEFINE_RAW_SPINLOCK(pmu_lock);
-
 /*
- * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
+ * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
  * another platform that supports more, we need to increase this to be the
  * largest of all platforms.
  *
@@ -43,62 +36,24 @@ static DEFINE_RAW_SPINLOCK(pmu_lock);
  *  cycle counter CCNT + 31 events counters CNT0..30.
  *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
  */
-#define ARMPMU_MAX_HWEVENTS		33
+#define ARMPMU_MAX_HWEVENTS		32
 
-/* The events for a given CPU. */
-struct cpu_hw_events {
-	/*
-	 * The events that are active on the CPU for the given index. Index 0
-	 * is reserved.
-	 */
-	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
+static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 
-	/*
-	 * A 1 bit for an index indicates that the counter is actively being
-	 * used.
-	 */
-	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
-};
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-struct arm_pmu {
-	enum arm_perf_pmu_ids id;
-	const char	*name;
-	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
-	void		(*enable)(struct hw_perf_event *evt, int idx);
-	void		(*disable)(struct hw_perf_event *evt, int idx);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
-	u32		(*read_counter)(int idx);
-	void		(*write_counter)(int idx, u32 val);
-	void		(*start)(void);
-	void		(*stop)(void);
-	void		(*reset)(void *);
-	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
-				    [PERF_COUNT_HW_CACHE_OP_MAX]
-				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
-	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
-	u32		raw_event_mask;
-	int		num_events;
-	u64		max_period;
-};
+#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
 
 /* Set at runtime when we know what CPU type we are. */
-static const struct arm_pmu *armpmu;
+static struct arm_pmu *cpu_pmu;
 
 enum arm_perf_pmu_ids
 armpmu_get_pmu_id(void)
 {
 	int id = -ENODEV;
 
-	if (armpmu != NULL)
-		id = armpmu->id;
+	if (cpu_pmu != NULL)
+		id = cpu_pmu->id;
 
 	return id;
 }
@@ -109,8 +64,8 @@ armpmu_get_max_events(void)
 {
 	int max_events = 0;
 
-	if (armpmu != NULL)
-		max_events = armpmu->num_events;
+	if (cpu_pmu != NULL)
+		max_events = cpu_pmu->num_events;
 
 	return max_events;
 }
@@ -130,7 +85,11 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 #define CACHE_OP_UNSUPPORTED		0xFFFF
 
 static int
-armpmu_map_cache_event(u64 config)
+armpmu_map_cache_event(const unsigned (*cache_map)
+				      [PERF_COUNT_HW_CACHE_MAX]
+				      [PERF_COUNT_HW_CACHE_OP_MAX]
+				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
+		       u64 config)
 {
 	unsigned int cache_type, cache_op, cache_result, ret;
 
@@ -146,7 +105,7 @@ armpmu_map_cache_event(u64 config)
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return -EINVAL;
 
-	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
 
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
@@ -155,23 +114,46 @@ armpmu_map_cache_event(u64 config)
 }
 
 static int
-armpmu_map_event(u64 config)
+armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*armpmu->event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+	int mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
 static int
-armpmu_map_raw_event(u64 config)
+armpmu_map_raw_event(u32 raw_event_mask, u64 config)
 {
-	return (int)(config & armpmu->raw_event_mask);
+	return (int)(config & raw_event_mask);
 }
 
-static int
+static int map_cpu_event(struct perf_event *event,
+			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
+			 const unsigned (*cache_map)
+					[PERF_COUNT_HW_CACHE_MAX]
+					[PERF_COUNT_HW_CACHE_OP_MAX]
+					[PERF_COUNT_HW_CACHE_RESULT_MAX],
+			 u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return armpmu_map_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return armpmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return armpmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+int
 armpmu_event_set_period(struct perf_event *event,
 			struct hw_perf_event *hwc,
 			int idx)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
 	int ret = 0;
@@ -202,11 +184,12 @@ armpmu_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static u64
+u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
 		    int idx, int overflow)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	u64 delta, prev_raw_count, new_raw_count;
 
 again:
@@ -246,11 +229,9 @@ armpmu_read(struct perf_event *event)
 static void
 armpmu_stop(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to update the counter, so ignore
 	 * PERF_EF_UPDATE, see comments in armpmu_start().
@@ -266,11 +247,9 @@ armpmu_stop(struct perf_event *event, int flags)
 static void
 armpmu_start(struct perf_event *event, int flags)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (!armpmu)
-		return;
-
 	/*
 	 * ARM pmu always has to reprogram the period, so ignore
 	 * PERF_EF_RELOAD, see the comment below.
@@ -293,16 +272,16 @@ armpmu_start(struct perf_event *event, int flags)
 static void
 armpmu_del(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	WARN_ON(idx < 0);
 
-	clear_bit(idx, cpuc->active_mask);
 	armpmu_stop(event, PERF_EF_UPDATE);
-	cpuc->events[idx] = NULL;
-	clear_bit(idx, cpuc->used_mask);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
 
 	perf_event_update_userpage(event);
 }
@@ -310,7 +289,8 @@ armpmu_del(struct perf_event *event, int flags)
 static int
 armpmu_add(struct perf_event *event, int flags)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
 	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 	int err = 0;
@@ -318,7 +298,7 @@ armpmu_add(struct perf_event *event, int flags)
 	perf_pmu_disable(event->pmu);
 
 	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(cpuc, hwc);
+	idx = armpmu->get_event_idx(hw_events, hwc);
 	if (idx < 0) {
 		err = idx;
 		goto out;
@@ -330,8 +310,7 @@ armpmu_add(struct perf_event *event, int flags)
 	 */
 	event->hw.idx = idx;
 	armpmu->disable(hwc, idx);
-	cpuc->events[idx] = event;
-	set_bit(idx, cpuc->active_mask);
+	hw_events->events[idx] = event;
 
 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	if (flags & PERF_EF_START)
@@ -345,25 +324,25 @@ out:
 	return err;
 }
 
-static struct pmu pmu;
-
 static int
-validate_event(struct cpu_hw_events *cpuc,
+validate_event(struct pmu_hw_events *hw_events,
 	       struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event fake_event = event->hw;
+	struct pmu *leader_pmu = event->group_leader->pmu;
 
-	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
-	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
+	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
 }
 
 static int
 validate_group(struct perf_event *event)
 {
 	struct perf_event *sibling, *leader = event->group_leader;
-	struct cpu_hw_events fake_pmu;
+	struct pmu_hw_events fake_pmu;
 
 	memset(&fake_pmu, 0, sizeof(fake_pmu));
 
@@ -383,110 +362,119 @@ validate_group(struct perf_event *event)
 
 static irqreturn_t armpmu_platform_irq(int irq, void *dev)
 {
-	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);
+	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
+	struct platform_device *plat_device = armpmu->plat_device;
+	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);
 
 	return plat->handle_irq(irq, dev, armpmu->handle_irq);
 }
 
+static void
+armpmu_release_hardware(struct arm_pmu *armpmu)
+{
+	int i, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+
+	for (i = 0; i < irqs; ++i) {
+		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			continue;
+		irq = platform_get_irq(pmu_device, i);
+		if (irq >= 0)
+			free_irq(irq, armpmu);
+	}
+
+	release_pmu(armpmu->type);
+}
+
 static int
-armpmu_reserve_hardware(void)
+armpmu_reserve_hardware(struct arm_pmu *armpmu)
 {
 	struct arm_pmu_platdata *plat;
 	irq_handler_t handle_irq;
-	int i, err = -ENODEV, irq;
+	int i, err, irq, irqs;
+	struct platform_device *pmu_device = armpmu->plat_device;
 
-	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
-	if (IS_ERR(pmu_device)) {
+	err = reserve_pmu(armpmu->type);
+	if (err) {
 		pr_warning("unable to reserve pmu\n");
-		return PTR_ERR(pmu_device);
+		return err;
 	}
 
-	init_pmu(ARM_PMU_DEVICE_CPU);
-
 	plat = dev_get_platdata(&pmu_device->dev);
 	if (plat && plat->handle_irq)
 		handle_irq = armpmu_platform_irq;
 	else
 		handle_irq = armpmu->handle_irq;
 
-	if (pmu_device->num_resources < 1) {
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
 		pr_err("no irqs for PMUs defined\n");
 		return -ENODEV;
 	}
 
-	for (i = 0; i < pmu_device->num_resources; ++i) {
+	for (i = 0; i < irqs; ++i) {
+		err = 0;
 		irq = platform_get_irq(pmu_device, i);
 		if (irq < 0)
 			continue;
 
+		/*
+		 * If we have a single PMU interrupt that we can't shift,
+		 * assume that we're running on a uniprocessor machine and
+		 * continue. Otherwise, continue without this interrupt.
+		 */
+		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				    irq, i);
+			continue;
+		}
+
 		err = request_irq(irq, handle_irq,
 				  IRQF_DISABLED | IRQF_NOBALANCING,
-				  "armpmu", NULL);
+				  "arm-pmu", armpmu);
 		if (err) {
-			pr_warning("unable to request IRQ%d for ARM perf "
-				"counters\n", irq);
-			break;
+			pr_err("unable to request IRQ%d for ARM PMU counters\n",
+				irq);
+			armpmu_release_hardware(armpmu);
+			return err;
 		}
-	}
 
-	if (err) {
-		for (i = i - 1; i >= 0; --i) {
-			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
-				free_irq(irq, NULL);
-		}
-		release_pmu(ARM_PMU_DEVICE_CPU);
-		pmu_device = NULL;
+		cpumask_set_cpu(i, &armpmu->active_irqs);
 	}
 
-	return err;
+	return 0;
 }
 
 static void
-armpmu_release_hardware(void)
+hw_perf_event_destroy(struct perf_event *event)
 {
-	int i, irq;
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+	atomic_t *active_events	 = &armpmu->active_events;
+	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
 
-	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
-		irq = platform_get_irq(pmu_device, i);
-		if (irq >= 0)
-			free_irq(irq, NULL);
+	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
+		armpmu_release_hardware(armpmu);
+		mutex_unlock(pmu_reserve_mutex);
 	}
-	armpmu->stop();
-
-	release_pmu(ARM_PMU_DEVICE_CPU);
-	pmu_device = NULL;
 }
 
-static atomic_t active_events = ATOMIC_INIT(0);
-static DEFINE_MUTEX(pmu_reserve_mutex);
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
+static int
+event_requires_mode_exclusion(struct perf_event_attr *attr)
 {
-	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
-		armpmu_release_hardware();
-		mutex_unlock(&pmu_reserve_mutex);
-	}
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv;
 }
 
 static int
 __hw_perf_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping, err;
 
-	/* Decode the generic type into an ARM event identifier. */
-	if (PERF_TYPE_HARDWARE == event->attr.type) {
-		mapping = armpmu_map_event(event->attr.config);
-	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
-		mapping = armpmu_map_cache_event(event->attr.config);
-	} else if (PERF_TYPE_RAW == event->attr.type) {
-		mapping = armpmu_map_raw_event(event->attr.config);
-	} else {
-		pr_debug("event type %x not supported\n", event->attr.type);
-		return -EOPNOTSUPP;
-	}
+	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
 		pr_debug("event %x:%llx not supported\n", event->attr.type,
@@ -495,34 +483,31 @@ __hw_perf_event_init(struct perf_event *event)
 	}
 
 	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx		= -1;
+	hwc->config_base	= 0;
+	hwc->config		= 0;
+	hwc->event_base		= 0;
+
+	/*
 	 * Check whether we need to exclude the counter from certain modes.
-	 * The ARM performance counters are on all of the time so if someone
-	 * has asked us for some excludes then we have to fail.
 	 */
-	if (event->attr.exclude_kernel || event->attr.exclude_user ||
-	    event->attr.exclude_hv || event->attr.exclude_idle) {
+	if ((!armpmu->set_event_filter ||
+	     armpmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
 		pr_debug("ARM performance counters do not support "
 			 "mode exclusion\n");
 		return -EPERM;
 	}
 
 	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
+	 * Store the event encoding into the config_base field.
 	 */
-	hwc->idx = -1;
-
-	/*
-	 * Store the event encoding into the config_base field. config and
-	 * event_base are unused as the only 2 things we need to know are
-	 * the event mapping and the counter to use. The counter to use is
-	 * also the indx and the config_base is the event type.
-	 */
-	hwc->config_base	    = (unsigned long)mapping;
-	hwc->config		    = 0;
-	hwc->event_base		    = 0;
+	hwc->config_base	    |= (unsigned long)mapping;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period  = armpmu->max_period;
@@ -542,32 +527,23 @@ __hw_perf_event_init(struct perf_event *event)
 
 static int armpmu_event_init(struct perf_event *event)
 {
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	int err = 0;
+	atomic_t *active_events = &armpmu->active_events;
 
-	switch (event->attr.type) {
-	case PERF_TYPE_RAW:
-	case PERF_TYPE_HARDWARE:
-	case PERF_TYPE_HW_CACHE:
-		break;
-
-	default:
+	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
-	}
-
-	if (!armpmu)
-		return -ENODEV;
 
 	event->destroy = hw_perf_event_destroy;
 
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmu_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			err = armpmu_reserve_hardware();
-		}
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&armpmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = armpmu_reserve_hardware(armpmu);
 
 		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmu_reserve_mutex);
+			atomic_inc(active_events);
+		mutex_unlock(&armpmu->reserve_mutex);
 	}
 
 	if (err)
@@ -582,22 +558,9 @@ static int armpmu_event_init(struct perf_event *event)
 
 static void armpmu_enable(struct pmu *pmu)
 {
-	/* Enable all of the perf events on hardware. */
-	int idx, enabled = 0;
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
-	if (!armpmu)
-		return;
-
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
-		struct perf_event *event = cpuc->events[idx];
-
-		if (!event)
-			continue;
-
-		armpmu->enable(&event->hw, idx);
-		enabled = 1;
-	}
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
 	if (enabled)
 		armpmu->start();
@@ -605,20 +568,32 @@ static void armpmu_enable(struct pmu *pmu)
 
 static void armpmu_disable(struct pmu *pmu)
 {
-	if (armpmu)
-		armpmu->stop();
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	armpmu->stop();
 }
 
-static struct pmu pmu = {
-	.pmu_enable	= armpmu_enable,
-	.pmu_disable	= armpmu_disable,
-	.event_init	= armpmu_event_init,
-	.add		= armpmu_add,
-	.del		= armpmu_del,
-	.start		= armpmu_start,
-	.stop		= armpmu_stop,
-	.read		= armpmu_read,
-};
+static void __init armpmu_init(struct arm_pmu *armpmu)
+{
+	atomic_set(&armpmu->active_events, 0);
+	mutex_init(&armpmu->reserve_mutex);
+
+	armpmu->pmu = (struct pmu) {
+		.pmu_enable	= armpmu_enable,
+		.pmu_disable	= armpmu_disable,
+		.event_init	= armpmu_event_init,
+		.add		= armpmu_add,
+		.del		= armpmu_del,
+		.start		= armpmu_start,
+		.stop		= armpmu_stop,
+		.read		= armpmu_read,
+	};
+}
+
+int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
+{
+	armpmu_init(armpmu);
+	return perf_pmu_register(&armpmu->pmu, name, type);
+}
 
 /* Include the PMU-specific implementations. */
 #include "perf_event_xscale.c"
@@ -630,14 +605,72 @@ static struct pmu pmu = {
  * This requires SMP to be available, so exists as a separate initcall.
  */
 static int __init
-armpmu_reset(void)
+cpu_pmu_reset(void)
+{
+	if (cpu_pmu && cpu_pmu->reset)
+		return on_each_cpu(cpu_pmu->reset, NULL, 1);
+	return 0;
+}
+arch_initcall(cpu_pmu_reset);
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static struct of_device_id armpmu_of_device_ids[] = {
+	{.compatible = "arm,cortex-a9-pmu"},
+	{.compatible = "arm,cortex-a8-pmu"},
+	{.compatible = "arm,arm1136-pmu"},
+	{.compatible = "arm,arm1176-pmu"},
+	{},
+};
+
+static struct platform_device_id armpmu_plat_device_ids[] = {
+	{.name = "arm-pmu"},
+	{},
+};
+
+static int __devinit armpmu_device_probe(struct platform_device *pdev)
 {
-	if (armpmu && armpmu->reset)
-		return on_each_cpu(armpmu->reset, NULL, 1);
+	cpu_pmu->plat_device = pdev;
 	return 0;
 }
-arch_initcall(armpmu_reset);
 
+static struct platform_driver armpmu_driver = {
+	.driver		= {
+		.name	= "arm-pmu",
+		.of_match_table = armpmu_of_device_ids,
+	},
+	.probe		= armpmu_device_probe,
+	.id_table	= armpmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	return platform_driver_register(&armpmu_driver);
+}
+device_initcall(register_pmu_driver);
+
+static struct pmu_hw_events *armpmu_get_cpu_events(void)
+{
+	return &__get_cpu_var(cpu_hw_events);
+}
+
+static void __init cpu_pmu_init(struct arm_pmu *armpmu)
+{
+	int cpu;
+	for_each_possible_cpu(cpu) {
+		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+		events->events = per_cpu(hw_events, cpu);
+		events->used_mask = per_cpu(used_mask, cpu);
+		raw_spin_lock_init(&events->pmu_lock);
+	}
+	armpmu->get_hw_events = armpmu_get_cpu_events;
+	armpmu->type = ARM_PMU_DEVICE_CPU;
+}
+
+/*
+ * CPU PMU identification and registration.
+ */
 static int __init
 init_hw_perf_events(void)
 {
@@ -651,22 +684,22 @@ init_hw_perf_events(void)
 		case 0xB360:	/* ARM1136 */
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
-			armpmu = armv6pmu_init();
+			cpu_pmu = armv6pmu_init();
 			break;
 		case 0xB020:	/* ARM11mpcore */
-			armpmu = armv6mpcore_pmu_init();
+			cpu_pmu = armv6mpcore_pmu_init();
 			break;
 		case 0xC080:	/* Cortex-A8 */
-			armpmu = armv7_a8_pmu_init();
+			cpu_pmu = armv7_a8_pmu_init();
 			break;
 		case 0xC090:	/* Cortex-A9 */
-			armpmu = armv7_a9_pmu_init();
+			cpu_pmu = armv7_a9_pmu_init();
 			break;
 		case 0xC050:	/* Cortex-A5 */
-			armpmu = armv7_a5_pmu_init();
+			cpu_pmu = armv7_a5_pmu_init();
 			break;
 		case 0xC0F0:	/* Cortex-A15 */
-			armpmu = armv7_a15_pmu_init();
+			cpu_pmu = armv7_a15_pmu_init();
 			break;
 		}
 	/* Intel CPUs [xscale]. */
@@ -674,23 +707,23 @@ init_hw_perf_events(void)
 		part_number = (cpuid >> 13) & 0x7;
 		switch (part_number) {
 		case 1:
-			armpmu = xscale1pmu_init();
+			cpu_pmu = xscale1pmu_init();
 			break;
 		case 2:
-			armpmu = xscale2pmu_init();
+			cpu_pmu = xscale2pmu_init();
 			break;
 		}
 	}
 
-	if (armpmu) {
+	if (cpu_pmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			armpmu->name, armpmu->num_events);
+			cpu_pmu->name, cpu_pmu->num_events);
+		cpu_pmu_init(cpu_pmu);
+		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
 	} else {
 		pr_info("no hardware support available\n");
 	}
 
-	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-
 	return 0;
 }
 early_initcall(init_hw_perf_events);
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index dd7f3b9f4cb3..e63d8115c01b 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -54,7 +54,7 @@ enum armv6_perf_types {
 };
 
 enum armv6_counters {
-	ARMV6_CYCLE_COUNTER = 1,
+	ARMV6_CYCLE_COUNTER = 0,
 	ARMV6_COUNTER0,
 	ARMV6_COUNTER1,
 };
@@ -433,6 +433,7 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 		      int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= 0;
@@ -454,12 +455,29 @@ armv6pmu_enable_event(struct hw_perf_event *hwc,
 	 * Mask out the current event and set the counter to count the event
 	 * that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int counter_is_active(unsigned long pmcr, int idx)
+{
+	unsigned long mask = 0;
+	if (idx == ARMV6_CYCLE_COUNTER)
+		mask = ARMV6_PMCR_CCOUNT_IEN;
+	else if (idx == ARMV6_COUNTER0)
+		mask = ARMV6_PMCR_COUNT0_IEN;
+	else if (idx == ARMV6_COUNTER1)
+		mask = ARMV6_PMCR_COUNT1_IEN;
+
+	if (mask)
+		return pmcr & mask;
+
+	WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+	return 0;
 }
 
 static irqreturn_t
@@ -468,7 +486,7 @@ armv6pmu_handle_irq(int irq_num,
 {
 	unsigned long pmcr = armv6_pmcr_read();
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -487,11 +505,11 @@ armv6pmu_handle_irq(int irq_num,
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
+		if (!counter_is_active(pmcr, idx))
 			continue;
 
 		/*
@@ -508,7 +526,7 @@ armv6pmu_handle_irq(int irq_num,
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -527,28 +545,30 @@ static void
 armv6pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val |= ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 armv6pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~ARMV6_PMCR_ENABLE;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
+armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
 		       struct hw_perf_event *event)
 {
 	/* Always place a cycle counter into the cycle counter. */
@@ -578,6 +598,7 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 		       int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -598,12 +619,12 @@ armv6pmu_disable_event(struct hw_perf_event *hwc,
 	 * of ETM bus signal assertion cycles. The external reporting should
 	 * be disabled and so this should never increment.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
@@ -611,6 +632,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 			      int idx)
 {
 	unsigned long val, mask, flags, evt = 0;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	if (ARMV6_CYCLE_COUNTER == idx) {
 		mask	= ARMV6_PMCR_CCOUNT_IEN;
@@ -627,15 +649,21 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
 	 * simply disable the interrupt reporting.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = armv6_pmcr_read();
 	val &= ~mask;
 	val |= evt;
 	armv6_pmcr_write(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int armv6_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6_perf_map,
+				&armv6_perf_cache_map, 0xFF);
 }
 
-static const struct arm_pmu armv6pmu = {
+static struct arm_pmu armv6pmu = {
 	.id			= ARM_PERF_PMU_ID_V6,
 	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -646,14 +674,12 @@ static const struct arm_pmu armv6pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6_perf_cache_map,
-	.event_map		= &armv6_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return &armv6pmu;
 }
@@ -665,7 +691,14 @@ static const struct arm_pmu *__init armv6pmu_init(void)
  * disable the interrupt reporting and update the event. When unthrottling we
  * reset the period and enable the interrupt reporting.
  */
-static const struct arm_pmu armv6mpcore_pmu = {
+
+static int armv6mpcore_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv6mpcore_perf_map,
+				&armv6mpcore_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu armv6mpcore_pmu = {
 	.id			= ARM_PERF_PMU_ID_V6MP,
 	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
@@ -676,24 +709,22 @@ static const struct arm_pmu armv6mpcore_pmu = {
 	.get_event_idx		= armv6pmu_get_event_idx,
 	.start			= armv6pmu_start,
 	.stop			= armv6pmu_stop,
-	.cache_map		= &armv6mpcore_perf_cache_map,
-	.event_map		= &armv6mpcore_perf_map,
-	.raw_event_mask		= 0xFF,
+	.map_event		= armv6mpcore_map_event,
 	.num_events		= 3,
 	.max_period		= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return &armv6mpcore_pmu;
 }
 #else
-static const struct arm_pmu *__init armv6pmu_init(void)
+static struct arm_pmu *__init armv6pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv6mpcore_pmu_init(void)
+static struct arm_pmu *__init armv6mpcore_pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 4c851834f68e..1ef6d0034b85 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -17,6 +17,9 @@
  */
 
 #ifdef CONFIG_CPU_V7
+
+static struct arm_pmu armv7pmu;
+
 /*
  * Common ARMv7 event types
  *
@@ -321,8 +324,8 @@ static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
 	[PERF_COUNT_HW_INSTRUCTIONS]	    =
 					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
-	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
-	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_DCACHE_REFILL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
 	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
 	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
@@ -676,23 +679,24 @@ static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 };
 
 /*
- * Perf Events counters
+ * Perf Events' indices
  */
-enum armv7_counters {
-	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
-	ARMV7_COUNTER0			= 2,	/* First event counter */
-};
+#define	ARMV7_IDX_CYCLE_COUNTER	0
+#define	ARMV7_IDX_COUNTER0	1
+#define	ARMV7_IDX_COUNTER_LAST	(ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+
+#define	ARMV7_MAX_COUNTERS	32
+#define	ARMV7_COUNTER_MASK	(ARMV7_MAX_COUNTERS - 1)
 
 /*
- * The cycle counter is ARMV7_CYCLE_COUNTER.
- * The first event counter is ARMV7_COUNTER0.
- * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
+ * ARMv7 low level PMNC access
  */
-#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
 
 /*
- * ARMv7 low level PMNC access
+ * Perf Event to low level counters mapping
  */
+#define	ARMV7_IDX_TO_COUNTER(x)	\
+	(((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK)
 
 /*
  * Per-CPU PMNC: config reg
@@ -708,103 +712,76 @@ enum armv7_counters {
 #define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
 
 /*
- * Available counters
- */
-#define ARMV7_CNT0		0	/* First event counter */
-#define ARMV7_CCNT		31	/* Cycle counter */
-
-/* Perf Event to low level counters mapping */
-#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)
-
-/*
- * CNTENS: counters enable reg
- */
-#define ARMV7_CNTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * CNTENC: counters disable reg
- */
-#define ARMV7_CNTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_CNTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENS: counters overflow interrupt enable reg
- */
-#define ARMV7_INTENS_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENS_C		(1 << ARMV7_CCNT)
-
-/*
- * INTENC: counters overflow interrupt disable reg
- */
-#define ARMV7_INTENC_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_INTENC_C		(1 << ARMV7_CCNT)
-
-/*
- * EVTSEL: Event selection reg
+ * FLAG: counters overflow flag status reg
  */
-#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */
+#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
 
 /*
- * SELECT: Counter selection reg
+ * PMXEVTYPER: Event selection reg
  */
-#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */
+#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
 
 /*
- * FLAG: counters overflow flag status reg
+ * Event filters for PMUv2
  */
-#define ARMV7_FLAG_P(idx)	(1 << (idx - ARMV7_EVENT_CNT_TO_CNTx))
-#define ARMV7_FLAG_C		(1 << ARMV7_CCNT)
-#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
+#define	ARMV7_EXCLUDE_PL1	(1 << 31)
+#define	ARMV7_EXCLUDE_USER	(1 << 30)
+#define	ARMV7_INCLUDE_HYP	(1 << 27)
 
-static inline unsigned long armv7_pmnc_read(void)
+static inline u32 armv7_pmnc_read(void)
 {
 	u32 val;
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
 	return val;
 }
 
-static inline void armv7_pmnc_write(unsigned long val)
+static inline void armv7_pmnc_write(u32 val)
 {
 	val &= ARMV7_PMNC_MASK;
 	isb();
 	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
 }
 
-static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
+static inline int armv7_pmnc_has_overflowed(u32 pmnc)
 {
 	return pmnc & ARMV7_OVERFLOWED_MASK;
 }
 
-static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
-					enum armv7_counters counter)
+static inline int armv7_pmnc_counter_valid(int idx)
+{
+	return idx >= ARMV7_IDX_CYCLE_COUNTER && idx <= ARMV7_IDX_COUNTER_LAST;
+}
+
+static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
 {
 	int ret = 0;
+	u32 counter;
 
-	if (counter == ARMV7_CYCLE_COUNTER)
-		ret = pmnc & ARMV7_FLAG_C;
-	else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
-		ret = pmnc & ARMV7_FLAG_P(counter);
-	else
+	if (!armv7_pmnc_counter_valid(idx)) {
 		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), counter);
+			smp_processor_id(), idx);
+	} else {
+		counter = ARMV7_IDX_TO_COUNTER(idx);
+		ret = pmnc & BIT(counter);
+	}
 
 	return ret;
 }
 
-static inline int armv7_pmnc_select_counter(unsigned int idx)
+static inline int armv7_pmnc_select_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
-		pr_err("CPU%u selecting wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u selecting wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
-	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
 	isb();
 
 	return idx;
@@ -812,124 +789,95 @@ static inline int armv7_pmnc_select_counter(unsigned int idx)
 
 static inline u32 armv7pmu_read_counter(int idx)
 {
-	unsigned long value = 0;
+	u32 value = 0;
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mrc p15, 0, %0, c9, c13, 2"
-				     : "=r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
 
 	return value;
 }
 
 static inline void armv7pmu_write_counter(int idx, u32 value)
 {
-	if (idx == ARMV7_CYCLE_COUNTER)
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
-	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
-		if (armv7_pmnc_select_counter(idx) == idx)
-			asm volatile("mcr p15, 0, %0, c9, c13, 2"
-				     : : "r" (value));
-	} else
+	if (!armv7_pmnc_counter_valid(idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
+	else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+	else if (armv7_pmnc_select_counter(idx) == idx)
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
 }
 
-static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
+static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
 {
 	if (armv7_pmnc_select_counter(idx) == idx) {
-		val &= ARMV7_EVTSEL_MASK;
+		val &= ARMV7_EVTYPE_MASK;
 		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
 	}
 }
 
-static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
+static inline int armv7_pmnc_enable_counter(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENS_C;
-	else
-		val = ARMV7_CNTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
+static inline int armv7_pmnc_disable_counter(int idx)
 {
-	u32 val;
-
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_CNTENC_C;
-	else
-		val = ARMV7_CNTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
+static inline int armv7_pmnc_enable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u enabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENS_C;
-	else
-		val = ARMV7_INTENS_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
 	return idx;
 }
 
-static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
+static inline int armv7_pmnc_disable_intens(int idx)
 {
-	u32 val;
+	u32 counter;
 
-	if ((idx != ARMV7_CYCLE_COUNTER) &&
-	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
-		pr_err("CPU%u disabling wrong PMNC counter"
-			" interrupt enable %d\n", smp_processor_id(), idx);
-		return -1;
+	if (!armv7_pmnc_counter_valid(idx)) {
+		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
+			smp_processor_id(), idx);
+		return -EINVAL;
 	}
 
-	if (idx == ARMV7_CYCLE_COUNTER)
-		val = ARMV7_INTENC_C;
-	else
-		val = ARMV7_INTENC_P(idx);
-
-	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
-
+	counter = ARMV7_IDX_TO_COUNTER(idx);
+	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
 	return idx;
 }
 
@@ -973,14 +921,14 @@ static void armv7_pmnc_dump_regs(void)
 	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
 	printk(KERN_INFO "CCNT  =0x%08x\n", val);
 
-	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
+	for (cnt = ARMV7_IDX_COUNTER0; cnt <= ARMV7_IDX_COUNTER_LAST; cnt++) {
 		armv7_pmnc_select_counter(cnt);
 		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
 		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
-			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
+			ARMV7_IDX_TO_COUNTER(cnt), val);
 	}
 }
 #endif
@@ -988,12 +936,13 @@ static void armv7_pmnc_dump_regs(void)
 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1002,9 +951,10 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 
 	/*
 	 * Set event (if destined for PMNx counters)
-	 * We don't need to set the event if it's a cycle count
+	 * We only need to set the event for the cycle counter if we
+	 * have the ability to perform event filtering.
 	 */
-	if (idx != ARMV7_CYCLE_COUNTER)
+	if (armv7pmu.set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER)
 		armv7_pmnc_write_evtsel(idx, hwc->config_base);
 
 	/*
@@ -1017,17 +967,18 @@ static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_enable_counter(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	/*
 	 * Disable counter and interrupt
 	 */
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -1039,14 +990,14 @@ static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 	 */
 	armv7_pmnc_disable_intens(idx);
 
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 {
-	unsigned long pmnc;
+	u32 pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -1069,13 +1020,10 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		/*
 		 * We have a single interrupt for all counters. Check that
 		 * each counter has overflowed before we process it.
@@ -1090,7 +1038,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	/*
@@ -1108,61 +1056,114 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 static void armv7pmu_start(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(void)
 {
 	unsigned long flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
+static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
 				  struct hw_perf_event *event)
 {
 	int idx;
+	unsigned long evtype = event->config_base & ARMV7_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
-		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
+	if (evtype == ARMV7_PERFCTR_CPU_CYCLES) {
+		if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
-		return ARMV7_CYCLE_COUNTER;
-	} else {
-		/*
-		 * For anything other than a cycle counter, try and use
-		 * the events counters
-		 */
-		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
-			if (!test_and_set_bit(idx, cpuc->used_mask))
-				return idx;
-		}
+		return ARMV7_IDX_CYCLE_COUNTER;
+	}
 
-		/* The counters are all in use. */
-		return -EAGAIN;
+	/*
+	 * For anything other than a cycle counter, try and use
+	 * the events counters
+	 */
+	for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
 	}
+
+	/* The counters are all in use. */
+	return -EAGAIN;
+}
+
+/*
+ * Add an event filter to a given event. This will only work for PMUv2 PMUs.
+ */
+static int armv7pmu_set_event_filter(struct hw_perf_event *event,
+				     struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (attr->exclude_idle)
+		return -EPERM;
+	if (attr->exclude_user)
+		config_base |= ARMV7_EXCLUDE_USER;
+	if (attr->exclude_kernel)
+		config_base |= ARMV7_EXCLUDE_PL1;
+	if (!attr->exclude_hv)
+		config_base |= ARMV7_INCLUDE_HYP;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base = config_base;
+
+	return 0;
 }
 
 static void armv7pmu_reset(void *info)
 {
-	u32 idx, nb_cnt = armpmu->num_events;
+	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = 1; idx < nb_cnt; ++idx)
+	for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
 		armv7pmu_disable_event(NULL, idx);
 
 	/* Initialize & Reset PMNC: C and P bits */
 	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);
 }
 
+static int armv7_a8_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a8_perf_map,
+				&armv7_a8_perf_cache_map, 0xFF);
+}
+
+static int armv7_a9_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a9_perf_map,
+				&armv7_a9_perf_cache_map, 0xFF);
+}
+
+static int armv7_a5_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a5_perf_map,
+				&armv7_a5_perf_cache_map, 0xFF);
+}
+
+static int armv7_a15_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &armv7_a15_perf_map,
+				&armv7_a15_perf_cache_map, 0xFF);
+}
+
 static struct arm_pmu armv7pmu = {
 	.handle_irq		= armv7pmu_handle_irq,
 	.enable			= armv7pmu_enable_event,
@@ -1173,7 +1174,6 @@ static struct arm_pmu armv7pmu = {
 	.start			= armv7pmu_start,
 	.stop			= armv7pmu_stop,
 	.reset			= armv7pmu_reset,
-	.raw_event_mask		= 0xFF,
 	.max_period		= (1LLU << 32) - 1,
 };
 
@@ -1188,62 +1188,59 @@ static u32 __init armv7_read_num_pmnc_events(void)
 	return nb_cnt + 1;
 }
 
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
-	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a8_perf_map;
+	armv7pmu.map_event	= armv7_a8_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
-	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a9_perf_map;
+	armv7pmu.map_event	= armv7_a9_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
-	armv7pmu.cache_map	= &armv7_a5_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a5_perf_map;
+	armv7pmu.map_event	= armv7_a5_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
 	return &armv7pmu;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
-	armv7pmu.cache_map	= &armv7_a15_perf_cache_map;
-	armv7pmu.event_map	= &armv7_a15_perf_map;
+	armv7pmu.map_event	= armv7_a15_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
+	armv7pmu.set_event_filter = armv7pmu_set_event_filter;
 	return &armv7pmu;
 }
 #else
-static const struct arm_pmu *__init armv7_a8_pmu_init(void)
+static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a9_pmu_init(void)
+static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a5_pmu_init(void)
+static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init armv7_a15_pmu_init(void)
+static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 3c4397491d08..e0cca10a8411 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -40,7 +40,7 @@ enum xscale_perf_types {
 };
 
 enum xscale_counters {
-	XSCALE_CYCLE_COUNTER	= 1,
+	XSCALE_CYCLE_COUNTER	= 0,
 	XSCALE_COUNTER0,
 	XSCALE_COUNTER1,
 	XSCALE_COUNTER2,
@@ -222,7 +222,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -249,13 +249,10 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -266,7 +263,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -284,6 +281,7 @@ static void
 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -305,18 +303,19 @@ xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long val, mask, evt, flags;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	switch (idx) {
 	case XSCALE_CYCLE_COUNTER:
@@ -336,16 +335,16 @@ xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~mask;
 	val |= evt;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	if (XSCALE_PERFCTR_CCNT == event->config_base) {
@@ -368,24 +367,26 @@ static void
 xscale1pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val |= XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale1pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale1pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -424,7 +425,13 @@ xscale1pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale1pmu = {
+static int xscale_map_event(struct perf_event *event)
+{
+	return map_cpu_event(event, &xscale_perf_map,
+				&xscale_perf_cache_map, 0xFF);
+}
+
+static struct arm_pmu xscale1pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE1,
 	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
@@ -435,14 +442,12 @@ static const struct arm_pmu xscale1pmu = {
 	.get_event_idx	= xscale1pmu_get_event_idx,
 	.start		= xscale1pmu_start,
 	.stop		= xscale1pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 3,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return &xscale1pmu;
 }
@@ -560,7 +565,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 {
 	unsigned long pmnc, of_flags;
 	struct perf_sample_data data;
-	struct cpu_hw_events *cpuc;
+	struct pmu_hw_events *cpuc;
 	struct pt_regs *regs;
 	int idx;
 
@@ -581,13 +586,10 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
-	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
 
-		if (!test_bit(idx, cpuc->active_mask))
-			continue;
-
 		if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
 			continue;
 
@@ -598,7 +600,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			armpmu->disable(hwc, idx);
+			cpu_pmu->disable(hwc, idx);
 	}
 
 	irq_work_run();
@@ -616,6 +618,7 @@ static void
 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -649,16 +652,17 @@ xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	unsigned long flags, ien, evtsel;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
 	ien = xscale2pmu_read_int_enable();
 	evtsel = xscale2pmu_read_event_select();
@@ -692,14 +696,14 @@ xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 		return;
 	}
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	xscale2pmu_write_event_select(evtsel);
 	xscale2pmu_write_int_enable(ien);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
-xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc,
 			struct hw_perf_event *event)
 {
 	int idx = xscale1pmu_get_event_idx(cpuc, event);
@@ -718,24 +722,26 @@ static void
 xscale2pmu_start(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
 	val |= XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_stop(void)
 {
 	unsigned long flags, val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 
-	raw_spin_lock_irqsave(&pmu_lock, flags);
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	val = xscale2pmu_read_pmnc();
 	val &= ~XSCALE_PMU_ENABLE;
 	xscale2pmu_write_pmnc(val);
-	raw_spin_unlock_irqrestore(&pmu_lock, flags);
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
@@ -786,7 +792,7 @@ xscale2pmu_write_counter(int counter, u32 val)
 	}
 }
 
-static const struct arm_pmu xscale2pmu = {
+static struct arm_pmu xscale2pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE2,
 	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
@@ -797,24 +803,22 @@ static const struct arm_pmu xscale2pmu = {
 	.get_event_idx	= xscale2pmu_get_event_idx,
 	.start		= xscale2pmu_start,
 	.stop		= xscale2pmu_stop,
-	.cache_map	= &xscale_perf_cache_map,
-	.event_map	= &xscale_perf_map,
-	.raw_event_mask	= 0xFF,
+	.map_event	= xscale_map_event,
 	.num_events	= 5,
 	.max_period	= (1LLU << 32) - 1,
 };
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return &xscale2pmu;
 }
 #else
-static const struct arm_pmu *__init xscale1pmu_init(void)
+static struct arm_pmu *__init xscale1pmu_init(void)
 {
 	return NULL;
 }
 
-static const struct arm_pmu *__init xscale2pmu_init(void)
+static struct arm_pmu *__init xscale2pmu_init(void)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index c53474fe84df..2c3407ee8576 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -10,192 +10,26 @@
  *
  */
 
-#define pr_fmt(fmt) "PMU: " fmt
-
-#include <linux/cpumask.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/platform_device.h>
 
 #include <asm/pmu.h>
 
-static volatile long pmu_lock;
-
-static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES];
-
-static int __devinit pmu_register(struct platform_device *pdev,
-					enum arm_pmu_type type)
-{
-	if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
-		pr_warning("received registration request for unknown "
-				"PMU device type %d\n", type);
-		return -EINVAL;
-	}
-
-	if (pmu_devices[type]) {
-		pr_warning("rejecting duplicate registration of PMU device "
-			"type %d.", type);
-		return -ENOSPC;
-	}
-
-	pr_info("registered new PMU device of type %d\n", type);
-	pmu_devices[type] = pdev;
-	return 0;
-}
-
-#define OF_MATCH_PMU(_name, _type) {	\
-	.compatible = _name,		\
-	.data = (void *)_type,		\
-}
-
-#define OF_MATCH_CPU(name)	OF_MATCH_PMU(name, ARM_PMU_DEVICE_CPU)
-
-static struct of_device_id armpmu_of_device_ids[] = {
-	OF_MATCH_CPU("arm,cortex-a9-pmu"),
-	OF_MATCH_CPU("arm,cortex-a8-pmu"),
-	OF_MATCH_CPU("arm,arm1136-pmu"),
-	OF_MATCH_CPU("arm,arm1176-pmu"),
-	{},
-};
-
-#define PLAT_MATCH_PMU(_name, _type) {	\
-	.name		= _name,	\
-	.driver_data	= _type,	\
-}
-
-#define PLAT_MATCH_CPU(_name)	PLAT_MATCH_PMU(_name, ARM_PMU_DEVICE_CPU)
-
-static struct platform_device_id armpmu_plat_device_ids[] = {
-	PLAT_MATCH_CPU("arm-pmu"),
-	{},
-};
-
-enum arm_pmu_type armpmu_device_type(struct platform_device *pdev)
-{
-	const struct of_device_id	*of_id;
-	const struct platform_device_id *pdev_id;
-
-	/* provided by of_device_id table */
-	if (pdev->dev.of_node) {
-		of_id = of_match_device(armpmu_of_device_ids, &pdev->dev);
-		BUG_ON(!of_id);
-		return (enum arm_pmu_type)of_id->data;
-	}
-
-	/* Provided by platform_device_id table */
-	pdev_id = platform_get_device_id(pdev);
-	BUG_ON(!pdev_id);
-	return pdev_id->driver_data;
-}
-
-static int __devinit armpmu_device_probe(struct platform_device *pdev)
-{
-	return pmu_register(pdev, armpmu_device_type(pdev));
-}
-
-static struct platform_driver armpmu_driver = {
-	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
-	},
-	.probe		= armpmu_device_probe,
-	.id_table	= armpmu_plat_device_ids,
-};
-
-static int __init register_pmu_driver(void)
-{
-	return platform_driver_register(&armpmu_driver);
-}
-device_initcall(register_pmu_driver);
+/*
+ * PMU locking to ensure mutual exclusion between different subsystems.
+ */
+static unsigned long pmu_lock[BITS_TO_LONGS(ARM_NUM_PMU_DEVICES)];
 
-struct platform_device *
+int
 reserve_pmu(enum arm_pmu_type type)
 {
-	struct platform_device *pdev;
-
-	if (test_and_set_bit_lock(type, &pmu_lock)) {
-		pdev = ERR_PTR(-EBUSY);
-	} else if (pmu_devices[type] == NULL) {
-		clear_bit_unlock(type, &pmu_lock);
-		pdev = ERR_PTR(-ENODEV);
-	} else {
-		pdev = pmu_devices[type];
-	}
-
-	return pdev;
+	return test_and_set_bit_lock(type, pmu_lock) ? -EBUSY : 0;
 }
 EXPORT_SYMBOL_GPL(reserve_pmu);
 
-int
+void
 release_pmu(enum arm_pmu_type type)
 {
-	if (WARN_ON(!pmu_devices[type]))
-		return -EINVAL;
-	clear_bit_unlock(type, &pmu_lock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(release_pmu);
-
-static int
-set_irq_affinity(int irq,
-		 unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-	int err = irq_set_affinity(irq, cpumask_of(cpu));
-	if (err)
-		pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-			   irq, cpu);
-	return err;
-#else
-	return -EINVAL;
-#endif
-}
-
-static int
-init_cpu_pmu(void)
-{
-	int i, irqs, err = 0;
-	struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
-
-	if (!pdev)
-		return -ENODEV;
-
-	irqs = pdev->num_resources;
-
-	/*
-	 * If we have a single PMU interrupt that we can't shift, assume that
-	 * we're running on a uniprocessor machine and continue.
-	 */
-	if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
-		return 0;
-
-	for (i = 0; i < irqs; ++i) {
-		err = set_irq_affinity(platform_get_irq(pdev, i), i);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-int
-init_pmu(enum arm_pmu_type type)
-{
-	int err = 0;
-
-	switch (type) {
-	case ARM_PMU_DEVICE_CPU:
-		err = init_cpu_pmu();
-		break;
-	default:
-		pr_warning("attempt to initialise PMU of unknown "
-			   "type %d\n", type);
-		err = -EINVAL;
-	}
-
-	return err;
+	clear_bit_unlock(type, pmu_lock);
 }
-EXPORT_SYMBOL_GPL(init_pmu);
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 1a347f481e5e..fd0814076ff6 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -319,7 +319,7 @@ void show_regs(struct pt_regs * regs)
 	printk("\n");
 	printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
 	__show_regs(regs);
-	__backtrace();
+	dump_stack();
 }
 
 ATOMIC_NOTIFIER_HEAD(thread_notify_head);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 6136144f8f8d..bda0a218f4a5 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -29,6 +29,8 @@
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
 #include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
 
 #include <asm/unified.h>
 #include <asm/cpu.h>
@@ -42,6 +44,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/tlbflush.h>
+#include <asm/system.h>
 
 #include <asm/prom.h>
 #include <asm/mach/arch.h>
@@ -115,6 +118,13 @@ struct outer_cache_fns outer_cache __read_mostly;
 EXPORT_SYMBOL(outer_cache);
 #endif
 
+/*
+ * Cached cpu_architecture() result for use by assembler code.
+ * C code should use the cpu_architecture() function instead of accessing this
+ * variable directly.
+ */
+int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN;
+
 struct stack {
 	u32 irq[3];
 	u32 abt[3];
@@ -210,7 +220,7 @@ static const char *proc_arch[] = {
 	"?(17)",
 };
 
-int cpu_architecture(void)
+static int __get_cpu_architecture(void)
 {
 	int cpu_arch;
 
@@ -243,11 +253,22 @@ int cpu_architecture(void)
 	return cpu_arch;
 }
 
+int __pure cpu_architecture(void)
+{
+	BUG_ON(__cpu_architecture == CPU_ARCH_UNKNOWN);
+
+	return __cpu_architecture;
+}
+
 static int cpu_has_aliasing_icache(unsigned int arch)
 {
 	int aliasing_icache;
 	unsigned int id_reg, num_sets, line_size;
 
+	/* PIPT caches never alias. */
+	if (icache_is_pipt())
+		return 0;
+
 	/* arch specifies the register format */
 	switch (arch) {
 	case CPU_ARCH_ARMv7:
@@ -282,8 +303,14 @@ static void __init cacheid_init(void)
 			/* ARMv7 register format */
 			arch = CPU_ARCH_ARMv7;
 			cacheid = CACHEID_VIPT_NONALIASING;
-			if ((cachetype & (3 << 14)) == 1 << 14)
+			switch (cachetype & (3 << 14)) {
+			case (1 << 14):
 				cacheid |= CACHEID_ASID_TAGGED;
+				break;
+			case (3 << 14):
+				cacheid |= CACHEID_PIPT;
+				break;
+			}
 		} else {
 			arch = CPU_ARCH_ARMv6;
 			if (cachetype & (1 << 23))
@@ -300,10 +327,11 @@ static void __init cacheid_init(void)
 	printk("CPU: %s data cache, %s instruction cache\n",
 		cache_is_vivt() ? "VIVT" :
 		cache_is_vipt_aliasing() ? "VIPT aliasing" :
-		cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown",
+		cache_is_vipt_nonaliasing() ? "PIPT / VIPT nonaliasing" : "unknown",
 		cache_is_vivt() ? "VIVT" :
 		icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" :
 		icache_is_vipt_aliasing() ? "VIPT aliasing" :
+		icache_is_pipt() ? "PIPT" :
 		cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown");
 }
 
@@ -414,6 +442,7 @@ static void __init setup_processor(void)
 	}
 
 	cpu_name = list->cpu_name;
+	__cpu_architecture = __get_cpu_architecture();
 
 #ifdef MULTI_CPU
 	processor = *list->proc;
@@ -844,7 +873,7 @@ static struct machine_desc * __init setup_machine_tags(unsigned int nr)
 	}
 
 	if (mdesc->fixup)
-		mdesc->fixup(mdesc, tags, &from, &meminfo);
+		mdesc->fixup(tags, &from, &meminfo);
 
 	if (tags->hdr.tag == ATAG_CORE) {
 		if (meminfo.nr_banks != 0)
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index dc902f2c6845..020e99c845e7 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -8,92 +8,61 @@
 	.text
 
 /*
- * Save CPU state for a suspend
- *  r1 = v:p offset
- *  r2 = suspend function arg0
- *  r3 = suspend function
+ * Save CPU state for a suspend.  This saves the CPU general purpose
+ * registers, and allocates space on the kernel stack to save the CPU
+ * specific registers and some other data for resume.
+ *  r0 = suspend function arg0
+ *  r1 = suspend function
  */
 ENTRY(__cpu_suspend)
 	stmfd	sp!, {r4 - r11, lr}
 #ifdef MULTI_CPU
 	ldr	r10, =processor
-	ldr	r5, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
-	ldr	ip, [r10, #CPU_DO_RESUME] @ virtual resume function
+	ldr	r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state
 #else
-	ldr	r5, =cpu_suspend_size
-	ldr	ip, =cpu_do_resume
+	ldr	r4, =cpu_suspend_size
 #endif
-	mov	r6, sp			@ current virtual SP
-	sub	sp, sp, r5		@ allocate CPU state on stack
-	mov	r0, sp			@ save pointer to CPU save block
-	add	ip, ip, r1		@ convert resume fn to phys
-	stmfd	sp!, {r1, r6, ip}	@ save v:p, virt SP, phys resume fn
-	ldr	r5, =sleep_save_sp
-	add	r6, sp, r1		@ convert SP to phys
-	stmfd	sp!, {r2, r3}		@ save suspend func arg and pointer
+	mov	r5, sp			@ current virtual SP
+	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
+	sub	sp, sp, r4		@ allocate CPU state on stack
+	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
+	add	r0, sp, #8		@ save pointer to save block
+	mov	r1, r4			@ size of save block
+	mov	r2, r5			@ virtual SP
+	ldr	r3, =sleep_save_sp
 #ifdef CONFIG_SMP
 	ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
 	ALT_UP(mov lr, #0)
 	and	lr, lr, #15
-	str	r6, [r5, lr, lsl #2]	@ save phys SP
-#else
-	str	r6, [r5]		@ save phys SP
-#endif
-#ifdef MULTI_CPU
-	mov	lr, pc
-	ldr	pc, [r10, #CPU_DO_SUSPEND] @ save CPU state
-#else
-	bl	cpu_do_suspend
-#endif
-
-	@ flush data cache
-#ifdef MULTI_CACHE
-	ldr	r10, =cpu_cache
-	mov	lr, pc
-	ldr	pc, [r10, #CACHE_FLUSH_KERN_ALL]
-#else
-	bl	__cpuc_flush_kern_all
+	add	r3, r3, lr, lsl #2
 #endif
+	bl	__cpu_suspend_save
 	adr	lr, BSYM(cpu_suspend_abort)
 	ldmfd	sp!, {r0, pc}		@ call suspend fn
 ENDPROC(__cpu_suspend)
 	.ltorg
 
 cpu_suspend_abort:
-	ldmia	sp!, {r1 - r3}		@ pop v:p, virt SP, phys resume fn
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	teq	r0, #0
+	moveq	r0, #1			@ force non-zero value
 	mov	sp, r2
 	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_suspend_abort)
 
 /*
  * r0 = control register value
- * r1 = v:p offset (preserved by cpu_do_resume)
- * r2 = phys page table base
- * r3 = L1 section flags
  */
+	.align	5
 ENTRY(cpu_resume_mmu)
-	adr	r4, cpu_resume_turn_mmu_on
-	mov	r4, r4, lsr #20
-	orr	r3, r3, r4, lsl #20
-	ldr	r5, [r2, r4, lsl #2]	@ save old mapping
-	str	r3, [r2, r4, lsl #2]	@ setup 1:1 mapping for mmu code
-	sub	r2, r2, r1
 	ldr	r3, =cpu_resume_after_mmu
-	bic	r1, r0, #CR_C		@ ensure D-cache is disabled
-	b	cpu_resume_turn_mmu_on
-ENDPROC(cpu_resume_mmu)
-	.ltorg
-	.align	5
-cpu_resume_turn_mmu_on:
-	mcr	p15, 0, r1, c1, c0, 0	@ turn on MMU, I-cache, etc
-	mrc	p15, 0, r1, c0, c0, 0	@ read id reg
-	mov	r1, r1
-	mov	r1, r1
+	mcr	p15, 0, r0, c1, c0, 0	@ turn on MMU, I-cache, etc
+	mrc	p15, 0, r0, c0, c0, 0	@ read id reg
+	mov	r0, r0
+	mov	r0, r0
 	mov	pc, r3			@ jump to virtual address
-ENDPROC(cpu_resume_turn_mmu_on)
+ENDPROC(cpu_resume_mmu)
 cpu_resume_after_mmu:
-	str	r5, [r2, r4, lsl #2]	@ restore old mapping
-	mcr	p15, 0, r0, c1, c0, 0	@ turn on D-cache
 	bl	cpu_init		@ restore the und/abt/irq banked regs
 	mov	r0, #0			@ return zero on success
 	ldmfd	sp!, {r4 - r11, pc}
@@ -119,7 +88,7 @@ ENTRY(cpu_resume)
 	ldr	r0, sleep_save_sp	@ stack phys addr
 #endif
 	setmode	PSR_I_BIT | PSR_F_BIT | SVC_MODE, r1  @ set SVC, irqs off
-	@ load v:p, stack, resume fn
+	@ load phys pgd, stack, resume fn
   ARM(	ldmia	r0!, {r1, sp, pc}	)
 THUMB(	ldmia	r0!, {r1, r2, r3}	)
 THUMB(	mov	sp, r2			)
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index d88ff0230e82..ef5640b9e218 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -16,7 +16,6 @@
 #include <linux/cache.h>
 #include <linux/profile.h>
 #include <linux/errno.h>
-#include <linux/ftrace.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/cpu.h>
@@ -31,6 +30,8 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
+#include <asm/exception.h>
+#include <asm/topology.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -39,6 +40,7 @@
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
 #include <asm/localtimer.h>
+#include <asm/smp_plat.h>
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
@@ -259,6 +261,20 @@ void __ref cpu_die(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+int __cpu_logical_map[NR_CPUS];
+
+void __init smp_setup_processor_id(void)
+{
+	int i;
+	u32 cpu = is_smp() ? read_cpuid_mpidr() & 0xff : 0;
+
+	cpu_logical_map(0) = cpu;
+	for (i = 1; i < NR_CPUS; ++i)
+		cpu_logical_map(i) = i == cpu ? 0 : i;
+
+	printk(KERN_INFO "Booting Linux on physical CPU %d\n", cpu);
+}
+
 /*
  * Called by both boot and secondaries to move global data into
  * per-processor storage.
@@ -268,6 +284,8 @@ static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
 	struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid);
 
 	cpu_info->loops_per_jiffy = loops_per_jiffy;
+
+	store_cpu_topology(cpuid);
 }
 
 /*
@@ -301,17 +319,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 */
 	platform_secondary_init(cpu);
 
-	/*
-	 * Enable local interrupts.
-	 */
 	notify_cpu_starting(cpu);
-	local_irq_enable();
-	local_fiq_enable();
-
-	/*
-	 * Setup the percpu timer for this CPU.
-	 */
-	percpu_timer_setup();
 
 	calibrate_delay();
 
@@ -323,10 +331,23 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 * before we continue.
 	 */
 	set_cpu_online(cpu, true);
+
+	/*
+	 * Setup the percpu timer for this CPU.
+	 */
+	percpu_timer_setup();
+
 	while (!cpu_active(cpu))
 		cpu_relax();
 
 	/*
+	 * cpu_active bit is set, so it's safe to enalbe interrupts
+	 * now.
+	 */
+	local_irq_enable();
+	local_fiq_enable();
+
+	/*
 	 * OK, it's off to the idle thread for us
 	 */
 	cpu_idle();
@@ -358,6 +379,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int ncores = num_possible_cpus();
 
+	init_cpu_topology();
+
 	smp_store_cpu_info(smp_processor_id());
 
 	/*
@@ -437,10 +460,6 @@ u64 smp_irq_stat_cpu(unsigned int cpu)
 	for (i = 0; i < NR_IPI; i++)
 		sum += __get_irq_stat(cpu, ipi_irqs[i]);
 
-#ifdef CONFIG_LOCAL_TIMERS
-	sum += __get_irq_stat(cpu, local_timer_irqs);
-#endif
-
 	return sum;
 }
 
@@ -457,33 +476,6 @@ static void ipi_timer(void)
 	irq_exit();
 }
 
-#ifdef CONFIG_LOCAL_TIMERS
-asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-	int cpu = smp_processor_id();
-
-	if (local_timer_ack()) {
-		__inc_irq_stat(cpu, local_timer_irqs);
-		ipi_timer();
-	}
-
-	set_irq_regs(old_regs);
-}
-
-void show_local_irqs(struct seq_file *p, int prec)
-{
-	unsigned int cpu;
-
-	seq_printf(p, "%*s: ", prec, "LOC");
-
-	for_each_present_cpu(cpu)
-		seq_printf(p, "%10u ", __get_irq_stat(cpu, local_timer_irqs));
-
-	seq_printf(p, " Local timer interrupts\n");
-}
-#endif
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static void smp_timer_broadcast(const struct cpumask *mask)
 {
@@ -534,11 +526,11 @@ static void percpu_timer_stop(void)
 	unsigned int cpu = smp_processor_id();
 	struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu);
 
-	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	local_timer_stop(evt);
 }
 #endif
 
-static DEFINE_SPINLOCK(stop_lock);
+static DEFINE_RAW_SPINLOCK(stop_lock);
 
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
@@ -547,10 +539,10 @@ static void ipi_cpu_stop(unsigned int cpu)
 {
 	if (system_state == SYSTEM_BOOTING ||
 	    system_state == SYSTEM_RUNNING) {
-		spin_lock(&stop_lock);
+		raw_spin_lock(&stop_lock);
 		printk(KERN_CRIT "CPU%u: stopping\n", cpu);
 		dump_stack();
-		spin_unlock(&stop_lock);
+		raw_spin_unlock(&stop_lock);
 	}
 
 	set_cpu_online(cpu, false);
@@ -567,6 +559,11 @@ static void ipi_cpu_stop(unsigned int cpu)
  */
 asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 {
+	handle_IPI(ipinr, regs);
+}
+
+void handle_IPI(int ipinr, struct pt_regs *regs)
+{
 	unsigned int cpu = smp_processor_id();
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c
index 7fcddb75c877..8f5dd7963356 100644
--- a/arch/arm/kernel/smp_scu.c
+++ b/arch/arm/kernel/smp_scu.c
@@ -34,7 +34,7 @@ unsigned int __init scu_get_core_count(void __iomem *scu_base)
 /*
  * Enable the SCU
  */
-void __init scu_enable(void __iomem *scu_base)
+void scu_enable(void __iomem *scu_base)
 {
 	u32 scu_ctrl;
 
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index 01c186222f3b..a8a6682d6b52 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -19,6 +19,7 @@
 #include <linux/io.h>
 
 #include <asm/smp_twd.h>
+#include <asm/localtimer.h>
 #include <asm/hardware/gic.h>
 
 /* set up by the platform code */
@@ -26,6 +27,8 @@ void __iomem *twd_base;
 
 static unsigned long twd_timer_rate;
 
+static struct clock_event_device __percpu **twd_evt;
+
 static void twd_set_mode(enum clock_event_mode mode,
 			struct clock_event_device *clk)
 {
@@ -80,6 +83,12 @@ int twd_timer_ack(void)
 	return 0;
 }
 
+void twd_timer_stop(struct clock_event_device *clk)
+{
+	twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk);
+	disable_percpu_irq(clk->irq);
+}
+
 static void __cpuinit twd_calibrate_rate(void)
 {
 	unsigned long count;
@@ -119,11 +128,43 @@ static void __cpuinit twd_calibrate_rate(void)
 	}
 }
 
+static irqreturn_t twd_handler(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
+
+	if (twd_timer_ack()) {
+		evt->event_handler(evt);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
 /*
  * Setup the local clock events for a CPU.
  */
 void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 {
+	struct clock_event_device **this_cpu_clk;
+
+	if (!twd_evt) {
+		int err;
+
+		twd_evt = alloc_percpu(struct clock_event_device *);
+		if (!twd_evt) {
+			pr_err("twd: can't allocate memory\n");
+			return;
+		}
+
+		err = request_percpu_irq(clk->irq, twd_handler,
+					 "twd", twd_evt);
+		if (err) {
+			pr_err("twd: can't register interrupt %d (%d)\n",
+			       clk->irq, err);
+			return;
+		}
+	}
+
 	twd_calibrate_rate();
 
 	clk->name = "local_timer";
@@ -137,8 +178,10 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
 	clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
 	clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
 
+	this_cpu_clk = __this_cpu_ptr(twd_evt);
+	*this_cpu_clk = clk;
+
 	clockevents_register_device(clk);
 
-	/* Make sure our local interrupt controller has this enabled */
-	gic_enable_ppi(clk->irq);
+	enable_percpu_irq(clk->irq, 0);
 }
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
new file mode 100644
index 000000000000..93a22d282c16
--- /dev/null
+++ b/arch/arm/kernel/suspend.c
@@ -0,0 +1,72 @@
+#include <linux/init.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+static pgd_t *suspend_pgd;
+
+extern int __cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void cpu_resume_mmu(void);
+
+/*
+ * This is called by __cpu_suspend() to save the state, and do whatever
+ * flushing is required to ensure that when the CPU goes to sleep we have
+ * the necessary data available when the caches are not searched.
+ */
+void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
+{
+	*save_ptr = virt_to_phys(ptr);
+
+	/* This must correspond to the LDM in cpu_resume() assembly */
+	*ptr++ = virt_to_phys(suspend_pgd);
+	*ptr++ = sp;
+	*ptr++ = virt_to_phys(cpu_do_resume);
+
+	cpu_do_suspend(ptr);
+
+	flush_cache_all();
+	outer_clean_range(*save_ptr, *save_ptr + ptrsz);
+	outer_clean_range(virt_to_phys(save_ptr),
+			  virt_to_phys(save_ptr) + sizeof(*save_ptr));
+}
+
+/*
+ * Hide the first two arguments to __cpu_suspend - these are an implementation
+ * detail which platform code shouldn't have to know about.
+ */
+int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
+{
+	struct mm_struct *mm = current->active_mm;
+	int ret;
+
+	if (!suspend_pgd)
+		return -EINVAL;
+
+	/*
+	 * Provide a temporary page table with an identity mapping for
+	 * the MMU-enable code, required for resuming.  On successful
+	 * resume (indicated by a zero return code), we need to switch
+	 * back to the correct page tables.
+	 */
+	ret = __cpu_suspend(arg, fn);
+	if (ret == 0) {
+		cpu_switch_mm(mm->pgd, mm);
+		local_flush_tlb_all();
+	}
+
+	return ret;
+}
+
+static int __init cpu_suspend_init(void)
+{
+	suspend_pgd = pgd_alloc(&init_mm);
+	if (suspend_pgd) {
+		unsigned long addr = virt_to_phys(cpu_resume_mmu);
+		identity_mapping_add(suspend_pgd, addr, addr + SECTION_SIZE);
+	}
+	return suspend_pgd ? 0 : -ENOMEM;
+}
+core_initcall(cpu_suspend_init);
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index cb634c3e28e9..5a54b95d6bd2 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -39,13 +39,11 @@
  */
 static struct sys_timer *system_timer;
 
-#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE)
+#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) || \
+    defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE)
 /* this needs a better home */
 DEFINE_SPINLOCK(rtc_lock);
-
-#ifdef CONFIG_RTC_DRV_CMOS_MODULE
 EXPORT_SYMBOL(rtc_lock);
-#endif
 #endif	/* pc-style 'CMOS' RTC support */
 
 /* change this if you have some constant time drift */
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
new file mode 100644
index 000000000000..1040c00405d0
--- /dev/null
+++ b/arch/arm/kernel/topology.c
@@ -0,0 +1,148 @@
+/*
+ * arch/arm/kernel/topology.c
+ *
+ * Copyright (C) 2011 Linaro Limited.
+ * Written by: Vincent Guittot
+ *
+ * based on arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/sched.h>
+
+#include <asm/cputype.h>
+#include <asm/topology.h>
+
+#define MPIDR_SMP_BITMASK (0x3 << 30)
+#define MPIDR_SMP_VALUE (0x2 << 30)
+
+#define MPIDR_MT_BITMASK (0x1 << 24)
+
+/*
+ * These masks reflect the current use of the affinity levels.
+ * The affinity level can be up to 16 bits according to ARM ARM
+ */
+
+#define MPIDR_LEVEL0_MASK 0x3
+#define MPIDR_LEVEL0_SHIFT 0
+
+#define MPIDR_LEVEL1_MASK 0xF
+#define MPIDR_LEVEL1_SHIFT 8
+
+#define MPIDR_LEVEL2_MASK 0xFF
+#define MPIDR_LEVEL2_SHIFT 16
+
+struct cputopo_arm cpu_topology[NR_CPUS];
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+	return &cpu_topology[cpu].core_sibling;
+}
+
+/*
+ * store_cpu_topology is called at boot when only one cpu is running
+ * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
+ * which prevents simultaneous write access to cpu_topology array
+ */
+void store_cpu_topology(unsigned int cpuid)
+{
+	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+	unsigned int mpidr;
+	unsigned int cpu;
+
+	/* If the cpu topology has been already set, just return */
+	if (cpuid_topo->core_id != -1)
+		return;
+
+	mpidr = read_cpuid_mpidr();
+
+	/* create cpu topology mapping */
+	if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) {
+		/*
+		 * This is a multiprocessor system
+		 * multiprocessor format & multiprocessor mode field are set
+		 */
+
+		if (mpidr & MPIDR_MT_BITMASK) {
+			/* core performance interdependency */
+			cpuid_topo->thread_id = (mpidr >> MPIDR_LEVEL0_SHIFT)
+				& MPIDR_LEVEL0_MASK;
+			cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL1_SHIFT)
+				& MPIDR_LEVEL1_MASK;
+			cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL2_SHIFT)
+				& MPIDR_LEVEL2_MASK;
+		} else {
+			/* largely independent cores */
+			cpuid_topo->thread_id = -1;
+			cpuid_topo->core_id = (mpidr >> MPIDR_LEVEL0_SHIFT)
+				& MPIDR_LEVEL0_MASK;
+			cpuid_topo->socket_id = (mpidr >> MPIDR_LEVEL1_SHIFT)
+				& MPIDR_LEVEL1_MASK;
+		}
+	} else {
+		/*
+		 * This is an uniprocessor system
+		 * we are in multiprocessor format but uniprocessor system
+		 * or in the old uniprocessor format
+		 */
+		cpuid_topo->thread_id = -1;
+		cpuid_topo->core_id = 0;
+		cpuid_topo->socket_id = -1;
+	}
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm *cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->socket_id == cpu_topo->socket_id) {
+			cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+			if (cpu != cpuid)
+				cpumask_set_cpu(cpu,
+					&cpuid_topo->core_sibling);
+
+			if (cpuid_topo->core_id == cpu_topo->core_id) {
+				cpumask_set_cpu(cpuid,
+					&cpu_topo->thread_sibling);
+				if (cpu != cpuid)
+					cpumask_set_cpu(cpu,
+						&cpuid_topo->thread_sibling);
+			}
+		}
+	}
+	smp_wmb();
+
+	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
+		cpuid, cpu_topology[cpuid].thread_id,
+		cpu_topology[cpuid].core_id,
+		cpu_topology[cpuid].socket_id, mpidr);
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void init_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	/* init core mask */
+	for_each_possible_cpu(cpu) {
+		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id =  -1;
+		cpu_topo->socket_id = -1;
+		cpumask_clear(&cpu_topo->core_sibling);
+		cpumask_clear(&cpu_topo->thread_sibling);
+	}
+	smp_wmb();
+}
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index bc9f9da782cb..99a572702509 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -21,12 +21,14 @@
 #include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/kexec.h>
+#include <linux/bug.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 
 #include <linux/atomic.h>
 #include <asm/cacheflush.h>
+#include <asm/exception.h>
 #include <asm/system.h>
 #include <asm/unistd.h>
 #include <asm/traps.h>
@@ -255,7 +257,7 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 	return ret;
 }
 
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
 
 /*
  * This function is protected against re-entrancy.
@@ -267,9 +269,11 @@ void die(const char *str, struct pt_regs *regs, int err)
 
 	oops_enter();
 
-	spin_lock_irq(&die_lock);
+	raw_spin_lock_irq(&die_lock);
 	console_verbose();
 	bust_spinlocks(1);
+	if (!user_mode(regs))
+		report_bug(regs->ARM_pc, regs);
 	ret = __die(str, err, thread, regs);
 
 	if (regs && kexec_should_crash(thread->task))
@@ -277,7 +281,7 @@ void die(const char *str, struct pt_regs *regs, int err)
 
 	bust_spinlocks(0);
 	add_taint(TAINT_DIE);
-	spin_unlock_irq(&die_lock);
+	raw_spin_unlock_irq(&die_lock);
 	oops_exit();
 
 	if (in_interrupt())
@@ -301,25 +305,43 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
 	}
 }
 
+#ifdef CONFIG_GENERIC_BUG
+
+int is_valid_bugaddr(unsigned long pc)
+{
+#ifdef CONFIG_THUMB2_KERNEL
+	unsigned short bkpt;
+#else
+	unsigned long bkpt;
+#endif
+
+	if (probe_kernel_address((unsigned *)pc, bkpt))
+		return 0;
+
+	return bkpt == BUG_INSTR_VALUE;
+}
+
+#endif
+
 static LIST_HEAD(undef_hook);
-static DEFINE_SPINLOCK(undef_lock);
+static DEFINE_RAW_SPINLOCK(undef_lock);
 
 void register_undef_hook(struct undef_hook *hook)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&undef_lock, flags);
+	raw_spin_lock_irqsave(&undef_lock, flags);
 	list_add(&hook->node, &undef_hook);
-	spin_unlock_irqrestore(&undef_lock, flags);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
 }
 
 void unregister_undef_hook(struct undef_hook *hook)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&undef_lock, flags);
+	raw_spin_lock_irqsave(&undef_lock, flags);
 	list_del(&hook->node);
-	spin_unlock_irqrestore(&undef_lock, flags);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
 }
 
 static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
@@ -328,12 +350,12 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
 	unsigned long flags;
 	int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;
 
-	spin_lock_irqsave(&undef_lock, flags);
+	raw_spin_lock_irqsave(&undef_lock, flags);
 	list_for_each_entry(hook, &undef_hook, node)
 		if ((instr & hook->instr_mask) == hook->instr_val &&
 		    (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val)
 			fn = hook->fn;
-	spin_unlock_irqrestore(&undef_lock, flags);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
 
 	return fn ? fn(regs, instr) : 1;
 }
@@ -706,16 +728,6 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
 	arm_notify_die("unknown data abort code", regs, &info, instr, 0);
 }
 
-void __attribute__((noreturn)) __bug(const char *file, int line)
-{
-	printk(KERN_CRIT"kernel BUG at %s:%d!\n", file, line);
-	*(int *)0 = 0;
-
-	/* Avoid "noreturn function does return" */
-	for (;;);
-}
-EXPORT_SYMBOL(__bug);
-
 void __readwrite_bug(const char *fn)
 {
 	printk("%s called, but not implemented\n", fn);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 4e66f62b8d41..20b3041e0860 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -21,7 +21,8 @@
 #define ARM_CPU_KEEP(x)
 #endif
 
-#if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)
+#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+	defined(CONFIG_GENERIC_BUG)
 #define ARM_EXIT_KEEP(x)	x
 #define ARM_EXIT_DISCARD(x)
 #else
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index a673297b0cf1..cd07b5814c23 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -22,15 +22,10 @@
 #define mask	r7
 #define offset	r8
 
-ENTRY(__backtrace)
-		mov	r1, #0x10
-		mov	r0, fp
-
 ENTRY(c_backtrace)
 
 #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
 		mov	pc, lr
-ENDPROC(__backtrace)
 ENDPROC(c_backtrace)
 #else
 		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
@@ -107,7 +102,6 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
 		mov	r1, frame
 		bl	printk
 no_frame:	ldmfd	sp!, {r4 - r8, pc}
-ENDPROC(__backtrace)
 ENDPROC(c_backtrace)
 		
 		.pushsection __ex_table,"a"
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index faa7748142da..e55c4842c290 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -13,6 +13,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/unwind.h>
 
 #ifdef __ARMEB__
 #define xh r0
@@ -44,6 +45,7 @@
  */
 
 ENTRY(__do_div64)
+UNWIND(.fnstart)
 
 	@ Test for easy paths first.
 	subs	ip, r4, #1
@@ -189,7 +191,12 @@ ENTRY(__do_div64)
 	moveq	yh, xh
 	moveq	xh, #0
 	moveq	pc, lr
+UNWIND(.fnend)
 
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
 	@ Division by 0:
 	str	lr, [sp, #-8]!
 	bl	__div0
@@ -200,4 +207,5 @@ ENTRY(__do_div64)
 	mov	xh, #0
 	ldr	pc, [sp], #8
 
+UNWIND(.fnend)
 ENDPROC(__do_div64)
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 8b9b13649f81..025f742dd4df 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/hardirq.h> /* for in_atomic() */
 #include <linux/gfp.h>
+#include <linux/highmem.h>
 #include <asm/current.h>
 #include <asm/page.h>
 
diff --git a/arch/arm/mach-at91/Makefile.boot b/arch/arm/mach-at91/Makefile.boot
index 3462b815054a..9ab5a3e5f4f1 100644
--- a/arch/arm/mach-at91/Makefile.boot
+++ b/arch/arm/mach-at91/Makefile.boot
@@ -4,15 +4,15 @@
 #   INITRD_PHYS must be in RAM
 
 ifeq ($(CONFIG_ARCH_AT91CAP9),y)
-   zreladdr-y	:= 0x70008000
+   zreladdr-y	+= 0x70008000
 params_phys-y	:= 0x70000100
 initrd_phys-y	:= 0x70410000
 else ifeq ($(CONFIG_ARCH_AT91SAM9G45),y)
-   zreladdr-y	:= 0x70008000
+   zreladdr-y	+= 0x70008000
 params_phys-y	:= 0x70000100
 initrd_phys-y	:= 0x70410000
 else
-   zreladdr-y	:= 0x20008000
+   zreladdr-y	+= 0x20008000
 params_phys-y	:= 0x20000100
 initrd_phys-y	:= 0x20410000
 endif
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index a53b3de9daa2..3c2b580b9d75 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -319,7 +319,7 @@ void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data)
 	if (!data)
 		return;
 
-	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+	for (i = 0; i < ATMCI_MAX_NR_SLOTS; i++) {
 		if (data->slot[i].bus_width) {
 			/* input/irq */
 			if (data->slot[i].detect_pin) {
diff --git a/arch/arm/mach-bcmring/Kconfig b/arch/arm/mach-bcmring/Kconfig
index 457b4384913e..9170d16dca50 100644
--- a/arch/arm/mach-bcmring/Kconfig
+++ b/arch/arm/mach-bcmring/Kconfig
@@ -17,5 +17,3 @@ config BCM_ZRELADDR
 	hex "Compressed ZREL ADDR"
 
 endmenu
-
-# source "drivers/char/bcmring/Kconfig"
diff --git a/arch/arm/mach-bcmring/Makefile.boot b/arch/arm/mach-bcmring/Makefile.boot
index fb53b283bebb..aef2467757fa 100644
--- a/arch/arm/mach-bcmring/Makefile.boot
+++ b/arch/arm/mach-bcmring/Makefile.boot
@@ -1,6 +1,6 @@
 # Address where decompressor will be written and eventually executed.
 #
 # default to SDRAM
-zreladdr-y      := $(CONFIG_BCM_ZRELADDR)
+zreladdr-y      += $(CONFIG_BCM_ZRELADDR)
 params_phys-y   := 0x00000800
 
diff --git a/arch/arm/mach-bcmring/arch.c b/arch/arm/mach-bcmring/arch.c
index a604b9ebb501..31a143592c81 100644
--- a/arch/arm/mach-bcmring/arch.c
+++ b/arch/arm/mach-bcmring/arch.c
@@ -136,8 +136,8 @@ static void __init bcmring_init_machine(void)
 *
 *****************************************************************************/
 
-static void __init bcmring_fixup(struct machine_desc *desc,
-     struct tag *t, char **cmdline, struct meminfo *mi) {
+static void __init bcmring_fixup(struct tag *t, char **cmdline,
+	struct meminfo *mi) {
 #ifdef CONFIG_BLK_DEV_INITRD
 	printk(KERN_NOTICE "bcmring_fixup\n");
 	t->hdr.tag = ATAG_CORE;
diff --git a/arch/arm/mach-bcmring/irq.c b/arch/arm/mach-bcmring/irq.c
index c48feaf4e8e9..437fa683bcb2 100644
--- a/arch/arm/mach-bcmring/irq.c
+++ b/arch/arm/mach-bcmring/irq.c
@@ -20,7 +20,6 @@
 #include <linux/stddef.h>
 #include <linux/list.h>
 #include <linux/timer.h>
-#include <linux/version.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-bcmring/timer.c b/arch/arm/mach-bcmring/timer.c
index 2d415d2a8e68..af9c3d7e2a0c 100644
--- a/arch/arm/mach-bcmring/timer.c
+++ b/arch/arm/mach-bcmring/timer.c
@@ -12,7 +12,6 @@
 * consent.
 *****************************************************************************/
 
-#include <linux/version.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <csp/tmrHw.h>
diff --git a/arch/arm/mach-clps711x/Makefile.boot b/arch/arm/mach-clps711x/Makefile.boot
index a51fcef64fe0..9398e859b5af 100644
--- a/arch/arm/mach-clps711x/Makefile.boot
+++ b/arch/arm/mach-clps711x/Makefile.boot
@@ -1,5 +1,5 @@
 # The standard locations for stuff on CLPS711x type processors
-   zreladdr-y				:= 0xc0028000
+   zreladdr-y				+= 0xc0028000
 params_phys-y				:= 0xc0000100
 # Should probably have some agreement on these...
 initrd_phys-$(CONFIG_ARCH_P720T)	:= 0xc0400000
diff --git a/arch/arm/mach-clps711x/clep7312.c b/arch/arm/mach-clps711x/clep7312.c
index 06c8abd9371f..80496c09ac59 100644
--- a/arch/arm/mach-clps711x/clep7312.c
+++ b/arch/arm/mach-clps711x/clep7312.c
@@ -26,8 +26,7 @@
 #include "common.h"
 
 static void __init
-fixup_clep7312(struct machine_desc *desc, struct tag *tags,
-	    char **cmdline, struct meminfo *mi)
+fixup_clep7312(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	mi->nr_banks=1;
 	mi->bank[0].start = 0xc0000000;
diff --git a/arch/arm/mach-clps711x/edb7211-arch.c b/arch/arm/mach-clps711x/edb7211-arch.c
index abf522d1ec9b..9721f6111dc0 100644
--- a/arch/arm/mach-clps711x/edb7211-arch.c
+++ b/arch/arm/mach-clps711x/edb7211-arch.c
@@ -37,8 +37,7 @@ static void __init edb7211_reserve(void)
 }
 
 static void __init
-fixup_edb7211(struct machine_desc *desc, struct tag *tags,
-	      char **cmdline, struct meminfo *mi)
+fixup_edb7211(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	/*
 	 * Bank start addresses are not present in the information
diff --git a/arch/arm/mach-clps711x/fortunet.c b/arch/arm/mach-clps711x/fortunet.c
index b6f7d86bb1c9..d99256687298 100644
--- a/arch/arm/mach-clps711x/fortunet.c
+++ b/arch/arm/mach-clps711x/fortunet.c
@@ -57,8 +57,7 @@ typedef struct tag_IMAGE_PARAMS
 #define IMAGE_PARAMS_PHYS	0xC01F0000
 
 static void __init
-fortunet_fixup(struct machine_desc *desc, struct tag *tags,
-		 char **cmdline, struct meminfo *mi)
+fortunet_fixup(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	IMAGE_PARAMS *ip = phys_to_virt(IMAGE_PARAMS_PHYS);
 	*cmdline = phys_to_virt(ip->command_line);
diff --git a/arch/arm/mach-clps711x/p720t.c b/arch/arm/mach-clps711x/p720t.c
index e7f75aeb1e5b..6ecea95f38b2 100644
--- a/arch/arm/mach-clps711x/p720t.c
+++ b/arch/arm/mach-clps711x/p720t.c
@@ -56,8 +56,7 @@ static struct map_desc p720t_io_desc[] __initdata = {
 };
 
 static void __init
-fixup_p720t(struct machine_desc *desc, struct tag *tag,
-	    char **cmdline, struct meminfo *mi)
+fixup_p720t(struct tag *tag, char **cmdline, struct meminfo *mi)
 {
 	/*
 	 * Our bootloader doesn't setup any tags (yet).
diff --git a/arch/arm/mach-cns3xxx/Makefile.boot b/arch/arm/mach-cns3xxx/Makefile.boot
index 777012865220..d079de0b6e3b 100644
--- a/arch/arm/mach-cns3xxx/Makefile.boot
+++ b/arch/arm/mach-cns3xxx/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00C00000
diff --git a/arch/arm/mach-davinci/Makefile.boot b/arch/arm/mach-davinci/Makefile.boot
index db97ef2c6477..04a6c4e67b14 100644
--- a/arch/arm/mach-davinci/Makefile.boot
+++ b/arch/arm/mach-davinci/Makefile.boot
@@ -2,12 +2,12 @@ ifeq ($(CONFIG_ARCH_DAVINCI_DA8XX),y)
 ifeq ($(CONFIG_ARCH_DAVINCI_DMx),y)
 $(error Cannot enable DaVinci and DA8XX platforms concurrently)
 else
-   zreladdr-y	:= 0xc0008000
+   zreladdr-y	+= 0xc0008000
 params_phys-y	:= 0xc0000100
 initrd_phys-y	:= 0xc0800000
 endif
 else
-   zreladdr-y	:= 0x80008000
+   zreladdr-y	+= 0x80008000
 params_phys-y	:= 0x80000100
 initrd_phys-y	:= 0x80800000
 endif
diff --git a/arch/arm/mach-dove/Makefile.boot b/arch/arm/mach-dove/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-dove/Makefile.boot
+++ b/arch/arm/mach-dove/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-ebsa110/Makefile.boot b/arch/arm/mach-ebsa110/Makefile.boot
index 232126044935..83cf07c38ada 100644
--- a/arch/arm/mach-ebsa110/Makefile.boot
+++ b/arch/arm/mach-ebsa110/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000400
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-ebsa110/include/mach/io.h b/arch/arm/mach-ebsa110/include/mach/io.h
index f68daa632af0..44679db672fb 100644
--- a/arch/arm/mach-ebsa110/include/mach/io.h
+++ b/arch/arm/mach-ebsa110/include/mach/io.h
@@ -13,8 +13,6 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define IO_SPACE_LIMIT 0xffff
-
 u8 __inb8(unsigned int port);
 void __outb8(u8  val, unsigned int port);
 
diff --git a/arch/arm/mach-ep93xx/Makefile.boot b/arch/arm/mach-ep93xx/Makefile.boot
index 0ad33f15c622..d3113a71cb40 100644
--- a/arch/arm/mach-ep93xx/Makefile.boot
+++ b/arch/arm/mach-ep93xx/Makefile.boot
@@ -1,14 +1,14 @@
-   zreladdr-$(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)	:= 0x00008000
+   zreladdr-$(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)	+= 0x00008000
 params_phys-$(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)	:= 0x00000100
 
-   zreladdr-$(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)		:= 0xc0008000
+   zreladdr-$(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)		+= 0xc0008000
 params_phys-$(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)		:= 0xc0000100
 
-   zreladdr-$(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)		:= 0xd0008000
+   zreladdr-$(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)		+= 0xd0008000
 params_phys-$(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)		:= 0xd0000100
 
-   zreladdr-$(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)		:= 0xe0008000
+   zreladdr-$(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)		+= 0xe0008000
 params_phys-$(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)		:= 0xe0000100
 
-   zreladdr-$(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)	:= 0xf0008000
+   zreladdr-$(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)	+= 0xf0008000
 params_phys-$(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)	:= 0xf0000100
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
index c63a5ec1a8e3..70ef8c527d27 100644
--- a/arch/arm/mach-ep93xx/edb93xx.c
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -160,6 +160,11 @@ static void __init edb93xx_register_spi(void)
 /*************************************************************************
  * EDB93xx I2S
  *************************************************************************/
+static struct platform_device edb93xx_audio_device = {
+	.name		= "edb93xx-audio",
+	.id		= -1,
+};
+
 static int __init edb93xx_has_audio(void)
 {
 	return (machine_is_edb9301() || machine_is_edb9302() ||
@@ -171,6 +176,7 @@ static void __init edb93xx_register_i2s(void)
 {
 	if (edb93xx_has_audio()) {
 		ep93xx_register_i2s();
+		platform_device_register(&edb93xx_audio_device);
 	}
 }
 
diff --git a/arch/arm/mach-ep93xx/include/mach/gpio.h b/arch/arm/mach-ep93xx/include/mach/gpio.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/arm/mach-ep93xx/include/mach/gpio.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index d6f286b4db9c..52e090dc9d27 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -53,6 +53,17 @@ static struct i2c_board_info __initdata simone_i2c_board_info[] = {
 	},
 };
 
+static struct platform_device simone_audio_device = {
+	.name		= "simone-audio",
+	.id		= -1,
+};
+
+static void __init simone_register_audio(void)
+{
+	ep93xx_register_ac97();
+	platform_device_register(&simone_audio_device);
+}
+
 static void __init simone_init_machine(void)
 {
 	ep93xx_init_devices();
@@ -61,7 +72,7 @@ static void __init simone_init_machine(void)
 	ep93xx_register_fb(&simone_fb_info);
 	ep93xx_register_i2c(&simone_i2c_gpio_data, simone_i2c_board_info,
 			    ARRAY_SIZE(simone_i2c_board_info));
-	ep93xx_register_ac97();
+	simone_register_audio();
 }
 
 MACHINE_START(SIM_ONE, "Simplemachines Sim.One Board")
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 2b4d4b0201df..8121e3aedc0a 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -150,6 +150,17 @@ static struct ep93xxfb_mach_info __initdata snappercl15_fb_info = {
 	.bpp			= 16,
 };
 
+static struct platform_device snappercl15_audio_device = {
+	.name		= "snappercl15-audio",
+	.id		= -1,
+};
+
+static void __init snappercl15_register_audio(void)
+{
+	ep93xx_register_i2s();
+	platform_device_register(&snappercl15_audio_device);
+}
+
 static void __init snappercl15_init_machine(void)
 {
 	ep93xx_init_devices();
@@ -157,7 +168,7 @@ static void __init snappercl15_init_machine(void)
 	ep93xx_register_i2c(&snappercl15_i2c_gpio_data, snappercl15_i2c_data,
 			    ARRAY_SIZE(snappercl15_i2c_data));
 	ep93xx_register_fb(&snappercl15_fb_info);
-	ep93xx_register_i2s();
+	snappercl15_register_audio();
 	platform_device_register(&snappercl15_nand_device);
 }
 
diff --git a/arch/arm/mach-exynos4/Kconfig b/arch/arm/mach-exynos4/Kconfig
index 0c77ab99fa16..fc1f92dfbea8 100644
--- a/arch/arm/mach-exynos4/Kconfig
+++ b/arch/arm/mach-exynos4/Kconfig
@@ -12,6 +12,7 @@ if ARCH_EXYNOS4
 config CPU_EXYNOS4210
 	bool
 	select S3C_PL330_DMA
+	select ARM_CPU_SUSPEND if PM
 	help
 	  Enable EXYNOS4210 CPU support
 
diff --git a/arch/arm/mach-exynos4/Makefile.boot b/arch/arm/mach-exynos4/Makefile.boot
index d65956ffb43d..b9862e22bf10 100644
--- a/arch/arm/mach-exynos4/Makefile.boot
+++ b/arch/arm/mach-exynos4/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x40008000
+   zreladdr-y	+= 0x40008000
 params_phys-y	:= 0x40000100
diff --git a/arch/arm/mach-exynos4/include/mach/entry-macro.S b/arch/arm/mach-exynos4/include/mach/entry-macro.S
index d7a1e281ce7a..006a4f4c65c6 100644
--- a/arch/arm/mach-exynos4/include/mach/entry-macro.S
+++ b/arch/arm/mach-exynos4/include/mach/entry-macro.S
@@ -55,7 +55,7 @@
 
 		bic     \irqnr, \irqstat, #0x1c00
 
-		cmp     \irqnr, #29
+		cmp     \irqnr, #15
 		cmpcc	\irqnr, \irqnr
 		cmpne	\irqnr, \tmp
 		cmpcs	\irqnr, \irqnr
@@ -76,8 +76,3 @@
 		strcc	\irqstat, [\base, #GIC_CPU_EOI]
 		cmpcs	\irqnr, \irqnr
 		.endm
-
-		/* As above, this assumes that irqstat and base are preserved.. */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		.endm
diff --git a/arch/arm/mach-exynos4/mct.c b/arch/arm/mach-exynos4/mct.c
index ddd86864fb83..582b874aab0e 100644
--- a/arch/arm/mach-exynos4/mct.c
+++ b/arch/arm/mach-exynos4/mct.c
@@ -386,9 +386,11 @@ static void exynos4_mct_tick_init(struct clock_event_device *evt)
 
 	if (cpu == 0) {
 		mct_tick0_event_irq.dev_id = &mct_tick[cpu];
+		evt->irq = IRQ_MCT_L0;
 		setup_irq(IRQ_MCT_L0, &mct_tick0_event_irq);
 	} else {
 		mct_tick1_event_irq.dev_id = &mct_tick[cpu];
+		evt->irq = IRQ_MCT_L1;
 		setup_irq(IRQ_MCT_L1, &mct_tick1_event_irq);
 		irq_set_affinity(IRQ_MCT_L1, cpumask_of(1));
 	}
@@ -402,9 +404,10 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 	return 0;
 }
 
-int local_timer_ack(void)
+void local_timer_stop(struct clock_event_device *evt)
 {
-	return 0;
+	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	disable_irq(evt->irq);
 }
 
 #endif /* CONFIG_LOCAL_TIMERS */
diff --git a/arch/arm/mach-exynos4/platsmp.c b/arch/arm/mach-exynos4/platsmp.c
index df6ef1b2f98b..0c90896ad9a0 100644
--- a/arch/arm/mach-exynos4/platsmp.c
+++ b/arch/arm/mach-exynos4/platsmp.c
@@ -193,12 +193,10 @@ void __init smp_init_cpus(void)
 	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores > NR_CPUS) {
-		printk(KERN_WARNING
-		       "EXYNOS4: no. of cores (%d) greater than configured "
-		       "maximum of %d - clipping\n",
-		       ncores, NR_CPUS);
-		ncores = NR_CPUS;
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
 	}
 
 	for (i = 0; i < ncores; i++)
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index c8e7afcf14ec..f643ef819da6 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -4,8 +4,8 @@ menu "Footbridge Implementations"
 
 config ARCH_CATS
 	bool "CATS"
-	select CLKSRC_I8253
 	select CLKEVT_I8253
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
@@ -61,8 +61,8 @@ config ARCH_EBSA285_HOST
 
 config ARCH_NETWINDER
 	bool "NetWinder"
-	select CLKSRC_I8253
 	select CLKEVT_I8253
+	select CLKSRC_I8253
 	select FOOTBRIDGE_HOST
 	select ISA
 	select ISA_DMA
diff --git a/arch/arm/mach-footbridge/Makefile.boot b/arch/arm/mach-footbridge/Makefile.boot
index c7e75acfe6c9..ff0a4b5b0a82 100644
--- a/arch/arm/mach-footbridge/Makefile.boot
+++ b/arch/arm/mach-footbridge/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c
index a3da5d1106c2..d5f178540928 100644
--- a/arch/arm/mach-footbridge/cats-hw.c
+++ b/arch/arm/mach-footbridge/cats-hw.c
@@ -76,8 +76,7 @@ __initcall(cats_hw_init);
  * hard reboots fail on early boards.
  */
 static void __init
-fixup_cats(struct machine_desc *desc, struct tag *tags,
-	   char **cmdline, struct meminfo *mi)
+fixup_cats(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	screen_info.orig_video_lines  = 25;
 	screen_info.orig_video_points = 16;
diff --git a/arch/arm/mach-footbridge/include/mach/hardware.h b/arch/arm/mach-footbridge/include/mach/hardware.h
index 15d54981674c..e3d6ccac2162 100644
--- a/arch/arm/mach-footbridge/include/mach/hardware.h
+++ b/arch/arm/mach-footbridge/include/mach/hardware.h
@@ -93,7 +93,7 @@
 #define CPLD_FLASH_WR_ENABLE	1
 
 #ifndef __ASSEMBLY__
-extern spinlock_t nw_gpio_lock;
+extern raw_spinlock_t nw_gpio_lock;
 extern void nw_gpio_modify_op(unsigned int mask, unsigned int set);
 extern void nw_gpio_modify_io(unsigned int mask, unsigned int in);
 extern unsigned int nw_gpio_read(void);
diff --git a/arch/arm/mach-footbridge/include/mach/io.h b/arch/arm/mach-footbridge/include/mach/io.h
index 32e4cc397c28..15a70396c27d 100644
--- a/arch/arm/mach-footbridge/include/mach/io.h
+++ b/arch/arm/mach-footbridge/include/mach/io.h
@@ -23,8 +23,6 @@
 #define PCIO_SIZE       0x00100000
 #define PCIO_BASE       MMU_IO(0xff000000, 0x7c000000)
 
-#define IO_SPACE_LIMIT 0xffff
-
 /*
  * Translation of various region addresses to virtual addresses
  */
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index d8c1c922e24c..0d3846f3b60d 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int val)
 /*
  * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
  */
-DEFINE_SPINLOCK(nw_gpio_lock);
+DEFINE_RAW_SPINLOCK(nw_gpio_lock);
 EXPORT_SYMBOL(nw_gpio_lock);
 
 static unsigned int current_gpio_op;
@@ -327,9 +327,9 @@ static inline void wb977_init_gpio(void)
 	/*
 	 * Set Group1/Group2 outputs
 	 */
-	spin_lock_irqsave(&nw_gpio_lock, flags);
+	raw_spin_lock_irqsave(&nw_gpio_lock, flags);
 	nw_gpio_modify_op(-1, GPIO_RED_LED | GPIO_FAN);
-	spin_unlock_irqrestore(&nw_gpio_lock, flags);
+	raw_spin_unlock_irqrestore(&nw_gpio_lock, flags);
 }
 
 /*
@@ -390,9 +390,9 @@ static void __init cpld_init(void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&nw_gpio_lock, flags);
+	raw_spin_lock_irqsave(&nw_gpio_lock, flags);
 	nw_cpld_modify(-1, CPLD_UNMUTE | CPLD_7111_DISABLE);
-	spin_unlock_irqrestore(&nw_gpio_lock, flags);
+	raw_spin_unlock_irqrestore(&nw_gpio_lock, flags);
 }
 
 static unsigned char rwa_unlock[] __initdata =
@@ -616,9 +616,9 @@ static int __init nw_hw_init(void)
 		cpld_init();
 		rwa010_init();
 
-		spin_lock_irqsave(&nw_gpio_lock, flags);
+		raw_spin_lock_irqsave(&nw_gpio_lock, flags);
 		nw_gpio_modify_op(GPIO_RED_LED|GPIO_GREEN_LED, DEFAULT_LEDS);
-		spin_unlock_irqrestore(&nw_gpio_lock, flags);
+		raw_spin_unlock_irqrestore(&nw_gpio_lock, flags);
 	}
 	return 0;
 }
@@ -631,8 +631,7 @@ __initcall(nw_hw_init);
  * the parameter page.
  */
 static void __init
-fixup_netwinder(struct machine_desc *desc, struct tag *tags,
-		char **cmdline, struct meminfo *mi)
+fixup_netwinder(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 #ifdef CONFIG_ISAPNP
 	extern int isapnp_disable;
diff --git a/arch/arm/mach-footbridge/netwinder-leds.c b/arch/arm/mach-footbridge/netwinder-leds.c
index 00269fe0be8a..e57102e871fc 100644
--- a/arch/arm/mach-footbridge/netwinder-leds.c
+++ b/arch/arm/mach-footbridge/netwinder-leds.c
@@ -31,13 +31,13 @@
 static char led_state;
 static char hw_led_state;
 
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
 
 static void netwinder_leds_event(led_event_t evt)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&leds_lock, flags);
+	raw_spin_lock_irqsave(&leds_lock, flags);
 
 	switch (evt) {
 	case led_start:
@@ -117,12 +117,12 @@ static void netwinder_leds_event(led_event_t evt)
 		break;
 	}
 
-	spin_unlock_irqrestore(&leds_lock, flags);
+	raw_spin_unlock_irqrestore(&leds_lock, flags);
 
 	if  (led_state & LED_STATE_ENABLED) {
-		spin_lock_irqsave(&nw_gpio_lock, flags);
+		raw_spin_lock_irqsave(&nw_gpio_lock, flags);
 		nw_gpio_modify_op(GPIO_RED_LED | GPIO_GREEN_LED, hw_led_state);
-		spin_unlock_irqrestore(&nw_gpio_lock, flags);
+		raw_spin_unlock_irqrestore(&nw_gpio_lock, flags);
 	}
 }
 
diff --git a/arch/arm/mach-gemini/Makefile.boot b/arch/arm/mach-gemini/Makefile.boot
index 22a52c228d93..683f52b20e3d 100644
--- a/arch/arm/mach-gemini/Makefile.boot
+++ b/arch/arm/mach-gemini/Makefile.boot
@@ -1,9 +1,9 @@
 ifeq ($(CONFIG_GEMINI_MEM_SWAP),y)
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 else
-   zreladdr-y	:= 0x10008000
+   zreladdr-y	+= 0x10008000
 params_phys-y	:= 0x10000100
 initrd_phys-y	:= 0x10800000
 endif
diff --git a/arch/arm/mach-h720x/Makefile.boot b/arch/arm/mach-h720x/Makefile.boot
index 52984017bd91..d875a7094dfe 100644
--- a/arch/arm/mach-h720x/Makefile.boot
+++ b/arch/arm/mach-h720x/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-$(CONFIG_ARCH_H720X)	:= 0x40008000
+   zreladdr-$(CONFIG_ARCH_H720X)	+= 0x40008000
 
diff --git a/arch/arm/mach-imx/Makefile.boot b/arch/arm/mach-imx/Makefile.boot
index ebee18b3884c..dbe61201bcd8 100644
--- a/arch/arm/mach-imx/Makefile.boot
+++ b/arch/arm/mach-imx/Makefile.boot
@@ -1,19 +1,19 @@
-zreladdr-$(CONFIG_ARCH_MX1)	:= 0x08008000
+zreladdr-$(CONFIG_ARCH_MX1)	+= 0x08008000
 params_phys-$(CONFIG_ARCH_MX1)	:= 0x08000100
 initrd_phys-$(CONFIG_ARCH_MX1)	:= 0x08800000
 
-zreladdr-$(CONFIG_MACH_MX21)	:= 0xC0008000
+zreladdr-$(CONFIG_MACH_MX21)	+= 0xC0008000
 params_phys-$(CONFIG_MACH_MX21)	:= 0xC0000100
 initrd_phys-$(CONFIG_MACH_MX21)	:= 0xC0800000
 
-zreladdr-$(CONFIG_ARCH_MX25)	:= 0x80008000
+zreladdr-$(CONFIG_ARCH_MX25)	+= 0x80008000
 params_phys-$(CONFIG_ARCH_MX25)	:= 0x80000100
 initrd_phys-$(CONFIG_ARCH_MX25)	:= 0x80800000
 
-zreladdr-$(CONFIG_MACH_MX27)	:= 0xA0008000
+zreladdr-$(CONFIG_MACH_MX27)	+= 0xA0008000
 params_phys-$(CONFIG_MACH_MX27)	:= 0xA0000100
 initrd_phys-$(CONFIG_MACH_MX27)	:= 0xA0800000
 
-zreladdr-$(CONFIG_ARCH_MX3)	:= 0x80008000
+zreladdr-$(CONFIG_ARCH_MX3)	+= 0x80008000
 params_phys-$(CONFIG_ARCH_MX3)	:= 0x80000100
 initrd_phys-$(CONFIG_ARCH_MX3)	:= 0x80800000
diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c
index 2eafbac2c763..377230497dcc 100644
--- a/arch/arm/mach-imx/mach-mx27_3ds.c
+++ b/arch/arm/mach-imx/mach-mx27_3ds.c
@@ -241,7 +241,7 @@ static struct regulator_init_data gpo_init = {
 };
 
 static struct regulator_consumer_supply vmmc1_consumers[] = {
-	REGULATOR_SUPPLY("lcd_2v8", NULL),
+	REGULATOR_SUPPLY("vcore", "spi0.0"),
 };
 
 static struct regulator_init_data vmmc1_init = {
@@ -257,7 +257,7 @@ static struct regulator_init_data vmmc1_init = {
 };
 
 static struct regulator_consumer_supply vgen_consumers[] = {
-	REGULATOR_SUPPLY("vdd_lcdio", NULL),
+	REGULATOR_SUPPLY("vdd", "spi0.0"),
 };
 
 static struct regulator_init_data vgen_init = {
@@ -348,8 +348,6 @@ static const struct imx_fb_platform_data mx27_3ds_fb_data __initconst = {
 static struct l4f00242t03_pdata mx27_3ds_lcd_pdata = {
 	.reset_gpio		= LCD_RESET,
 	.data_enable_gpio	= LCD_ENABLE,
-	.core_supply		= "lcd_2v8",
-	.io_supply		= "vdd_lcdio",
 };
 
 static struct spi_board_info mx27_3ds_spi_devs[] __initdata = {
diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c
index 712a7fb1cf01..0de4d1835ea4 100644
--- a/arch/arm/mach-imx/mach-mx31_3ds.c
+++ b/arch/arm/mach-imx/mach-mx31_3ds.c
@@ -285,8 +285,6 @@ static struct mx3fb_platform_data mx3fb_pdata __initdata = {
 static struct l4f00242t03_pdata mx31_3ds_l4f00242t03_pdata = {
 	.reset_gpio		= IOMUX_TO_GPIO(MX31_PIN_LCS1),
 	.data_enable_gpio	= IOMUX_TO_GPIO(MX31_PIN_SER_RS),
-	.core_supply		= "lcd_2v8",
-	.io_supply		= "vdd_lcdio",
 };
 
 /*
@@ -411,7 +409,7 @@ static struct regulator_init_data vmmc2_init = {
 };
 
 static struct regulator_consumer_supply vmmc1_consumers[] = {
-	REGULATOR_SUPPLY("lcd_2v8", NULL),
+	REGULATOR_SUPPLY("vcore", "spi0.0"),
 	REGULATOR_SUPPLY("cmos_2v8", "soc-camera-pdrv.0"),
 };
 
@@ -428,7 +426,7 @@ static struct regulator_init_data vmmc1_init = {
 };
 
 static struct regulator_consumer_supply vgen_consumers[] = {
-	REGULATOR_SUPPLY("vdd_lcdio", NULL),
+	REGULATOR_SUPPLY("vdd", "spi0.0"),
 };
 
 static struct regulator_init_data vgen_init = {
diff --git a/arch/arm/mach-integrator/Makefile.boot b/arch/arm/mach-integrator/Makefile.boot
index c7e75acfe6c9..ff0a4b5b0a82 100644
--- a/arch/arm/mach-integrator/Makefile.boot
+++ b/arch/arm/mach-integrator/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c
index 77315b995681..4b38e13667ac 100644
--- a/arch/arm/mach-integrator/core.c
+++ b/arch/arm/mach-integrator/core.c
@@ -126,6 +126,10 @@ static struct clk_lookup lookups[] = {
 	{	/* Bus clock */
 		.con_id		= "apb_pclk",
 		.clk		= &dummy_apb_pclk,
+	}, {
+		/* Integrator/AP timer frequency */
+		.dev_id		= "ap_timer",
+		.clk		= &clk24mhz,
 	}, {	/* UART0 */
 		.dev_id		= "mb:16",
 		.clk		= &uartclk,
@@ -205,7 +209,7 @@ static struct amba_pl010_data integrator_uart_data = {
 
 #define CM_CTRL	IO_ADDRESS(INTEGRATOR_HDR_CTRL)
 
-static DEFINE_SPINLOCK(cm_lock);
+static DEFINE_RAW_SPINLOCK(cm_lock);
 
 /**
  * cm_control - update the CM_CTRL register.
@@ -217,10 +221,10 @@ void cm_control(u32 mask, u32 set)
 	unsigned long flags;
 	u32 val;
 
-	spin_lock_irqsave(&cm_lock, flags);
+	raw_spin_lock_irqsave(&cm_lock, flags);
 	val = readl(CM_CTRL) & ~mask;
 	writel(val | set, CM_CTRL);
-	spin_unlock_irqrestore(&cm_lock, flags);
+	raw_spin_unlock_irqrestore(&cm_lock, flags);
 }
 
 EXPORT_SYMBOL(cm_control);
diff --git a/arch/arm/mach-integrator/include/mach/io.h b/arch/arm/mach-integrator/include/mach/io.h
index f21bb5493dd9..37beed3fa3ed 100644
--- a/arch/arm/mach-integrator/include/mach/io.h
+++ b/arch/arm/mach-integrator/include/mach/io.h
@@ -20,8 +20,6 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define IO_SPACE_LIMIT 0xffff
-
 /*
  * WARNING: this has to mirror definitions in platform.h
  */
diff --git a/arch/arm/mach-integrator/include/mach/platform.h b/arch/arm/mach-integrator/include/mach/platform.h
index 5e6ea5cfea6e..ec467baade09 100644
--- a/arch/arm/mach-integrator/include/mach/platform.h
+++ b/arch/arm/mach-integrator/include/mach/platform.h
@@ -13,9 +13,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-/* DO NOT EDIT!! - this file automatically generated
- *                 from .s file by awk -f s2h.awk
- */
 /**************************************************************************
  * * Copyright © ARM Limited 1998.  All rights reserved.
  * ***********************************************************************/
@@ -399,15 +396,6 @@
 #define INTEGRATOR_TIMER1_BASE          (INTEGRATOR_CT_BASE + 0x100)
 #define INTEGRATOR_TIMER2_BASE          (INTEGRATOR_CT_BASE + 0x200)
 
-#define TICKS_PER_uSEC                  24
-
-/*
- *  These are useconds NOT ticks.
- *
- */
-#define mSEC_1                          1000
-#define mSEC_10                         (mSEC_1 * 10)
-
 #define INTEGRATOR_CSR_BASE             0x10000000
 #define INTEGRATOR_CSR_SIZE             0x10000000
 
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index a20fb3f2bc45..a1769f35a86e 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mtd/physmap.h>
+#include <linux/clk.h>
 #include <video/vga.h>
 
 #include <mach/hardware.h>
@@ -322,27 +323,16 @@ static void __init ap_init(void)
 #define TIMER1_VA_BASE IO_ADDRESS(INTEGRATOR_TIMER1_BASE)
 #define TIMER2_VA_BASE IO_ADDRESS(INTEGRATOR_TIMER2_BASE)
 
-/*
- * How long is the timer interval?
- */
-#define TIMER_INTERVAL	(TICKS_PER_uSEC * mSEC_10)
-#if TIMER_INTERVAL >= 0x100000
-#define TICKS2USECS(x)	(256 * (x) / TICKS_PER_uSEC)
-#elif TIMER_INTERVAL >= 0x10000
-#define TICKS2USECS(x)	(16 * (x) / TICKS_PER_uSEC)
-#else
-#define TICKS2USECS(x)	((x) / TICKS_PER_uSEC)
-#endif
-
 static unsigned long timer_reload;
 
-static void integrator_clocksource_init(u32 khz)
+static void integrator_clocksource_init(unsigned long inrate)
 {
 	void __iomem *base = (void __iomem *)TIMER2_VA_BASE;
 	u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
+	unsigned long rate = inrate;
 
-	if (khz >= 1500) {
-		khz /= 16;
+	if (rate >= 1500000) {
+		rate /= 16;
 		ctrl |= TIMER_CTRL_DIV16;
 	}
 
@@ -350,7 +340,7 @@ static void integrator_clocksource_init(u32 khz)
 	writel(ctrl, base + TIMER_CTRL);
 
 	clocksource_mmio_init(base + TIMER_VALUE, "timer2",
-		khz * 1000, 200, 16, clocksource_mmio_readl_down);
+			rate, 200, 16, clocksource_mmio_readl_down);
 }
 
 static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;
@@ -374,15 +364,29 @@ static void clkevt_set_mode(enum clock_event_mode mode, struct clock_event_devic
 {
 	u32 ctrl = readl(clkevt_base + TIMER_CTRL) & ~TIMER_CTRL_ENABLE;
 
-	BUG_ON(mode == CLOCK_EVT_MODE_ONESHOT);
+	/* Disable timer */
+	writel(ctrl, clkevt_base + TIMER_CTRL);
 
-	if (mode == CLOCK_EVT_MODE_PERIODIC) {
-		writel(ctrl, clkevt_base + TIMER_CTRL);
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		/* Enable the timer and start the periodic tick */
 		writel(timer_reload, clkevt_base + TIMER_LOAD);
 		ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE;
+		writel(ctrl, clkevt_base + TIMER_CTRL);
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+		/* Leave the timer disabled, .set_next_event will enable it */
+		ctrl &= ~TIMER_CTRL_PERIODIC;
+		writel(ctrl, clkevt_base + TIMER_CTRL);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_RESUME:
+	default:
+		/* Just leave in disabled state */
+		break;
 	}
 
-	writel(ctrl, clkevt_base + TIMER_CTRL);
 }
 
 static int clkevt_set_next_event(unsigned long next, struct clock_event_device *evt)
@@ -398,12 +402,10 @@ static int clkevt_set_next_event(unsigned long next, struct clock_event_device *
 
 static struct clock_event_device integrator_clockevent = {
 	.name		= "timer1",
-	.shift		= 34,
-	.features	= CLOCK_EVT_FEAT_PERIODIC,
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= clkevt_set_mode,
 	.set_next_event	= clkevt_set_next_event,
 	.rating		= 300,
-	.cpumask	= cpu_all_mask,
 };
 
 static struct irqaction integrator_timer_irq = {
@@ -413,29 +415,27 @@ static struct irqaction integrator_timer_irq = {
 	.dev_id		= &integrator_clockevent,
 };
 
-static void integrator_clockevent_init(u32 khz)
+static void integrator_clockevent_init(unsigned long inrate)
 {
-	struct clock_event_device *evt = &integrator_clockevent;
+	unsigned long rate = inrate;
 	unsigned int ctrl = 0;
 
-	if (khz * 1000 > 0x100000 * HZ) {
-		khz /= 256;
+	/* Calculate and program a divisor */
+	if (rate > 0x100000 * HZ) {
+		rate /= 256;
 		ctrl |= TIMER_CTRL_DIV256;
-	} else if (khz * 1000 > 0x10000 * HZ) {
-		khz /= 16;
+	} else if (rate > 0x10000 * HZ) {
+		rate /= 16;
 		ctrl |= TIMER_CTRL_DIV16;
 	}
-
-	timer_reload = khz * 1000 / HZ;
+	timer_reload = rate / HZ;
 	writel(ctrl, clkevt_base + TIMER_CTRL);
 
-	evt->irq = IRQ_TIMERINT1;
-	evt->mult = div_sc(khz, NSEC_PER_MSEC, evt->shift);
-	evt->max_delta_ns = clockevent_delta2ns(0xffff, evt);
-	evt->min_delta_ns = clockevent_delta2ns(0xf, evt);
-
 	setup_irq(IRQ_TIMERINT1, &integrator_timer_irq);
-	clockevents_register_device(evt);
+	clockevents_config_and_register(&integrator_clockevent,
+					rate,
+					1,
+					0xffffU);
 }
 
 /*
@@ -443,14 +443,20 @@ static void integrator_clockevent_init(u32 khz)
  */
 static void __init ap_init_timer(void)
 {
-	u32 khz = TICKS_PER_uSEC * 1000;
+	struct clk *clk;
+	unsigned long rate;
+
+	clk = clk_get_sys("ap_timer", NULL);
+	BUG_ON(IS_ERR(clk));
+	clk_enable(clk);
+	rate = clk_get_rate(clk);
 
 	writel(0, TIMER0_VA_BASE + TIMER_CTRL);
 	writel(0, TIMER1_VA_BASE + TIMER_CTRL);
 	writel(0, TIMER2_VA_BASE + TIMER_CTRL);
 
-	integrator_clocksource_init(khz);
-	integrator_clockevent_init(khz);
+	integrator_clocksource_init(rate);
+	integrator_clockevent_init(rate);
 }
 
 static struct sys_timer ap_timer = {
diff --git a/arch/arm/mach-integrator/pci_v3.c b/arch/arm/mach-integrator/pci_v3.c
index 11b86e5b71c2..b4d8f8b8a085 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -163,7 +163,7 @@
  *	 7:2	register number
  *  
  */
-static DEFINE_SPINLOCK(v3_lock);
+static DEFINE_RAW_SPINLOCK(v3_lock);
 
 #define PCI_BUS_NONMEM_START	0x00000000
 #define PCI_BUS_NONMEM_SIZE	SZ_256M
@@ -284,7 +284,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where,
 	unsigned long flags;
 	u32 v;
 
-	spin_lock_irqsave(&v3_lock, flags);
+	raw_spin_lock_irqsave(&v3_lock, flags);
 	addr = v3_open_config_window(bus, devfn, where);
 
 	switch (size) {
@@ -302,7 +302,7 @@ static int v3_read_config(struct pci_bus *bus, unsigned int devfn, int where,
 	}
 
 	v3_close_config_window();
-	spin_unlock_irqrestore(&v3_lock, flags);
+	raw_spin_unlock_irqrestore(&v3_lock, flags);
 
 	*val = v;
 	return PCIBIOS_SUCCESSFUL;
@@ -314,7 +314,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where,
 	unsigned long addr;
 	unsigned long flags;
 
-	spin_lock_irqsave(&v3_lock, flags);
+	raw_spin_lock_irqsave(&v3_lock, flags);
 	addr = v3_open_config_window(bus, devfn, where);
 
 	switch (size) {
@@ -335,7 +335,7 @@ static int v3_write_config(struct pci_bus *bus, unsigned int devfn, int where,
 	}
 
 	v3_close_config_window();
-	spin_unlock_irqrestore(&v3_lock, flags);
+	raw_spin_unlock_irqrestore(&v3_lock, flags);
 
 	return PCIBIOS_SUCCESSFUL;
 }
@@ -513,7 +513,7 @@ void __init pci_v3_preinit(void)
 	hook_fault_code(8, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 	hook_fault_code(10, v3_pci_fault, SIGBUS, 0, "external abort on non-linefetch");
 
-	spin_lock_irqsave(&v3_lock, flags);
+	raw_spin_lock_irqsave(&v3_lock, flags);
 
 	/*
 	 * Unlock V3 registers, but only if they were previously locked.
@@ -586,7 +586,7 @@ void __init pci_v3_preinit(void)
 		printk(KERN_ERR "PCI: unable to grab PCI error "
 		       "interrupt: %d\n", ret);
 
-	spin_unlock_irqrestore(&v3_lock, flags);
+	raw_spin_unlock_irqrestore(&v3_lock, flags);
 }
 
 void __init pci_v3_postinit(void)
diff --git a/arch/arm/mach-iop13xx/Makefile.boot b/arch/arm/mach-iop13xx/Makefile.boot
index 0b0e19fdfe6c..3a8c38c3189c 100644
--- a/arch/arm/mach-iop13xx/Makefile.boot
+++ b/arch/arm/mach-iop13xx/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y   := 0x00008000
+   zreladdr-y   += 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y 	:= 0x00800000
diff --git a/arch/arm/mach-iop32x/Makefile.boot b/arch/arm/mach-iop32x/Makefile.boot
index 47000dccd61f..0a833b11e38c 100644
--- a/arch/arm/mach-iop32x/Makefile.boot
+++ b/arch/arm/mach-iop32x/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0xa0008000
+   zreladdr-y	+= 0xa0008000
 params_phys-y	:= 0xa0000100
 initrd_phys-y	:= 0xa0800000
diff --git a/arch/arm/mach-iop33x/Makefile.boot b/arch/arm/mach-iop33x/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-iop33x/Makefile.boot
+++ b/arch/arm/mach-iop33x/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-ixp2000/Makefile.boot b/arch/arm/mach-ixp2000/Makefile.boot
index d84c5807a43d..9c7af91d93da 100644
--- a/arch/arm/mach-ixp2000/Makefile.boot
+++ b/arch/arm/mach-ixp2000/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 
diff --git a/arch/arm/mach-ixp23xx/Makefile.boot b/arch/arm/mach-ixp23xx/Makefile.boot
index d5561ad15bad..44fb4a717c3f 100644
--- a/arch/arm/mach-ixp23xx/Makefile.boot
+++ b/arch/arm/mach-ixp23xx/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
diff --git a/arch/arm/mach-ixp4xx/Makefile.boot b/arch/arm/mach-ixp4xx/Makefile.boot
index d84c5807a43d..9c7af91d93da 100644
--- a/arch/arm/mach-ixp4xx/Makefile.boot
+++ b/arch/arm/mach-ixp4xx/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 2131832ee6ba..f72a3a893c47 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -54,7 +54,7 @@ unsigned long ixp4xx_pci_reg_base = 0;
  * these transactions are atomic or we will end up
  * with corrupt data on the bus or in a driver.
  */
-static DEFINE_SPINLOCK(ixp4xx_pci_lock);
+static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock);
 
 /*
  * Read from PCI config space
@@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(ixp4xx_pci_lock);
 static void crp_read(u32 ad_cbe, u32 *data)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&ixp4xx_pci_lock, flags);
+	raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags);
 	*PCI_CRP_AD_CBE = ad_cbe;
 	*data = *PCI_CRP_RDATA;
-	spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
+	raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
 }
 
 /*
@@ -74,10 +74,10 @@ static void crp_read(u32 ad_cbe, u32 *data)
 static void crp_write(u32 ad_cbe, u32 data)
 { 
 	unsigned long flags;
-	spin_lock_irqsave(&ixp4xx_pci_lock, flags);
+	raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags);
 	*PCI_CRP_AD_CBE = CRP_AD_CBE_WRITE | ad_cbe;
 	*PCI_CRP_WDATA = data;
-	spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
+	raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
 }
 
 static inline int check_master_abort(void)
@@ -101,7 +101,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data)
 	int retval = 0;
 	int i;
 
-	spin_lock_irqsave(&ixp4xx_pci_lock, flags);
+	raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags);
 
 	*PCI_NP_AD = addr;
 
@@ -118,7 +118,7 @@ int ixp4xx_pci_read_errata(u32 addr, u32 cmd, u32* data)
 	if(check_master_abort())
 		retval = 1;
 
-	spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
+	raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
 	return retval;
 }
 
@@ -127,7 +127,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data)
 	unsigned long flags;
 	int retval = 0;
 
-	spin_lock_irqsave(&ixp4xx_pci_lock, flags);
+	raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags);
 
 	*PCI_NP_AD = addr;
 
@@ -140,7 +140,7 @@ int ixp4xx_pci_read_no_errata(u32 addr, u32 cmd, u32* data)
 	if(check_master_abort())
 		retval = 1;
 
-	spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
+	raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
 	return retval;
 }
 
@@ -149,7 +149,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data)
 	unsigned long flags;
 	int retval = 0;
 
-	spin_lock_irqsave(&ixp4xx_pci_lock, flags);
+	raw_spin_lock_irqsave(&ixp4xx_pci_lock, flags);
 
 	*PCI_NP_AD = addr;
 
@@ -162,7 +162,7 @@ int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data)
 	if(check_master_abort())
 		retval = 1;
 
-	spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
+	raw_spin_unlock_irqrestore(&ixp4xx_pci_lock, flags);
 	return retval;
 }
 
@@ -397,7 +397,8 @@ void __init ixp4xx_pci_preinit(void)
 		local_write_config(PCI_BASE_ADDRESS_0, 4, PHYS_OFFSET);
 		local_write_config(PCI_BASE_ADDRESS_1, 4, PHYS_OFFSET + SZ_16M);
 		local_write_config(PCI_BASE_ADDRESS_2, 4, PHYS_OFFSET + SZ_32M);
-		local_write_config(PCI_BASE_ADDRESS_3, 4, PHYS_OFFSET + SZ_48M);
+		local_write_config(PCI_BASE_ADDRESS_3, 4,
+					PHYS_OFFSET + SZ_32M + SZ_16M);
 
 		/*
 		 * Enable CSR window at 64 MiB to allow PCI masters
diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h
index 57b5410c31f4..ffb9d6afb89f 100644
--- a/arch/arm/mach-ixp4xx/include/mach/io.h
+++ b/arch/arm/mach-ixp4xx/include/mach/io.h
@@ -17,8 +17,6 @@
 
 #include <mach/hardware.h>
 
-#define IO_SPACE_LIMIT 0x0000ffff
-
 extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
 extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data);
 
diff --git a/arch/arm/mach-kirkwood/Makefile.boot b/arch/arm/mach-kirkwood/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-kirkwood/Makefile.boot
+++ b/arch/arm/mach-kirkwood/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-ks8695/Makefile.boot b/arch/arm/mach-ks8695/Makefile.boot
index 48eb2cb3ac77..c9b0bebcf237 100644
--- a/arch/arm/mach-ks8695/Makefile.boot
+++ b/arch/arm/mach-ks8695/Makefile.boot
@@ -3,6 +3,6 @@
 #   PARAMS_PHYS must be within 4MB of ZRELADDR
 #   INITRD_PHYS must be in RAM
 
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-lpc32xx/Makefile.boot b/arch/arm/mach-lpc32xx/Makefile.boot
index b796b41ebf8f..2cfe0ee635c5 100644
--- a/arch/arm/mach-lpc32xx/Makefile.boot
+++ b/arch/arm/mach-lpc32xx/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x80008000
+   zreladdr-y	+= 0x80008000
 params_phys-y	:= 0x80000100
 initrd_phys-y	:= 0x82000000
 
diff --git a/arch/arm/mach-lpc32xx/include/mach/gpio.h b/arch/arm/mach-lpc32xx/include/mach/gpio.h
index e69de29bb2d1..40a8c178f10d 100644
--- a/arch/arm/mach-lpc32xx/include/mach/gpio.h
+++ b/arch/arm/mach-lpc32xx/include/mach/gpio.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/arch/arm/mach-mmp/Makefile.boot b/arch/arm/mach-mmp/Makefile.boot
index 574a4aa8321a..5edf03e2beed 100644
--- a/arch/arm/mach-mmp/Makefile.boot
+++ b/arch/arm/mach-mmp/Makefile.boot
@@ -1 +1 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
diff --git a/arch/arm/mach-mmp/aspenite.c b/arch/arm/mach-mmp/aspenite.c
index 833c3a2784aa..06b5ad774604 100644
--- a/arch/arm/mach-mmp/aspenite.c
+++ b/arch/arm/mach-mmp/aspenite.c
@@ -160,7 +160,7 @@ static struct mtd_partition aspenite_nand_partitions[] = {
 	}, {
 		.name		= "filesystem",
 		.offset		= MTDPART_OFS_APPEND,
-		.size		= SZ_48M,
+		.size		= SZ_32M + SZ_16M,
 		.mask_flags	= 0,
 	}
 };
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index 7f005843a707..7fb568d2845b 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -35,6 +35,13 @@ extern struct pxa_device_desc pxa168_device_fb;
 extern struct pxa_device_desc pxa168_device_keypad;
 extern struct pxa_device_desc pxa168_device_eth;
 
+struct pxa168_usb_pdata {
+	/* If NULL, default phy init routine for PXA168 would be called */
+	int (*phy_init)(void __iomem *usb_phy_reg_base);
+};
+/* pdata can be NULL */
+int __init pxa168_add_usb_host(struct pxa168_usb_pdata *pdata);
+
 static inline int pxa168_add_uart(int id)
 {
 	struct pxa_device_desc *d = NULL;
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index 50c1763911ff..76ca15c00e45 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -24,6 +24,9 @@
 #include <mach/dma.h>
 #include <mach/devices.h>
 #include <mach/mfp.h>
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <mach/pxa168.h>
 
 #include "common.h"
 #include "clock.h"
@@ -82,6 +85,7 @@ static APBC_CLK(keypad, PXA168_KPC, 0, 32000);
 static APMU_CLK(nand, NAND, 0x19b, 156000000);
 static APMU_CLK(lcd, LCD, 0x7f, 312000000);
 static APMU_CLK(eth, ETH, 0x09, 0);
+static APMU_CLK(usb, USB, 0x12, 0);
 
 /* device and clock bindings */
 static struct clk_lookup pxa168_clkregs[] = {
@@ -103,6 +107,7 @@ static struct clk_lookup pxa168_clkregs[] = {
 	INIT_CLKREG(&clk_lcd, "pxa168-fb", NULL),
 	INIT_CLKREG(&clk_keypad, "pxa27x-keypad", NULL),
 	INIT_CLKREG(&clk_eth, "pxa168-eth", "MFUCLK"),
+	INIT_CLKREG(&clk_usb, "pxa168-ehci", "PXA168-USBCLK"),
 };
 
 static int __init pxa168_init(void)
@@ -168,3 +173,44 @@ PXA168_DEVICE(ssp5, "pxa168-ssp", 4, SSP5, 0xd4021000, 0x40, 60, 61);
 PXA168_DEVICE(fb, "pxa168-fb", -1, LCD, 0xd420b000, 0x1c8);
 PXA168_DEVICE(keypad, "pxa27x-keypad", -1, KEYPAD, 0xd4012000, 0x4c);
 PXA168_DEVICE(eth, "pxa168-eth", -1, MFU, 0xc0800000, 0x0fff);
+
+struct resource pxa168_usb_host_resources[] = {
+	/* USB Host conroller register base */
+	[0] = {
+		.start	= 0xd4209000,
+		.end	= 0xd4209000 + 0x200,
+		.flags	= IORESOURCE_MEM,
+		.name	= "pxa168-usb-host",
+	},
+	/* USB PHY register base */
+	[1] = {
+		.start	= 0xd4206000,
+		.end	= 0xd4206000 + 0xff,
+		.flags	= IORESOURCE_MEM,
+		.name	= "pxa168-usb-phy",
+	},
+	[2] = {
+		.start	= IRQ_PXA168_USB2,
+		.end	= IRQ_PXA168_USB2,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static u64 pxa168_usb_host_dmamask = DMA_BIT_MASK(32);
+struct platform_device pxa168_device_usb_host = {
+	.name = "pxa168-ehci",
+	.id   = -1,
+	.dev  = {
+		.dma_mask = &pxa168_usb_host_dmamask,
+		.coherent_dma_mask = DMA_BIT_MASK(32),
+	},
+
+	.num_resources = ARRAY_SIZE(pxa168_usb_host_resources),
+	.resource      = pxa168_usb_host_resources,
+};
+
+int __init pxa168_add_usb_host(struct pxa168_usb_pdata *pdata)
+{
+	pxa168_device_usb_host.dev.platform_data = pdata;
+	return platform_device_register(&pxa168_device_usb_host);
+}
diff --git a/arch/arm/mach-mmp/ttc_dkb.c b/arch/arm/mach-mmp/ttc_dkb.c
index 6bd37a27e5fc..176515a76989 100644
--- a/arch/arm/mach-mmp/ttc_dkb.c
+++ b/arch/arm/mach-mmp/ttc_dkb.c
@@ -93,7 +93,7 @@ static struct mtd_partition ttc_dkb_onenand_partitions[] = {
 	}, {
 		.name		= "filesystem",
 		.offset		= MTDPART_OFS_APPEND,
-		.size		= SZ_48M,
+		.size		= SZ_32M + SZ_16M,
 		.mask_flags	= 0,
 	}
 };
diff --git a/arch/arm/mach-msm/Makefile.boot b/arch/arm/mach-msm/Makefile.boot
index 24dfbf8c07c4..9b803a578b4d 100644
--- a/arch/arm/mach-msm/Makefile.boot
+++ b/arch/arm/mach-msm/Makefile.boot
@@ -1,3 +1,3 @@
-  zreladdr-y		:= 0x10008000
+  zreladdr-y		+= 0x10008000
 params_phys-y		:= 0x10000100
 initrd_phys-y		:= 0x10800000
diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c
index 16c86f8b4f3f..a60ab6d04ec5 100644
--- a/arch/arm/mach-msm/board-halibut.c
+++ b/arch/arm/mach-msm/board-halibut.c
@@ -78,8 +78,8 @@ static void __init halibut_init(void)
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init halibut_fixup(struct machine_desc *desc, struct tag *tags,
-				 char **cmdline, struct meminfo *mi)
+static void __init halibut_fixup(struct tag *tags, char **cmdline,
+				 struct meminfo *mi)
 {
 	mi->nr_banks=1;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-msm/board-mahimahi.c b/arch/arm/mach-msm/board-mahimahi.c
index 8a1672ee4e4a..5a4882fc6f7a 100644
--- a/arch/arm/mach-msm/board-mahimahi.c
+++ b/arch/arm/mach-msm/board-mahimahi.c
@@ -53,8 +53,8 @@ static void __init mahimahi_init(void)
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init mahimahi_fixup(struct machine_desc *desc, struct tag *tags,
-				 char **cmdline, struct meminfo *mi)
+static void __init mahimahi_fixup(struct tag *tags, char **cmdline,
+				  struct meminfo *mi)
 {
 	mi->nr_banks = 2;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-msm/board-msm8x60.c b/arch/arm/mach-msm/board-msm8x60.c
index 9221f54778be..106170fb1844 100644
--- a/arch/arm/mach-msm/board-msm8x60.c
+++ b/arch/arm/mach-msm/board-msm8x60.c
@@ -53,8 +53,6 @@ static void __init msm8x60_map_io(void)
 
 static void __init msm8x60_init_irq(void)
 {
-	unsigned int i;
-
 	gic_init(0, GIC_PPI_START, MSM_QGIC_DIST_BASE,
 		 (void *)MSM_QGIC_CPU_BASE);
 
@@ -66,15 +64,6 @@ static void __init msm8x60_init_irq(void)
 	 */
 	if (!machine_is_msm8x60_sim())
 		writel(0x0000FFFF, MSM_QGIC_DIST_BASE + GIC_DIST_ENABLE_SET);
-
-	/* FIXME: Not installing AVS_SVICINT and AVS_SVICINTSWDONE yet
-	 * as they are configured as level, which does not play nice with
-	 * handle_percpu_irq.
-	 */
-	for (i = GIC_PPI_START; i < GIC_SPI_START; i++) {
-		if (i != AVS_SVICINT && i != AVS_SVICINTSWDONE)
-			irq_set_handler(i, handle_percpu_irq);
-	}
 }
 
 static void __init msm8x60_init(void)
diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c
index afa9293d5800..32b465763dbd 100644
--- a/arch/arm/mach-msm/board-sapphire.c
+++ b/arch/arm/mach-msm/board-sapphire.c
@@ -76,8 +76,8 @@ static struct map_desc sapphire_io_desc[] __initdata = {
 	}
 };
 
-static void __init sapphire_fixup(struct machine_desc *desc, struct tag *tags,
-				  char **cmdline, struct meminfo *mi)
+static void __init sapphire_fixup(struct tag *tags, char **cmdline,
+				  struct meminfo *mi)
 {
 	int smi_sz = parse_tag_smi((const struct tag *)tags);
 
diff --git a/arch/arm/mach-msm/board-trout.c b/arch/arm/mach-msm/board-trout.c
index 22d5694f5fea..6b9b227c87c5 100644
--- a/arch/arm/mach-msm/board-trout.c
+++ b/arch/arm/mach-msm/board-trout.c
@@ -48,8 +48,8 @@ static void __init trout_init_irq(void)
 	msm_init_irq();
 }
 
-static void __init trout_fixup(struct machine_desc *desc, struct tag *tags,
-				char **cmdline, struct meminfo *mi)
+static void __init trout_fixup(struct tag *tags, char **cmdline,
+			       struct meminfo *mi)
 {
 	mi->nr_banks = 1;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-msm/clock.c b/arch/arm/mach-msm/clock.c
index 22a537669624..d9145dfc2a3b 100644
--- a/arch/arm/mach-msm/clock.c
+++ b/arch/arm/mach-msm/clock.c
@@ -18,7 +18,7 @@
 #include <linux/list.h>
 #include <linux/err.h>
 #include <linux/spinlock.h>
-#include <linux/pm_qos_params.h>
+#include <linux/pm_qos.h>
 #include <linux/mutex.h>
 #include <linux/clk.h>
 #include <linux/string.h>
diff --git a/arch/arm/mach-msm/devices-msm7x00.c b/arch/arm/mach-msm/devices-msm7x00.c
index c4f5e26feb4d..993780f490ad 100644
--- a/arch/arm/mach-msm/devices-msm7x00.c
+++ b/arch/arm/mach-msm/devices-msm7x00.c
@@ -176,12 +176,6 @@ static struct resource resources_sdc1[] = {
 		.name	= "cmd_irq",
 	},
 	{
-		.start	= INT_SDC1_1,
-		.end	= INT_SDC1_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
-	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
 	},
@@ -204,12 +198,6 @@ static struct resource resources_sdc2[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC2_1,
-		.end	= INT_SDC2_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
@@ -233,12 +221,6 @@ static struct resource resources_sdc3[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC3_1,
-		.end	= INT_SDC3_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
@@ -262,12 +244,6 @@ static struct resource resources_sdc4[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC4_1,
-		.end	= INT_SDC4_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
diff --git a/arch/arm/mach-msm/devices-qsd8x50.c b/arch/arm/mach-msm/devices-qsd8x50.c
index 12d8deb78d9c..131633b12a34 100644
--- a/arch/arm/mach-msm/devices-qsd8x50.c
+++ b/arch/arm/mach-msm/devices-qsd8x50.c
@@ -140,12 +140,6 @@ static struct resource resources_sdc1[] = {
 		.name	= "cmd_irq",
 	},
 	{
-		.start	= INT_SDC1_1,
-		.end	= INT_SDC1_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
-	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
 	},
@@ -168,12 +162,6 @@ static struct resource resources_sdc2[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC2_1,
-		.end	= INT_SDC2_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
@@ -197,12 +185,6 @@ static struct resource resources_sdc3[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC3_1,
-		.end	= INT_SDC3_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
@@ -226,12 +208,6 @@ static struct resource resources_sdc4[] = {
 		.flags	= IORESOURCE_IRQ,
 		.name	= "cmd_irq",
 	},
-		{
-		.start	= INT_SDC4_1,
-		.end	= INT_SDC4_1,
-		.flags	= IORESOURCE_IRQ,
-		.name	= "pio_irq",
-	},
 	{
 		.flags	= IORESOURCE_IRQ | IORESOURCE_DISABLED,
 		.name	= "status_irq"
diff --git a/arch/arm/mach-msm/include/mach/entry-macro-qgic.S b/arch/arm/mach-msm/include/mach/entry-macro-qgic.S
index 12467157afb9..717076f3ca73 100644
--- a/arch/arm/mach-msm/include/mach/entry-macro-qgic.S
+++ b/arch/arm/mach-msm/include/mach/entry-macro-qgic.S
@@ -8,81 +8,10 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <mach/hardware.h>
-#include <asm/hardware/gic.h>
+#include <asm/hardware/entry-macro-gic.S>
 
 	.macro	disable_fiq
 	.endm
 
-	.macro  get_irqnr_preamble, base, tmp
-	ldr	\base, =gic_cpu_base_addr
-	ldr	\base, [\base]
-	.endm
-
 	.macro  arch_ret_to_user, tmp1, tmp2
 	.endm
-
-	/*
-	 * The interrupt numbering scheme is defined in the
-	 * interrupt controller spec.  To wit:
-	 *
-	 * Migrated the code from ARM MP port to be more consistent
-	 * with interrupt processing , the following still holds true
-	 * however, all interrupts are treated the same regardless of
-	 * if they are local IPI or PPI
-	 *
-	 * Interrupts 0-15 are IPI
-	 * 16-31 are PPI
-	 *   (16-18 are the timers)
-	 * 32-1020 are global
-	 * 1021-1022 are reserved
-	 * 1023 is "spurious" (no interrupt)
-	 *
-	 * A simple read from the controller will tell us the number of the
-	 * highest priority enabled interrupt.  We then just need to check
-	 * whether it is in the valid range for an IRQ (0-1020 inclusive).
-	 *
-	 * Base ARM code assumes that the local (private) peripheral interrupts
-	 * are not valid, we treat them differently, in that the privates are
-	 * handled like normal shared interrupts with the exception that only
-	 * one processor can register the interrupt and the handler must be
-	 * the same for all processors.
-	 */
-
-	.macro  get_irqnr_and_base, irqnr, irqstat, base, tmp
-
-	ldr  \irqstat, [\base, #GIC_CPU_INTACK] /* bits 12-10 =srcCPU,
-						   9-0 =int # */
-
-	bic     \irqnr, \irqstat, #0x1c00	@mask src
-	cmp     \irqnr, #15
-	ldr		\tmp, =1021
-	cmpcc	\irqnr, \irqnr
-	cmpne	\irqnr, \tmp
-	cmpcs	\irqnr, \irqnr
-
-	.endm
-
-	/* We assume that irqstat (the raw value of the IRQ acknowledge
-	 * register) is preserved from the macro above.
-	 * If there is an IPI, we immediately signal end of interrupt on the
-	 * controller, since this requires the original irqstat value which
-	 * we won't easily be able to recreate later.
-	 */
-	.macro test_for_ipi, irqnr, irqstat, base, tmp
-    bic \irqnr, \irqstat, #0x1c00
-    cmp \irqnr, #16
-    strcc   \irqstat, [\base, #GIC_CPU_EOI]
-    cmpcs   \irqnr, \irqnr
-	.endm
-
-	/* As above, this assumes that irqstat and base are preserved.. */
-
-	.macro test_for_ltirq, irqnr, irqstat, base, tmp
-    bic \irqnr, \irqstat, #0x1c00
-    mov     \tmp, #0
-    cmp \irqnr, #16
-    moveq   \tmp, #1
-    streq   \irqstat, [\base, #GIC_CPU_EOI]
-    cmp \tmp, #0
-	.endm
diff --git a/arch/arm/mach-msm/include/mach/mmc.h b/arch/arm/mach-msm/include/mach/mmc.h
index 5631b51cec46..ffcd9e3a6a7e 100644
--- a/arch/arm/mach-msm/include/mach/mmc.h
+++ b/arch/arm/mach-msm/include/mach/mmc.h
@@ -8,13 +8,6 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/sdio_func.h>
 
-struct embedded_sdio_data {
-	struct sdio_cis cis;
-	struct sdio_cccr cccr;
-	struct sdio_embedded_func *funcs;
-	int num_funcs;
-};
-
 struct msm_mmc_gpio {
 	unsigned no;
 	const char *name;
@@ -29,9 +22,9 @@ struct msm_mmc_platform_data {
 	unsigned int ocr_mask;			/* available voltages */
 	u32 (*translate_vdd)(struct device *, unsigned int);
 	unsigned int (*status)(struct device *);
-	struct embedded_sdio_data *embedded_sdio;
 	int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id);
 	struct msm_mmc_gpio_data *gpio_data;
+	void (*init_card)(struct mmc_card *card);
 };
 
 #endif
diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c
index 1a1af9e56250..727659520912 100644
--- a/arch/arm/mach-msm/platsmp.c
+++ b/arch/arm/mach-msm/platsmp.c
@@ -156,6 +156,12 @@ void __init smp_init_cpus(void)
 {
 	unsigned int i, ncores = get_core_count();
 
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
+	}
+
 	for (i = 0; i < ncores; i++)
 		set_cpu_possible(i, true);
 
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 63621f152c98..afeeca52fc66 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -71,12 +71,16 @@ enum timer_location {
 struct msm_clock {
 	struct clock_event_device   clockevent;
 	struct clocksource          clocksource;
-	struct irqaction            irq;
+	unsigned int		    irq;
 	void __iomem                *regbase;
 	uint32_t                    freq;
 	uint32_t                    shift;
 	void __iomem                *global_counter;
 	void __iomem                *local_counter;
+	union {
+		struct clock_event_device		*evt;
+		struct clock_event_device __percpu	**percpu_evt;
+	};		
 };
 
 enum {
@@ -87,13 +91,10 @@ enum {
 
 
 static struct msm_clock msm_clocks[];
-static struct clock_event_device *local_clock_event;
 
 static irqreturn_t msm_timer_interrupt(int irq, void *dev_id)
 {
-	struct clock_event_device *evt = dev_id;
-	if (smp_processor_id() != 0)
-		evt = local_clock_event;
+	struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
 	if (evt->event_handler == NULL)
 		return IRQ_HANDLED;
 	evt->event_handler(evt);
@@ -171,13 +172,7 @@ static struct msm_clock msm_clocks[] = {
 			.mask           = CLOCKSOURCE_MASK(32),
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
-		.irq = {
-			.name    = "gp_timer",
-			.flags   = IRQF_DISABLED | IRQF_TIMER | IRQF_TRIGGER_RISING,
-			.handler = msm_timer_interrupt,
-			.dev_id  = &msm_clocks[0].clockevent,
-			.irq     = INT_GP_TIMER_EXP
-		},
+		.irq = INT_GP_TIMER_EXP,
 		.freq = GPT_HZ,
 	},
 	[MSM_CLOCK_DGT] = {
@@ -196,13 +191,7 @@ static struct msm_clock msm_clocks[] = {
 			.mask           = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
 			.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 		},
-		.irq = {
-			.name    = "dg_timer",
-			.flags   = IRQF_DISABLED | IRQF_TIMER | IRQF_TRIGGER_RISING,
-			.handler = msm_timer_interrupt,
-			.dev_id  = &msm_clocks[1].clockevent,
-			.irq     = INT_DEBUG_TIMER_EXP
-		},
+		.irq = INT_DEBUG_TIMER_EXP,
 		.freq = DGT_HZ >> MSM_DGT_SHIFT,
 		.shift = MSM_DGT_SHIFT,
 	}
@@ -261,10 +250,30 @@ static void __init msm_timer_init(void)
 			printk(KERN_ERR "msm_timer_init: clocksource_register "
 			       "failed for %s\n", cs->name);
 
-		res = setup_irq(clock->irq.irq, &clock->irq);
+		ce->irq = clock->irq;
+		if (cpu_is_msm8x60() || cpu_is_msm8960()) {
+			clock->percpu_evt = alloc_percpu(struct clock_event_device *);
+			if (!clock->percpu_evt) {
+				pr_err("msm_timer_init: memory allocation "
+				       "failed for %s\n", ce->name);
+				continue;
+			}
+
+			*__this_cpu_ptr(clock->percpu_evt) = ce;
+			res = request_percpu_irq(ce->irq, msm_timer_interrupt,
+						 ce->name, clock->percpu_evt);
+			if (!res)
+				enable_percpu_irq(ce->irq, 0);
+		} else {
+			clock->evt = ce;
+			res = request_irq(ce->irq, msm_timer_interrupt,
+					  IRQF_TIMER | IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
+					  ce->name, &clock->evt);
+		}
+
 		if (res)
-			printk(KERN_ERR "msm_timer_init: setup_irq "
-			       "failed for %s\n", cs->name);
+			pr_err("msm_timer_init: request_irq failed for %s\n",
+			       ce->name);
 
 		clockevents_register_device(ce);
 	}
@@ -273,6 +282,7 @@ static void __init msm_timer_init(void)
 #ifdef CONFIG_SMP
 int __cpuinit local_timer_setup(struct clock_event_device *evt)
 {
+	static bool local_timer_inited;
 	struct msm_clock *clock = &msm_clocks[MSM_GLOBAL_TIMER];
 
 	/* Use existing clock_event for cpu 0 */
@@ -281,12 +291,13 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 
 	writel(DGT_CLK_CTL_DIV_4, MSM_TMR_BASE + DGT_CLK_CTL);
 
-	if (!local_clock_event) {
+	if (!local_timer_inited) {
 		writel(0, clock->regbase  + TIMER_ENABLE);
 		writel(0, clock->regbase + TIMER_CLEAR);
 		writel(~0, clock->regbase + TIMER_MATCH_VAL);
+		local_timer_inited = true;
 	}
-	evt->irq = clock->irq.irq;
+	evt->irq = clock->irq;
 	evt->name = "local_timer";
 	evt->features = CLOCK_EVT_FEAT_ONESHOT;
 	evt->rating = clock->clockevent.rating;
@@ -298,17 +309,17 @@ int __cpuinit local_timer_setup(struct clock_event_device *evt)
 		clockevent_delta2ns(0xf0000000 >> clock->shift, evt);
 	evt->min_delta_ns = clockevent_delta2ns(4, evt);
 
-	local_clock_event = evt;
-
-	gic_enable_ppi(clock->irq.irq);
+	*__this_cpu_ptr(clock->percpu_evt) = evt;
+	enable_percpu_irq(evt->irq, 0);
 
 	clockevents_register_device(evt);
 	return 0;
 }
 
-inline int local_timer_ack(void)
+void local_timer_stop(struct clock_event_device *evt)
 {
-	return 1;
+	evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+	disable_percpu_irq(evt->irq);
 }
 
 #endif
diff --git a/arch/arm/mach-mv78xx0/Makefile.boot b/arch/arm/mach-mv78xx0/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-mv78xx0/Makefile.boot
+++ b/arch/arm/mach-mv78xx0/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-mx5/Makefile.boot b/arch/arm/mach-mx5/Makefile.boot
index e928be1b6757..ca207ca305ec 100644
--- a/arch/arm/mach-mx5/Makefile.boot
+++ b/arch/arm/mach-mx5/Makefile.boot
@@ -1,9 +1,9 @@
-   zreladdr-$(CONFIG_ARCH_MX50)	:= 0x70008000
+   zreladdr-$(CONFIG_ARCH_MX50)	+= 0x70008000
 params_phys-$(CONFIG_ARCH_MX50)	:= 0x70000100
 initrd_phys-$(CONFIG_ARCH_MX50)	:= 0x70800000
-   zreladdr-$(CONFIG_ARCH_MX51)	:= 0x90008000
+   zreladdr-$(CONFIG_ARCH_MX51)	+= 0x90008000
 params_phys-$(CONFIG_ARCH_MX51)	:= 0x90000100
 initrd_phys-$(CONFIG_ARCH_MX51)	:= 0x90800000
-   zreladdr-$(CONFIG_ARCH_MX53)	:= 0x70008000
+   zreladdr-$(CONFIG_ARCH_MX53)	+= 0x70008000
 params_phys-$(CONFIG_ARCH_MX53)	:= 0x70000100
 initrd_phys-$(CONFIG_ARCH_MX53)	:= 0x70800000
diff --git a/arch/arm/mach-mxs/Makefile.boot b/arch/arm/mach-mxs/Makefile.boot
index eb541e0291da..07b11fe6453f 100644
--- a/arch/arm/mach-mxs/Makefile.boot
+++ b/arch/arm/mach-mxs/Makefile.boot
@@ -1 +1 @@
-zreladdr-y := 0x40008000
+zreladdr-y += 0x40008000
diff --git a/arch/arm/mach-netx/Makefile.boot b/arch/arm/mach-netx/Makefile.boot
index b81cf6aff0ac..534a4d27055e 100644
--- a/arch/arm/mach-netx/Makefile.boot
+++ b/arch/arm/mach-netx/Makefile.boot
@@ -1,2 +1,2 @@
-    zreladdr-y			:= 0x80008000
+    zreladdr-y			+= 0x80008000
 
diff --git a/arch/arm/mach-nomadik/Makefile.boot b/arch/arm/mach-nomadik/Makefile.boot
index c7e75acfe6c9..ff0a4b5b0a82 100644
--- a/arch/arm/mach-nomadik/Makefile.boot
+++ b/arch/arm/mach-nomadik/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-nuc93x/Makefile.boot b/arch/arm/mach-nuc93x/Makefile.boot
index a057b546b6e5..6c3d421c2d11 100644
--- a/arch/arm/mach-nuc93x/Makefile.boot
+++ b/arch/arm/mach-nuc93x/Makefile.boot
@@ -1,3 +1,3 @@
-zreladdr-y	:= 0x00008000
+zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 
diff --git a/arch/arm/mach-nuc93x/time.c b/arch/arm/mach-nuc93x/time.c
index 2f90f9dc6e30..f9807c029ec5 100644
--- a/arch/arm/mach-nuc93x/time.c
+++ b/arch/arm/mach-nuc93x/time.c
@@ -82,7 +82,7 @@ static void nuc93x_timer_setup(void)
 	timer0_load = (rate / TICKS_PER_SEC);
 	__raw_writel(timer0_load, REG_TICR0);
 
-	val |= (PERIOD | COUNTEN | INTEN | PRESCALE);;
+	val |= (PERIOD | COUNTEN | INTEN | PRESCALE);
 	__raw_writel(val, REG_TCSR0);
 
 }
diff --git a/arch/arm/mach-omap1/Makefile.boot b/arch/arm/mach-omap1/Makefile.boot
index 292d56c5a888..13bda8dbd604 100644
--- a/arch/arm/mach-omap1/Makefile.boot
+++ b/arch/arm/mach-omap1/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y		:= 0x10008000
+   zreladdr-y		+= 0x10008000
 params_phys-y		:= 0x10000100
 initrd_phys-y		:= 0x10800000
diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 943072d5a1d5..7868e75ad077 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
 #include <linux/clk.h>
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 57b66d590c52..89bfb49389f2 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -36,6 +36,7 @@ config ARCH_OMAP3
 	select ARM_L1_CACHE_SHIFT_6 if !ARCH_OMAP4
 	select ARCH_HAS_OPP
 	select PM_OPP if PM
+	select ARM_CPU_SUSPEND if PM
 
 config ARCH_OMAP4
 	bool "TI OMAP4"
@@ -50,6 +51,7 @@ config ARCH_OMAP4
 	select ARCH_HAS_OPP
 	select PM_OPP if PM
 	select USB_ARCH_HAS_EHCI
+	select ARM_CPU_SUSPEND if PM
 
 comment "OMAP Core Type"
 	depends on ARCH_OMAP2
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index f34336560437..7317a2b39dd1 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -242,14 +242,11 @@ obj-$(CONFIG_MACH_IGEP0020)		+= board-igep0020.o \
 obj-$(CONFIG_MACH_OMAP3_TOUCHBOOK)	+= board-omap3touchbook.o \
 					   hsmmc.o
 obj-$(CONFIG_MACH_OMAP_4430SDP)		+= board-4430sdp.o \
-					   hsmmc.o \
-					   omap_phy_internal.o
+					   hsmmc.o
 obj-$(CONFIG_MACH_OMAP4_PANDA)		+= board-omap4panda.o \
-					   hsmmc.o \
-					   omap_phy_internal.o
+					   hsmmc.o
 
-obj-$(CONFIG_MACH_OMAP3517EVM)		+= board-am3517evm.o \
-					   omap_phy_internal.o \
+obj-$(CONFIG_MACH_OMAP3517EVM)		+= board-am3517evm.o
 
 obj-$(CONFIG_MACH_CRANEBOARD)		+= board-am3517crane.o
 
@@ -260,6 +257,8 @@ obj-$(CONFIG_MACH_TI8168EVM)		+= board-ti8168evm.o
 usbfs-$(CONFIG_ARCH_OMAP_OTG)		:= usb-fs.o
 obj-y					+= $(usbfs-m) $(usbfs-y)
 obj-y					+= usb-musb.o
+obj-y					+= omap_phy_internal.o
+
 obj-$(CONFIG_MACH_OMAP2_TUSB6010)	+= usb-tusb6010.o
 obj-y					+= usb-host.o
 
diff --git a/arch/arm/mach-omap2/Makefile.boot b/arch/arm/mach-omap2/Makefile.boot
index 565aff7f37a9..b03e562acc60 100644
--- a/arch/arm/mach-omap2/Makefile.boot
+++ b/arch/arm/mach-omap2/Makefile.boot
@@ -1,3 +1,3 @@
-  zreladdr-y		:= 0x80008000
+  zreladdr-y		+= 0x80008000
 params_phys-y		:= 0x80000100
 initrd_phys-y		:= 0x80800000
diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index 87f43ade4405..f8ce84b69eb1 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -39,6 +39,9 @@
 #include <plat/usb.h>
 #include <plat/gpmc-smc91x.h>
 
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
+
 #include "mux.h"
 #include "hsmmc.h"
 #include "common-board-devices.h"
@@ -99,20 +102,72 @@ static struct platform_device sdp2430_flash_device = {
 	.resource	= &sdp2430_flash_resource,
 };
 
-static struct platform_device sdp2430_lcd_device = {
-	.name		= "sdp2430_lcd",
-	.id		= -1,
-};
-
 static struct platform_device *sdp2430_devices[] __initdata = {
 	&sdp2430_flash_device,
+};
+
+/* LCD */
+#define SDP2430_LCD_PANEL_BACKLIGHT_GPIO	91
+#define SDP2430_LCD_PANEL_ENABLE_GPIO		154
+
+static int sdp2430_panel_enable_lcd(struct omap_dss_device *dssdev)
+{
+	gpio_direction_output(SDP2430_LCD_PANEL_ENABLE_GPIO, 1);
+	gpio_direction_output(SDP2430_LCD_PANEL_BACKLIGHT_GPIO, 1);
+
+	return 0;
+}
+
+static void sdp2430_panel_disable_lcd(struct omap_dss_device *dssdev)
+{
+	gpio_direction_output(SDP2430_LCD_PANEL_ENABLE_GPIO, 0);
+	gpio_direction_output(SDP2430_LCD_PANEL_BACKLIGHT_GPIO, 0);
+}
+
+static struct panel_generic_dpi_data sdp2430_panel_data = {
+	.name			= "nec_nl2432dr22-11b",
+	.platform_enable	= sdp2430_panel_enable_lcd,
+	.platform_disable	= sdp2430_panel_disable_lcd,
+};
+
+static struct omap_dss_device sdp2430_lcd_device = {
+	.name			= "lcd",
+	.driver_name		= "generic_dpi_panel",
+	.type			= OMAP_DISPLAY_TYPE_DPI,
+	.phy.dpi.data_lines	= 16,
+	.data			= &sdp2430_panel_data,
+};
+
+static struct omap_dss_device *sdp2430_dss_devices[] = {
 	&sdp2430_lcd_device,
 };
 
-static struct omap_lcd_config sdp2430_lcd_config __initdata = {
-	.ctrl_name	= "internal",
+static struct omap_dss_board_info sdp2430_dss_data = {
+	.num_devices	= ARRAY_SIZE(sdp2430_dss_devices),
+	.devices	= sdp2430_dss_devices,
+	.default_device	= &sdp2430_lcd_device,
 };
 
+static void __init sdp2430_display_init(void)
+{
+	int r;
+
+	static struct gpio gpios[] __initdata = {
+		{ SDP2430_LCD_PANEL_ENABLE_GPIO, GPIOF_OUT_INIT_LOW,
+			"LCD reset" },
+		{ SDP2430_LCD_PANEL_BACKLIGHT_GPIO, GPIOF_OUT_INIT_LOW,
+			"LCD Backlight" },
+	};
+
+	r = gpio_request_array(gpios, ARRAY_SIZE(gpios));
+	if (r) {
+		pr_err("Cannot request LCD GPIOs, error %d\n", r);
+		return;
+	}
+
+	omap_display_init(&sdp2430_dss_data);
+}
+
 #if defined(CONFIG_SMC91X) || defined(CONFIG_SMC91x_MODULE)
 
 static struct omap_smc91x_platform_data board_smc91x_data = {
@@ -137,10 +192,6 @@ static inline void board_smc91x_init(void)
 
 #endif
 
-static struct omap_board_config_kernel sdp2430_config[] __initdata = {
-	{OMAP_TAG_LCD, &sdp2430_lcd_config},
-};
-
 static void __init omap_2430sdp_init_early(void)
 {
 	omap2_init_common_infrastructure();
@@ -229,9 +280,6 @@ static void __init omap_2430sdp_init(void)
 {
 	omap2430_mux_init(board_mux, OMAP_PACKAGE_ZAC);
 
-	omap_board_config = sdp2430_config;
-	omap_board_config_size = ARRAY_SIZE(sdp2430_config);
-
 	omap2430_i2c_init();
 
 	platform_add_devices(sdp2430_devices, ARRAY_SIZE(sdp2430_devices));
@@ -247,6 +295,8 @@ static void __init omap_2430sdp_init(void)
 	/* Turn off secondary LCD backlight */
 	gpio_request_one(SECONDARY_LCD_GPIO, GPIOF_OUT_INIT_LOW,
 			 "Secondary LCD backlight");
+
+	sdp2430_display_init();
 }
 
 static void __init omap_2430sdp_map_io(void)
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index 2430531b2239..204beddcb949 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -37,7 +37,7 @@
 #include <plat/dma.h>
 #include <plat/gpmc.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include <plat/gpmc-smc91x.h>
 
@@ -186,8 +186,7 @@ static struct omap_dss_device sdp3430_lcd_device = {
 	.platform_disable	= sdp3430_panel_disable_lcd,
 };
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= sdp3430_panel_enable_dvi,
 	.platform_disable	= sdp3430_panel_disable_dvi,
 };
@@ -195,7 +194,7 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device sdp3430_dvi_device = {
 	.name			= "dvi",
 	.type			= OMAP_DISPLAY_TYPE_DPI,
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index e749c950d1bc..a3e37ff3ad18 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -38,6 +38,8 @@
 #include <plat/mmc.h>
 #include <plat/omap4-keypad.h>
 #include <video/omapdss.h>
+#include <video/omap-panel-nokia-dsi.h>
+#include <video/omap-panel-picodlp.h>
 #include <linux/wl12xx.h>
 
 #include "mux.h"
@@ -52,6 +54,8 @@
 #define OMAP4_SFH7741_ENABLE_GPIO		188
 #define HDMI_GPIO_HPD 60 /* Hot plug pin for HDMI */
 #define HDMI_GPIO_LS_OE 41 /* Level shifter for HDMI */
+#define DISPLAY_SEL_GPIO	59	/* LCD2/PicoDLP switch */
+#define DLP_POWER_ON_GPIO	40
 
 #define GPIO_WIFI_PMENA		54
 #define GPIO_WIFI_IRQ		53
@@ -340,11 +344,6 @@ static int __init omap_ethernet_init(void)
 	return status;
 }
 
-static struct platform_device sdp4430_lcd_device = {
-	.name		= "sdp4430_lcd",
-	.id		= -1,
-};
-
 static struct regulator_consumer_supply sdp4430_vbat_supply[] = {
 	REGULATOR_SUPPLY("vddvibl", "twl6040-vibra"),
 	REGULATOR_SUPPLY("vddvibr", "twl6040-vibra"),
@@ -374,21 +373,12 @@ static struct platform_device sdp4430_vbat = {
 };
 
 static struct platform_device *sdp4430_devices[] __initdata = {
-	&sdp4430_lcd_device,
 	&sdp4430_gpio_keys_device,
 	&sdp4430_leds_gpio,
 	&sdp4430_leds_pwm,
 	&sdp4430_vbat,
 };
 
-static struct omap_lcd_config sdp4430_lcd_config __initdata = {
-	.ctrl_name	= "internal",
-};
-
-static struct omap_board_config_kernel sdp4430_config[] __initdata = {
-	{ OMAP_TAG_LCD,		&sdp4430_lcd_config },
-};
-
 static void __init omap_4430sdp_init_early(void)
 {
 	omap2_init_common_infrastructure();
@@ -648,37 +638,202 @@ static void sdp4430_panel_disable_hdmi(struct omap_dss_device *dssdev)
 	gpio_free(HDMI_GPIO_HPD);
 }
 
-static struct omap_dss_device sdp4430_hdmi_device = {
-	.name = "hdmi",
-	.driver_name = "hdmi_panel",
-	.type = OMAP_DISPLAY_TYPE_HDMI,
-	.clocks	= {
-		.dispc	= {
+static struct nokia_dsi_panel_data dsi1_panel = {
+		.name		= "taal",
+		.reset_gpio	= 102,
+		.use_ext_te	= false,
+		.ext_te_gpio	= 101,
+		.esd_interval	= 0,
+};
+
+static struct omap_dss_device sdp4430_lcd_device = {
+	.name			= "lcd",
+	.driver_name		= "taal",
+	.type			= OMAP_DISPLAY_TYPE_DSI,
+	.data			= &dsi1_panel,
+	.phy.dsi		= {
+		.clk_lane	= 1,
+		.clk_pol	= 0,
+		.data1_lane	= 2,
+		.data1_pol	= 0,
+		.data2_lane	= 3,
+		.data2_pol	= 0,
+
+		.module		= 0,
+	},
+
+	.clocks = {
+		.dispc = {
+			.channel = {
+				/* Logic Clock = 172.8 MHz */
+				.lck_div	= 1,
+				/* Pixel Clock = 34.56 MHz */
+				.pck_div	= 5,
+				.lcd_clk_src	= OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC,
+			},
 			.dispc_fclk_src	= OMAP_DSS_CLK_SRC_FCK,
 		},
-		.hdmi	= {
-			.regn	= 15,
-			.regm2	= 1,
+
+		.dsi = {
+			.regn		= 16,	/* Fint = 2.4 MHz */
+			.regm		= 180,	/* DDR Clock = 216 MHz */
+			.regm_dispc	= 5,	/* PLL1_CLK1 = 172.8 MHz */
+			.regm_dsi	= 5,	/* PLL1_CLK2 = 172.8 MHz */
+
+			.lp_clk_div	= 10,	/* LP Clock = 8.64 MHz */
+			.dsi_fclk_src	= OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DSI,
+		},
+	},
+	.channel		= OMAP_DSS_CHANNEL_LCD,
+};
+
+static struct nokia_dsi_panel_data dsi2_panel = {
+		.name		= "taal",
+		.reset_gpio	= 104,
+		.use_ext_te	= false,
+		.ext_te_gpio	= 103,
+		.esd_interval	= 0,
+};
+
+static struct omap_dss_device sdp4430_lcd2_device = {
+	.name			= "lcd2",
+	.driver_name		= "taal",
+	.type			= OMAP_DISPLAY_TYPE_DSI,
+	.data			= &dsi2_panel,
+	.phy.dsi		= {
+		.clk_lane	= 1,
+		.clk_pol	= 0,
+		.data1_lane	= 2,
+		.data1_pol	= 0,
+		.data2_lane	= 3,
+		.data2_pol	= 0,
+
+		.module		= 1,
+	},
+
+	.clocks = {
+		.dispc = {
+			.channel = {
+				/* Logic Clock = 172.8 MHz */
+				.lck_div	= 1,
+				/* Pixel Clock = 34.56 MHz */
+				.pck_div	= 5,
+				.lcd_clk_src	= OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC,
+			},
+			.dispc_fclk_src	= OMAP_DSS_CLK_SRC_FCK,
+		},
+
+		.dsi = {
+			.regn		= 16,	/* Fint = 2.4 MHz */
+			.regm		= 180,	/* DDR Clock = 216 MHz */
+			.regm_dispc	= 5,	/* PLL1_CLK1 = 172.8 MHz */
+			.regm_dsi	= 5,	/* PLL1_CLK2 = 172.8 MHz */
+
+			.lp_clk_div	= 10,	/* LP Clock = 8.64 MHz */
+			.dsi_fclk_src	= OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DSI,
 		},
 	},
+	.channel		= OMAP_DSS_CHANNEL_LCD2,
+};
+
+static void sdp4430_lcd_init(void)
+{
+	int r;
+
+	r = gpio_request_one(dsi1_panel.reset_gpio, GPIOF_DIR_OUT,
+		"lcd1_reset_gpio");
+	if (r)
+		pr_err("%s: Could not get lcd1_reset_gpio\n", __func__);
+
+	r = gpio_request_one(dsi2_panel.reset_gpio, GPIOF_DIR_OUT,
+		"lcd2_reset_gpio");
+	if (r)
+		pr_err("%s: Could not get lcd2_reset_gpio\n", __func__);
+}
+
+static struct omap_dss_device sdp4430_hdmi_device = {
+	.name = "hdmi",
+	.driver_name = "hdmi_panel",
+	.type = OMAP_DISPLAY_TYPE_HDMI,
 	.platform_enable = sdp4430_panel_enable_hdmi,
 	.platform_disable = sdp4430_panel_disable_hdmi,
 	.channel = OMAP_DSS_CHANNEL_DIGIT,
 };
 
+static struct picodlp_panel_data sdp4430_picodlp_pdata = {
+	.picodlp_adapter_id	= 2,
+	.emu_done_gpio		= 44,
+	.pwrgood_gpio		= 45,
+};
+
+static void sdp4430_picodlp_init(void)
+{
+	int r;
+	const struct gpio picodlp_gpios[] = {
+		{DLP_POWER_ON_GPIO, GPIOF_OUT_INIT_LOW,
+			"DLP POWER ON"},
+		{sdp4430_picodlp_pdata.emu_done_gpio, GPIOF_IN,
+			"DLP EMU DONE"},
+		{sdp4430_picodlp_pdata.pwrgood_gpio, GPIOF_OUT_INIT_LOW,
+			"DLP PWRGOOD"},
+	};
+
+	r = gpio_request_array(picodlp_gpios, ARRAY_SIZE(picodlp_gpios));
+	if (r)
+		pr_err("Cannot request PicoDLP GPIOs, error %d\n", r);
+}
+
+static int sdp4430_panel_enable_picodlp(struct omap_dss_device *dssdev)
+{
+	gpio_set_value(DISPLAY_SEL_GPIO, 0);
+	gpio_set_value(DLP_POWER_ON_GPIO, 1);
+
+	return 0;
+}
+
+static void sdp4430_panel_disable_picodlp(struct omap_dss_device *dssdev)
+{
+	gpio_set_value(DLP_POWER_ON_GPIO, 0);
+	gpio_set_value(DISPLAY_SEL_GPIO, 1);
+}
+
+static struct omap_dss_device sdp4430_picodlp_device = {
+	.name			= "picodlp",
+	.driver_name		= "picodlp_panel",
+	.type			= OMAP_DISPLAY_TYPE_DPI,
+	.phy.dpi.data_lines	= 24,
+	.channel		= OMAP_DSS_CHANNEL_LCD2,
+	.platform_enable	= sdp4430_panel_enable_picodlp,
+	.platform_disable	= sdp4430_panel_disable_picodlp,
+	.data			= &sdp4430_picodlp_pdata,
+};
+
 static struct omap_dss_device *sdp4430_dss_devices[] = {
+	&sdp4430_lcd_device,
+	&sdp4430_lcd2_device,
 	&sdp4430_hdmi_device,
+	&sdp4430_picodlp_device,
 };
 
 static struct omap_dss_board_info sdp4430_dss_data = {
 	.num_devices	= ARRAY_SIZE(sdp4430_dss_devices),
 	.devices	= sdp4430_dss_devices,
-	.default_device	= &sdp4430_hdmi_device,
+	.default_device	= &sdp4430_lcd_device,
 };
 
-void omap_4430sdp_display_init(void)
+static void omap_4430sdp_display_init(void)
 {
+	int r;
+
+	/* Enable LCD2 by default (instead of Pico DLP) */
+	r = gpio_request_one(DISPLAY_SEL_GPIO, GPIOF_OUT_INIT_HIGH,
+			"display_sel");
+	if (r)
+		pr_err("%s: Could not get display_sel GPIO\n", __func__);
+
+	sdp4430_lcd_init();
 	sdp4430_hdmi_mux_init();
+	sdp4430_picodlp_init();
 	omap_display_init(&sdp4430_dss_data);
 }
 
@@ -802,9 +957,6 @@ static void __init omap_4430sdp_init(void)
 		package = OMAP_PACKAGE_CBL;
 	omap4_mux_init(board_mux, NULL, package);
 
-	omap_board_config = sdp4430_config;
-	omap_board_config_size = ARRAY_SIZE(sdp4430_config);
-
 	omap4_i2c_init();
 	omap_sfh7741prox_init();
 	platform_add_devices(sdp4430_devices, ARRAY_SIZE(sdp4430_devices));
diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c
index 1325085e453d..ab10f75984d8 100644
--- a/arch/arm/mach-omap2/board-am3517evm.c
+++ b/arch/arm/mach-omap2/board-am3517evm.c
@@ -36,6 +36,7 @@
 #include <plat/usb.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include "mux.h"
 #include "control.h"
@@ -333,8 +334,7 @@ static void am3517_evm_panel_disable_dvi(struct omap_dss_device *dssdev)
 	dvi_enabled = 0;
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= am3517_evm_panel_enable_dvi,
 	.platform_disable	= am3517_evm_panel_disable_dvi,
 };
@@ -342,7 +342,7 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device am3517_evm_dvi_device = {
 	.type			= OMAP_DISPLAY_TYPE_DPI,
 	.name			= "dvi",
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index 67800e647d7a..ad55351e0cab 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -40,6 +40,9 @@
 #include <plat/common.h>
 #include <plat/gpmc.h>
 
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
+
 #include "mux.h"
 #include "control.h"
 
@@ -149,11 +152,6 @@ static struct platform_device apollon_smc91x_device = {
 	.resource	= apollon_smc91x_resources,
 };
 
-static struct platform_device apollon_lcd_device = {
-	.name		= "apollon_lcd",
-	.id		= -1,
-};
-
 static struct omap_led_config apollon_led_config[] = {
 	{
 		.cdev	= {
@@ -191,7 +189,6 @@ static struct platform_device apollon_led_device = {
 static struct platform_device *apollon_devices[] __initdata = {
 	&apollon_onenand_device,
 	&apollon_smc91x_device,
-	&apollon_lcd_device,
 	&apollon_led_device,
 };
 
@@ -265,12 +262,26 @@ static struct omap_usb_config apollon_usb_config __initdata = {
 	.pins[0]	= 6,
 };
 
-static struct omap_lcd_config apollon_lcd_config __initdata = {
-	.ctrl_name	= "internal",
+static struct panel_generic_dpi_data apollon_panel_data = {
+	.name			= "apollon",
 };
 
-static struct omap_board_config_kernel apollon_config[] __initdata = {
-	{ OMAP_TAG_LCD,		&apollon_lcd_config },
+static struct omap_dss_device apollon_lcd_device = {
+	.name			= "lcd",
+	.driver_name		= "generic_dpi_panel",
+	.type			= OMAP_DISPLAY_TYPE_DPI,
+	.phy.dpi.data_lines	= 18,
+	.data			= &apollon_panel_data,
+};
+
+static struct omap_dss_device *apollon_dss_devices[] = {
+	&apollon_lcd_device,
+};
+
+static struct omap_dss_board_info apollon_dss_data = {
+	.num_devices	= ARRAY_SIZE(apollon_dss_devices),
+	.devices	= apollon_dss_devices,
+	.default_device	= &apollon_lcd_device,
 };
 
 static void __init omap_apollon_init_early(void)
@@ -314,8 +325,6 @@ static void __init omap_apollon_init(void)
 	u32 v;
 
 	omap2420_mux_init(board_mux, OMAP_PACKAGE_ZAC);
-	omap_board_config = apollon_config;
-	omap_board_config_size = ARRAY_SIZE(apollon_config);
 
 	apollon_init_smc91x();
 	apollon_led_init();
@@ -340,6 +349,8 @@ static void __init omap_apollon_init(void)
 	 */
 	platform_add_devices(apollon_devices, ARRAY_SIZE(apollon_devices));
 	omap_serial_init();
+
+	omap_display_init(&apollon_dss_data);
 }
 
 static void __init omap_apollon_map_io(void)
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index 38179c175503..6e0f0d2e39bc 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -43,6 +43,7 @@
 #include <plat/usb.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 #include <plat/mcspi.h>
 
 #include <mach/hardware.h>
@@ -242,8 +243,7 @@ static struct omap_dss_device cm_t35_lcd_device = {
 	.phy.dpi.data_lines	= 18,
 };
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= cm_t35_panel_enable_dvi,
 	.platform_disable	= cm_t35_panel_disable_dvi,
 };
@@ -251,7 +251,7 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device cm_t35_dvi_device = {
 	.name			= "dvi",
 	.type			= OMAP_DISPLAY_TYPE_DPI,
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 99a42432ac93..d9bfe54917e4 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -47,6 +47,7 @@
 #include <plat/usb.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include <plat/mcspi.h>
 #include <linux/input/matrix_keypad.h>
@@ -139,7 +140,7 @@ static struct regulator_consumer_supply devkit8000_vio_supply[] = {
 };
 
 static struct panel_generic_dpi_data lcd_panel = {
-	.name			= "generic",
+	.name			= "innolux_at070tn83",
 	.platform_enable        = devkit8000_panel_enable_lcd,
 	.platform_disable       = devkit8000_panel_disable_lcd,
 };
@@ -152,8 +153,7 @@ static struct omap_dss_device devkit8000_lcd_device = {
 	.phy.dpi.data_lines     = 24,
 };
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable        = devkit8000_panel_enable_dvi,
 	.platform_disable       = devkit8000_panel_disable_dvi,
 };
@@ -161,7 +161,7 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device devkit8000_dvi_device = {
 	.name                   = "dvi",
 	.type                   = OMAP_DISPLAY_TYPE_DPI,
-	.driver_name            = "generic_dpi_panel",
+	.driver_name            = "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines     = 24,
 };
@@ -267,7 +267,7 @@ static struct twl4030_gpio_platform_data devkit8000_gpio_data = {
 
 static struct regulator_consumer_supply devkit8000_vpll1_supplies[] = {
 	REGULATOR_SUPPLY("vdds_dsi", "omapdss"),
-	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi1"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi.0"),
 };
 
 /* VMMC1 for MMC1 pins CMD, CLK, DAT0..DAT3 (20 mA, plus card == max 220 mA) */
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 82421a4cfa92..8fcf79628ca1 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -39,6 +39,9 @@
 #include <plat/dma.h>
 #include <plat/gpmc.h>
 
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
+
 #include "mux.h"
 #include "control.h"
 
@@ -156,17 +159,33 @@ static struct platform_device h4_kp_device = {
 	},
 };
 
-static struct platform_device h4_lcd_device = {
-	.name		= "lcd_h4",
-	.id		= -1,
-};
-
 static struct platform_device *h4_devices[] __initdata = {
 	&h4_flash_device,
 	&h4_kp_device,
+};
+
+static struct panel_generic_dpi_data h4_panel_data = {
+	.name			= "h4",
+};
+
+static struct omap_dss_device h4_lcd_device = {
+	.name			= "lcd",
+	.driver_name		= "generic_dpi_panel",
+	.type			= OMAP_DISPLAY_TYPE_DPI,
+	.phy.dpi.data_lines	= 16,
+	.data			= &h4_panel_data,
+};
+
+static struct omap_dss_device *h4_dss_devices[] = {
 	&h4_lcd_device,
 };
 
+static struct omap_dss_board_info h4_dss_data = {
+	.num_devices	= ARRAY_SIZE(h4_dss_devices),
+	.devices	= h4_dss_devices,
+	.default_device	= &h4_lcd_device,
+};
+
 /* 2420 Sysboot setup (2430 is different) */
 static u32 get_sysboot_value(void)
 {
@@ -270,10 +289,6 @@ static void __init h4_init_flash(void)
 	h4_flash_resource.end	= base + SZ_64M - 1;
 }
 
-static struct omap_lcd_config h4_lcd_config __initdata = {
-	.ctrl_name	= "internal",
-};
-
 static struct omap_usb_config h4_usb_config __initdata = {
 	/* S1.10 OFF -- usb "download port"
 	 * usb0 switched to Mini-B port and isp1105 transceiver;
@@ -285,10 +300,6 @@ static struct omap_usb_config h4_usb_config __initdata = {
 	.hmc_mode	= 0x00,		/* 0:dev|otg 1:disable 2:disable */
 };
 
-static struct omap_board_config_kernel h4_config[] __initdata = {
-	{ OMAP_TAG_LCD,		&h4_lcd_config },
-};
-
 static void __init omap_h4_init_early(void)
 {
 	omap2_init_common_infrastructure();
@@ -330,9 +341,6 @@ static void __init omap_h4_init(void)
 {
 	omap2420_mux_init(board_mux, OMAP_PACKAGE_ZAF);
 
-	omap_board_config = h4_config;
-	omap_board_config_size = ARRAY_SIZE(h4_config);
-
 	/*
 	 * Make sure the serial ports are muxed on at this point.
 	 * You have to mux them off in device drivers later on
@@ -371,6 +379,8 @@ static void __init omap_h4_init(void)
 	omap2_usbfs_init(&h4_usb_config);
 	omap_serial_init();
 	h4_init_flash();
+
+	omap_display_init(&h4_dss_data);
 }
 
 static void __init omap_h4_map_io(void)
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index 7040352b16b4..96f9ef34d2fb 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -32,7 +32,7 @@
 #include <plat/gpmc.h>
 #include <plat/usb.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 #include <plat/onenand.h>
 
 #include "mux.h"
@@ -455,16 +455,16 @@ static void igep2_disable_dvi(struct omap_dss_device *dssdev)
 	gpio_direction_output(IGEP2_GPIO_DVI_PUP, 0);
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= igep2_enable_dvi,
 	.platform_disable	= igep2_disable_dvi,
+	.i2c_bus_num = 3,
 };
 
 static struct omap_dss_device igep2_dvi_device = {
 	.type			= OMAP_DISPLAY_TYPE_DPI,
 	.name			= "dvi",
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index abe8c7e496a2..f8f8a68a4899 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -27,6 +27,7 @@
 #include <linux/io.h>
 #include <linux/smsc911x.h>
 #include <linux/mmc/host.h>
+#include <linux/gpio.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -43,6 +44,9 @@
 #include <plat/usb.h>
 #include <plat/gpmc-smsc911x.h>
 
+#include <video/omapdss.h>
+#include <video/omap-panel-generic-dpi.h>
+
 #include "board-flash.h"
 #include "mux.h"
 #include "hsmmc.h"
@@ -179,29 +183,108 @@ static inline void __init ldp_init_smsc911x(void)
 	gpmc_smsc911x_init(&smsc911x_cfg);
 }
 
-static struct platform_device ldp_lcd_device = {
-	.name		= "ldp_lcd",
-	.id		= -1,
+/* LCD */
+
+static int ldp_backlight_gpio;
+static int ldp_lcd_enable_gpio;
+
+#define LCD_PANEL_RESET_GPIO		55
+#define LCD_PANEL_QVGA_GPIO		56
+
+static int ldp_panel_enable_lcd(struct omap_dss_device *dssdev)
+{
+	if (gpio_is_valid(ldp_lcd_enable_gpio))
+		gpio_direction_output(ldp_lcd_enable_gpio, 1);
+	if (gpio_is_valid(ldp_backlight_gpio))
+		gpio_direction_output(ldp_backlight_gpio, 1);
+
+	return 0;
+}
+
+static void ldp_panel_disable_lcd(struct omap_dss_device *dssdev)
+{
+	if (gpio_is_valid(ldp_lcd_enable_gpio))
+		gpio_direction_output(ldp_lcd_enable_gpio, 0);
+	if (gpio_is_valid(ldp_backlight_gpio))
+		gpio_direction_output(ldp_backlight_gpio, 0);
+}
+
+static struct panel_generic_dpi_data ldp_panel_data = {
+	.name			= "nec_nl2432dr22-11b",
+	.platform_enable	= ldp_panel_enable_lcd,
+	.platform_disable	= ldp_panel_disable_lcd,
 };
 
-static struct omap_lcd_config ldp_lcd_config __initdata = {
-	.ctrl_name	= "internal",
+static struct omap_dss_device ldp_lcd_device = {
+	.name			= "lcd",
+	.driver_name		= "generic_dpi_panel",
+	.type			= OMAP_DISPLAY_TYPE_DPI,
+	.phy.dpi.data_lines	= 18,
+	.data			= &ldp_panel_data,
+};
+
+static struct omap_dss_device *ldp_dss_devices[] = {
+	&ldp_lcd_device,
 };
 
-static struct omap_board_config_kernel ldp_config[] __initdata = {
-	{ OMAP_TAG_LCD,		&ldp_lcd_config },
+static struct omap_dss_board_info ldp_dss_data = {
+	.num_devices	= ARRAY_SIZE(ldp_dss_devices),
+	.devices	= ldp_dss_devices,
+	.default_device	= &ldp_lcd_device,
 };
 
+static void __init ldp_display_init(void)
+{
+	int r;
+
+	static struct gpio gpios[] __initdata = {
+		{LCD_PANEL_RESET_GPIO, GPIOF_OUT_INIT_HIGH, "LCD RESET"},
+		{LCD_PANEL_QVGA_GPIO, GPIOF_OUT_INIT_HIGH, "LCD QVGA"},
+	};
+
+	r = gpio_request_array(gpios, ARRAY_SIZE(gpios));
+	if (r) {
+		pr_err("Cannot request LCD GPIOs, error %d\n", r);
+		return;
+	}
+
+	omap_display_init(&ldp_dss_data);
+}
+
 static void __init omap_ldp_init_early(void)
 {
 	omap2_init_common_infrastructure();
 	omap2_init_common_devices(NULL, NULL);
 }
 
+static int ldp_twl_gpio_setup(struct device *dev, unsigned gpio, unsigned ngpio)
+{
+	int r;
+
+	struct gpio gpios[] = {
+		{gpio + 7 , GPIOF_OUT_INIT_LOW, "LCD ENABLE"},
+		{gpio + 15, GPIOF_OUT_INIT_LOW, "LCD BACKLIGHT"},
+	};
+
+	r = gpio_request_array(gpios, ARRAY_SIZE(gpios));
+	if (r) {
+		pr_err("Cannot request LCD GPIOs, error %d\n", r);
+		ldp_backlight_gpio = -EINVAL;
+		ldp_lcd_enable_gpio = -EINVAL;
+		return r;
+	}
+
+	ldp_backlight_gpio = gpio + 15;
+	ldp_lcd_enable_gpio = gpio + 7;
+
+	return 0;
+}
+
 static struct twl4030_gpio_platform_data ldp_gpio_data = {
 	.gpio_base	= OMAP_MAX_GPIO_LINES,
 	.irq_base	= TWL4030_GPIO_IRQ_BASE,
 	.irq_end	= TWL4030_GPIO_IRQ_END,
+	.setup		= ldp_twl_gpio_setup,
 };
 
 static struct regulator_consumer_supply ldp_vmmc1_supply[] = {
@@ -243,10 +326,31 @@ static struct regulator_init_data ldp_vaux1 = {
 	.consumer_supplies		= ldp_vaux1_supplies,
 };
 
+static struct regulator_consumer_supply ldp_vpll2_supplies[] = {
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi1"),
+};
+
+static struct regulator_init_data ldp_vpll2 = {
+	.constraints = {
+		.name			= "VDVI",
+		.min_uV			= 1800000,
+		.max_uV			= 1800000,
+		.apply_uV		= true,
+		.valid_modes_mask	= REGULATOR_MODE_NORMAL
+					| REGULATOR_MODE_STANDBY,
+		.valid_ops_mask		= REGULATOR_CHANGE_MODE
+					| REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= ARRAY_SIZE(ldp_vpll2_supplies),
+	.consumer_supplies	= ldp_vpll2_supplies,
+};
+
 static struct twl4030_platform_data ldp_twldata = {
 	/* platform_data for children goes here */
 	.vmmc1		= &ldp_vmmc1,
 	.vaux1		= &ldp_vaux1,
+	.vpll2		= &ldp_vpll2,
 	.gpio		= &ldp_gpio_data,
 	.keypad		= &ldp_kp_twl4030_data,
 };
@@ -272,7 +376,6 @@ static struct omap2_hsmmc_info mmc[] __initdata = {
 };
 
 static struct platform_device *ldp_devices[] __initdata = {
-	&ldp_lcd_device,
 	&ldp_gpio_keys_device,
 };
 
@@ -317,8 +420,6 @@ static struct mtd_partition ldp_nand_partitions[] = {
 static void __init omap_ldp_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
-	omap_board_config = ldp_config;
-	omap_board_config_size = ARRAY_SIZE(ldp_config);
 	ldp_init_smsc911x();
 	omap_i2c_init();
 	platform_add_devices(ldp_devices, ARRAY_SIZE(ldp_devices));
@@ -329,6 +430,7 @@ static void __init omap_ldp_init(void)
 		ARRAY_SIZE(ldp_nand_partitions), ZOOM_NAND_CS, 0);
 
 	omap2_hsmmc_init(mmc);
+	ldp_display_init();
 }
 
 MACHINE_START(OMAP_LDP, "OMAP LDP board")
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 1fde8a0474bb..928933ba28ce 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -42,7 +42,7 @@
 #include <plat/board.h>
 #include <plat/common.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 #include <plat/gpmc.h>
 #include <plat/nand.h>
 #include <plat/usb.h>
@@ -203,16 +203,16 @@ static void beagle_disable_dvi(struct omap_dss_device *dssdev)
 		gpio_set_value(dssdev->reset_gpio, 0);
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name = "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable = beagle_enable_dvi,
 	.platform_disable = beagle_disable_dvi,
+	.i2c_bus_num = 3,
 };
 
 static struct omap_dss_device beagle_dvi_device = {
 	.type = OMAP_DISPLAY_TYPE_DPI,
 	.name = "dvi",
-	.driver_name = "generic_dpi_panel",
+	.driver_name = "dvi",
 	.data = &dvi_panel,
 	.phy.dpi.data_lines = 24,
 	.reset_gpio = -EINVAL,
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 15c69a0c1ce5..0d5a9e46a6af 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -45,7 +45,7 @@
 #include <plat/common.h>
 #include <plat/mcspi.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include "mux.h"
 #include "sdram-micron-mt46h32m32lf-6.h"
@@ -247,8 +247,7 @@ static void omap3_evm_disable_dvi(struct omap_dss_device *dssdev)
 	dvi_enabled = 0;
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= omap3_evm_enable_dvi,
 	.platform_disable	= omap3_evm_disable_dvi,
 };
@@ -256,7 +255,7 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device omap3_evm_dvi_device = {
 	.name			= "dvi",
 	.type			= OMAP_DISPLAY_TYPE_DPI,
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index ace56938dd3b..cca523eb73b4 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -335,7 +335,7 @@ static struct regulator_consumer_supply pandora_vmmc3_supply[] = {
 static struct regulator_consumer_supply pandora_vdds_supplies[] = {
 	REGULATOR_SUPPLY("vdds_sdi", "omapdss"),
 	REGULATOR_SUPPLY("vdds_dsi", "omapdss"),
-	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi1"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi.0"),
 };
 
 static struct regulator_consumer_supply pandora_vcc_lcd_supply[] = {
diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c
index ba13e1d5d0ab..4732589ad97e 100644
--- a/arch/arm/mach-omap2/board-omap3stalker.c
+++ b/arch/arm/mach-omap2/board-omap3stalker.c
@@ -41,6 +41,7 @@
 #include <plat/usb.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include <plat/mcspi.h>
 #include <linux/input/matrix_keypad.h>
@@ -107,39 +108,6 @@ static void __init omap3_stalker_display_init(void)
 	return;
 }
 
-static int omap3_stalker_enable_lcd(struct omap_dss_device *dssdev)
-{
-	if (dvi_enabled) {
-		printk(KERN_ERR "cannot enable LCD, DVI is enabled\n");
-		return -EINVAL;
-	}
-	gpio_set_value(DSS_ENABLE_GPIO, 1);
-	gpio_set_value(LCD_PANEL_BKLIGHT_GPIO, 1);
-	lcd_enabled = 1;
-	return 0;
-}
-
-static void omap3_stalker_disable_lcd(struct omap_dss_device *dssdev)
-{
-	gpio_set_value(DSS_ENABLE_GPIO, 0);
-	gpio_set_value(LCD_PANEL_BKLIGHT_GPIO, 0);
-	lcd_enabled = 0;
-}
-
-static struct panel_generic_dpi_data lcd_panel = {
-	.name			= "generic",
-	.platform_enable	= omap3_stalker_enable_lcd,
-	.platform_disable	= omap3_stalker_disable_lcd,
-};
-
-static struct omap_dss_device omap3_stalker_lcd_device = {
-	.name			= "lcd",
-	.driver_name		= "generic_dpi_panel",
-	.data			= &lcd_panel,
-	.phy.dpi.data_lines	= 24,
-	.type			= OMAP_DISPLAY_TYPE_DPI,
-};
-
 static int omap3_stalker_enable_tv(struct omap_dss_device *dssdev)
 {
 	return 0;
@@ -179,8 +147,7 @@ static void omap3_stalker_disable_dvi(struct omap_dss_device *dssdev)
 	dvi_enabled = 0;
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= omap3_stalker_enable_dvi,
 	.platform_disable	= omap3_stalker_disable_dvi,
 };
@@ -188,13 +155,12 @@ static struct panel_generic_dpi_data dvi_panel = {
 static struct omap_dss_device omap3_stalker_dvi_device = {
 	.name			= "dvi",
 	.type			= OMAP_DISPLAY_TYPE_DPI,
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
 
 static struct omap_dss_device *omap3_stalker_dss_devices[] = {
-	&omap3_stalker_lcd_device,
 	&omap3_stalker_tv_device,
 	&omap3_stalker_dvi_device,
 };
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 49e4bd207cb6..abb68913e047 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -104,15 +104,6 @@ static struct omap2_hsmmc_info mmc[] = {
 	{}	/* Terminator */
 };
 
-static struct platform_device omap3_touchbook_lcd_device = {
-	.name		= "omap3touchbook_lcd",
-	.id		= -1,
-};
-
-static struct omap_lcd_config omap3_touchbook_lcd_config __initdata = {
-	.ctrl_name	= "internal",
-};
-
 static struct regulator_consumer_supply touchbook_vmmc1_supply[] = {
 	REGULATOR_SUPPLY("vmmc", "omap_hsmmc.0"),
 };
@@ -165,14 +156,12 @@ static struct twl4030_gpio_platform_data touchbook_gpio_data = {
 static struct regulator_consumer_supply touchbook_vdac_supply[] = {
 {
 	.supply		= "vdac",
-	.dev		= &omap3_touchbook_lcd_device.dev,
 },
 };
 
 static struct regulator_consumer_supply touchbook_vdvi_supply[] = {
 {
 	.supply		= "vdvi",
-	.dev		= &omap3_touchbook_lcd_device.dev,
 },
 };
 
@@ -316,10 +305,6 @@ static struct platform_device keys_gpio = {
 	},
 };
 
-static struct omap_board_config_kernel omap3_touchbook_config[] __initdata = {
-	{ OMAP_TAG_LCD,		&omap3_touchbook_lcd_config },
-};
-
 #ifdef CONFIG_OMAP_MUX
 static struct omap_board_mux board_mux[] __initdata = {
 	{ .reg_offset = OMAP_MUX_TERMINATOR },
@@ -339,7 +324,6 @@ static void __init omap3_touchbook_init_irq(void)
 }
 
 static struct platform_device *omap3_touchbook_devices[] __initdata = {
-	&omap3_touchbook_lcd_device,
 	&leds_gpio,
 	&keys_gpio,
 };
@@ -376,8 +360,6 @@ early_param("tbr", early_touchbook_revision);
 static void __init omap3_touchbook_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
-	omap_board_config = omap3_touchbook_config;
-	omap_board_config_size = ARRAY_SIZE(omap3_touchbook_config);
 
 	pm_power_off = omap3_touchbook_poweroff;
 
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 683bede73d54..ed38d8fd090f 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -40,7 +40,7 @@
 #include <plat/common.h>
 #include <plat/usb.h>
 #include <plat/mmc.h>
-#include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 
 #include "hsmmc.h"
 #include "control.h"
@@ -455,16 +455,16 @@ static void omap4_panda_disable_dvi(struct omap_dss_device *dssdev)
 }
 
 /* Using generic display panel */
-static struct panel_generic_dpi_data omap4_dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data omap4_dvi_panel = {
 	.platform_enable	= omap4_panda_enable_dvi,
 	.platform_disable	= omap4_panda_disable_dvi,
+	.i2c_bus_num = 3,
 };
 
 struct omap_dss_device omap4_panda_dvi_device = {
 	.type			= OMAP_DISPLAY_TYPE_DPI,
 	.name			= "dvi",
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &omap4_dvi_panel,
 	.phy.dpi.data_lines	= 24,
 	.reset_gpio		= PANDA_DVI_TFP410_POWER_DOWN_GPIO,
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index e592fb134c4e..ec0f60c1cb7c 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -46,6 +46,7 @@
 #include <plat/common.h>
 #include <video/omapdss.h>
 #include <video/omap-panel-generic-dpi.h>
+#include <video/omap-panel-dvi.h>
 #include <plat/gpmc.h>
 #include <mach/hardware.h>
 #include <plat/nand.h>
@@ -182,16 +183,16 @@ static void overo_panel_disable_dvi(struct omap_dss_device *dssdev)
 	dvi_enabled = 0;
 }
 
-static struct panel_generic_dpi_data dvi_panel = {
-	.name			= "generic",
+static struct panel_dvi_platform_data dvi_panel = {
 	.platform_enable	= overo_panel_enable_dvi,
 	.platform_disable	= overo_panel_disable_dvi,
+	.i2c_bus_num		= 3,
 };
 
 static struct omap_dss_device overo_dvi_device = {
 	.name			= "dvi",
 	.type			= OMAP_DISPLAY_TYPE_DPI,
-	.driver_name		= "generic_dpi_panel",
+	.driver_name		= "dvi",
 	.data			= &dvi_panel,
 	.phy.dpi.data_lines	= 24,
 };
diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index 5a886cd2c598..ba1aa07bdb29 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -900,7 +900,6 @@ static struct twl4030_platform_data rx51_twldata __initdata = {
 };
 
 static struct tpa6130a2_platform_data rx51_tpa6130a2_data __initdata_or_module = {
-	.id			= TPA6130A2,
 	.power_gpio		= 98,
 };
 
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index a6c473bbb3d6..faa2a8e28de5 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -79,29 +79,6 @@ static struct cpuidle_params rx51_cpuidle_params[] = {
 	{7505 + 15274, 484329, 1},
 };
 
-static struct omap_lcd_config rx51_lcd_config = {
-	.ctrl_name	= "internal",
-};
-
-static struct omap_fbmem_config rx51_fbmem0_config = {
-	.size = 752 * 1024,
-};
-
-static struct omap_fbmem_config rx51_fbmem1_config = {
-	.size = 752 * 1024,
-};
-
-static struct omap_fbmem_config rx51_fbmem2_config = {
-	.size = 752 * 1024,
-};
-
-static struct omap_board_config_kernel rx51_config[] = {
-	{ OMAP_TAG_FBMEM,	&rx51_fbmem0_config },
-	{ OMAP_TAG_FBMEM,	&rx51_fbmem1_config },
-	{ OMAP_TAG_FBMEM,	&rx51_fbmem2_config },
-	{ OMAP_TAG_LCD,		&rx51_lcd_config },
-};
-
 static void __init rx51_init_early(void)
 {
 	struct omap_sdrc_params *sdrc_params;
@@ -128,8 +105,6 @@ static struct omap_musb_board_data musb_board_data = {
 static void __init rx51_init(void)
 {
 	omap3_mux_init(board_mux, OMAP_PACKAGE_CBB);
-	omap_board_config = rx51_config;
-	omap_board_config_size = ARRAY_SIZE(rx51_config);
 	omap3_pm_init_cpuidle(rx51_cpuidle_params);
 	omap_serial_init();
 	usb_musb_init(&musb_board_data);
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 5391079c8689..ae8ea5b3b1a0 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -329,6 +329,38 @@ static void omap_init_audio(void)
 static inline void omap_init_audio(void) {}
 #endif
 
+#if defined(CONFIG_SND_OMAP_SOC_MCPDM) || \
+		defined(CONFIG_SND_OMAP_SOC_MCPDM_MODULE)
+
+static struct omap_device_pm_latency omap_mcpdm_latency[] = {
+	{
+		.deactivate_func = omap_device_idle_hwmods,
+		.activate_func = omap_device_enable_hwmods,
+		.flags = OMAP_DEVICE_LATENCY_AUTO_ADJUST,
+	},
+};
+
+static void omap_init_mcpdm(void)
+{
+	struct omap_hwmod *oh;
+	struct omap_device *od;
+
+	oh = omap_hwmod_lookup("mcpdm");
+	if (!oh) {
+		printk(KERN_ERR "Could not look up mcpdm hw_mod\n");
+		return;
+	}
+
+	od = omap_device_build("omap-mcpdm", -1, oh, NULL, 0,
+				omap_mcpdm_latency,
+				ARRAY_SIZE(omap_mcpdm_latency), 0);
+	if (IS_ERR(od))
+		printk(KERN_ERR "Could not build omap_device for omap-mcpdm-dai\n");
+}
+#else
+static inline void omap_init_mcpdm(void) {}
+#endif
+
 #if defined(CONFIG_SPI_OMAP24XX) || defined(CONFIG_SPI_OMAP24XX_MODULE)
 
 #include <plat/mcspi.h>
@@ -682,6 +714,7 @@ static int __init omap2_init_devices(void)
 	 * in alphabetical order so they're easier to sort through.
 	 */
 	omap_init_audio();
+	omap_init_mcpdm();
 	omap_init_camera();
 	omap_init_mbox();
 	omap_init_mcspi();
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index a5b7a236aa5b..62510ec863c6 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -27,6 +27,8 @@
 #include <plat/omap_device.h>
 #include <plat/omap-pm.h>
 
+#include "control.h"
+
 static struct platform_device omap_display_device = {
 	.name          = "omapdss",
 	.id            = -1,
@@ -61,7 +63,7 @@ static const struct omap_dss_hwmod_data omap3_dss_hwmod_data[] __initdata = {
 	{ "dss_dispc", "omapdss_dispc", -1 },
 	{ "dss_rfbi", "omapdss_rfbi", -1 },
 	{ "dss_venc", "omapdss_venc", -1 },
-	{ "dss_dsi1", "omapdss_dsi1", -1 },
+	{ "dss_dsi1", "omapdss_dsi", 0 },
 };
 
 static const struct omap_dss_hwmod_data omap4_dss_hwmod_data[] __initdata = {
@@ -69,11 +71,58 @@ static const struct omap_dss_hwmod_data omap4_dss_hwmod_data[] __initdata = {
 	{ "dss_dispc", "omapdss_dispc", -1 },
 	{ "dss_rfbi", "omapdss_rfbi", -1 },
 	{ "dss_venc", "omapdss_venc", -1 },
-	{ "dss_dsi1", "omapdss_dsi1", -1 },
-	{ "dss_dsi2", "omapdss_dsi2", -1 },
+	{ "dss_dsi1", "omapdss_dsi", 0 },
+	{ "dss_dsi2", "omapdss_dsi", 1 },
 	{ "dss_hdmi", "omapdss_hdmi", -1 },
 };
 
+static int omap4_dsi_mux_pads(int dsi_id, unsigned lanes)
+{
+	u32 enable_mask, enable_shift;
+	u32 pipd_mask, pipd_shift;
+	u32 reg;
+
+	if (dsi_id == 0) {
+		enable_mask = OMAP4_DSI1_LANEENABLE_MASK;
+		enable_shift = OMAP4_DSI1_LANEENABLE_SHIFT;
+		pipd_mask = OMAP4_DSI1_PIPD_MASK;
+		pipd_shift = OMAP4_DSI1_PIPD_SHIFT;
+	} else if (dsi_id == 1) {
+		enable_mask = OMAP4_DSI2_LANEENABLE_MASK;
+		enable_shift = OMAP4_DSI2_LANEENABLE_SHIFT;
+		pipd_mask = OMAP4_DSI2_PIPD_MASK;
+		pipd_shift = OMAP4_DSI2_PIPD_SHIFT;
+	} else {
+		return -ENODEV;
+	}
+
+	reg = omap4_ctrl_pad_readl(OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_DSIPHY);
+
+	reg &= ~enable_mask;
+	reg &= ~pipd_mask;
+
+	reg |= (lanes << enable_shift) & enable_mask;
+	reg |= (lanes << pipd_shift) & pipd_mask;
+
+	omap4_ctrl_pad_writel(reg, OMAP4_CTRL_MODULE_PAD_CORE_CONTROL_DSIPHY);
+
+	return 0;
+}
+
+static int omap_dsi_enable_pads(int dsi_id, unsigned lane_mask)
+{
+	if (cpu_is_omap44xx())
+		return omap4_dsi_mux_pads(dsi_id, lane_mask);
+
+	return 0;
+}
+
+static void omap_dsi_disable_pads(int dsi_id, unsigned lane_mask)
+{
+	if (cpu_is_omap44xx())
+		omap4_dsi_mux_pads(dsi_id, 0);
+}
+
 int __init omap_display_init(struct omap_dss_board_info *board_data)
 {
 	int r = 0;
@@ -96,6 +145,11 @@ int __init omap_display_init(struct omap_dss_board_info *board_data)
 		oh_count = ARRAY_SIZE(omap4_dss_hwmod_data);
 	}
 
+	if (board_data->dsi_enable_pads == NULL)
+		board_data->dsi_enable_pads = omap_dsi_enable_pads;
+	if (board_data->dsi_disable_pads == NULL)
+		board_data->dsi_disable_pads = omap_dsi_disable_pads;
+
 	pdata.board_data = board_data;
 	pdata.board_data->get_context_loss_count =
 		omap_pm_get_dev_context_loss_count;
diff --git a/arch/arm/mach-omap2/include/mach/entry-macro.S b/arch/arm/mach-omap2/include/mach/entry-macro.S
index ceb8b7e593d7..feb90a10945a 100644
--- a/arch/arm/mach-omap2/include/mach/entry-macro.S
+++ b/arch/arm/mach-omap2/include/mach/entry-macro.S
@@ -78,7 +78,7 @@
 4401:		ldr     \irqstat, [\base, #GIC_CPU_INTACK]
 		ldr     \tmp, =1021
 		bic     \irqnr, \irqstat, #0x1c00
-		cmp     \irqnr, #29
+		cmp     \irqnr, #15
 		cmpcc   \irqnr, \irqnr
 		cmpne   \irqnr, \tmp
 		cmpcs   \irqnr, \irqnr
@@ -101,18 +101,6 @@
 		it	cs
 		cmpcs	\irqnr, \irqnr
 		.endm
-
-		/* As above, this assumes that irqstat and base are preserved */
-
-		.macro test_for_ltirq, irqnr, irqstat, base, tmp
-		bic	\irqnr, \irqstat, #0x1c00
-		mov 	\tmp, #0
-		cmp	\irqnr, #29
-		itt	eq
-		moveq	\tmp, #1
-		streq	\irqstat, [\base, #GIC_CPU_EOI]
-		cmp	\tmp, #0
-		.endm
 #endif	/* CONFIG_SMP */
 
 #else	/* MULTI_OMAP2 */
diff --git a/arch/arm/mach-omap2/iommu2.c b/arch/arm/mach-omap2/iommu2.c
index f286012783c6..eefc37912ef3 100644
--- a/arch/arm/mach-omap2/iommu2.c
+++ b/arch/arm/mach-omap2/iommu2.c
@@ -66,7 +66,7 @@
 	 ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
 
 
-static void __iommu_set_twl(struct iommu *obj, bool on)
+static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 {
 	u32 l = iommu_read_reg(obj, MMU_CNTL);
 
@@ -85,7 +85,7 @@ static void __iommu_set_twl(struct iommu *obj, bool on)
 }
 
 
-static int omap2_iommu_enable(struct iommu *obj)
+static int omap2_iommu_enable(struct omap_iommu *obj)
 {
 	u32 l, pa;
 	unsigned long timeout;
@@ -127,7 +127,7 @@ static int omap2_iommu_enable(struct iommu *obj)
 	return 0;
 }
 
-static void omap2_iommu_disable(struct iommu *obj)
+static void omap2_iommu_disable(struct omap_iommu *obj)
 {
 	u32 l = iommu_read_reg(obj, MMU_CNTL);
 
@@ -138,12 +138,12 @@ static void omap2_iommu_disable(struct iommu *obj)
 	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
 
-static void omap2_iommu_set_twl(struct iommu *obj, bool on)
+static void omap2_iommu_set_twl(struct omap_iommu *obj, bool on)
 {
 	__iommu_set_twl(obj, false);
 }
 
-static u32 omap2_iommu_fault_isr(struct iommu *obj, u32 *ra)
+static u32 omap2_iommu_fault_isr(struct omap_iommu *obj, u32 *ra)
 {
 	u32 stat, da;
 	u32 errs = 0;
@@ -173,13 +173,13 @@ static u32 omap2_iommu_fault_isr(struct iommu *obj, u32 *ra)
 	return errs;
 }
 
-static void omap2_tlb_read_cr(struct iommu *obj, struct cr_regs *cr)
+static void omap2_tlb_read_cr(struct omap_iommu *obj, struct cr_regs *cr)
 {
 	cr->cam = iommu_read_reg(obj, MMU_READ_CAM);
 	cr->ram = iommu_read_reg(obj, MMU_READ_RAM);
 }
 
-static void omap2_tlb_load_cr(struct iommu *obj, struct cr_regs *cr)
+static void omap2_tlb_load_cr(struct omap_iommu *obj, struct cr_regs *cr)
 {
 	iommu_write_reg(obj, cr->cam | MMU_CAM_V, MMU_CAM);
 	iommu_write_reg(obj, cr->ram, MMU_RAM);
@@ -193,7 +193,8 @@ static u32 omap2_cr_to_virt(struct cr_regs *cr)
 	return cr->cam & mask;
 }
 
-static struct cr_regs *omap2_alloc_cr(struct iommu *obj, struct iotlb_entry *e)
+static struct cr_regs *omap2_alloc_cr(struct omap_iommu *obj,
+						struct iotlb_entry *e)
 {
 	struct cr_regs *cr;
 
@@ -230,7 +231,8 @@ static u32 omap2_get_pte_attr(struct iotlb_entry *e)
 	return attr;
 }
 
-static ssize_t omap2_dump_cr(struct iommu *obj, struct cr_regs *cr, char *buf)
+static ssize_t
+omap2_dump_cr(struct omap_iommu *obj, struct cr_regs *cr, char *buf)
 {
 	char *p = buf;
 
@@ -254,7 +256,8 @@ static ssize_t omap2_dump_cr(struct iommu *obj, struct cr_regs *cr, char *buf)
 			goto out;					\
 	} while (0)
 
-static ssize_t omap2_iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t len)
+static ssize_t
+omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len)
 {
 	char *p = buf;
 
@@ -280,7 +283,7 @@ out:
 	return p - buf;
 }
 
-static void omap2_iommu_save_ctx(struct iommu *obj)
+static void omap2_iommu_save_ctx(struct omap_iommu *obj)
 {
 	int i;
 	u32 *p = obj->ctx;
@@ -293,7 +296,7 @@ static void omap2_iommu_save_ctx(struct iommu *obj)
 	BUG_ON(p[0] != IOMMU_ARCH_VERSION);
 }
 
-static void omap2_iommu_restore_ctx(struct iommu *obj)
+static void omap2_iommu_restore_ctx(struct omap_iommu *obj)
 {
 	int i;
 	u32 *p = obj->ctx;
@@ -343,13 +346,13 @@ static const struct iommu_functions omap2_iommu_ops = {
 
 static int __init omap2_iommu_init(void)
 {
-	return install_iommu_arch(&omap2_iommu_ops);
+	return omap_install_iommu_arch(&omap2_iommu_ops);
 }
 module_init(omap2_iommu_init);
 
 static void __exit omap2_iommu_exit(void)
 {
-	uninstall_iommu_arch(&omap2_iommu_ops);
+	omap_uninstall_iommu_arch(&omap2_iommu_ops);
 }
 module_exit(omap2_iommu_exit);
 
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index ce65e9329c7b..889464dc7b2d 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -109,12 +109,10 @@ void __init smp_init_cpus(void)
 	ncores = scu_get_core_count(scu_base);
 
 	/* sanity check */
-	if (ncores > NR_CPUS) {
-		printk(KERN_WARNING
-		       "OMAP4: no. of cores (%d) greater than configured "
-		       "maximum of %d - clipping\n",
-		       ncores, NR_CPUS);
-		ncores = NR_CPUS;
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
 	}
 
 	for (i = 0; i < ncores; i++)
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 6201422c0606..79325c65c23c 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -5430,7 +5430,7 @@ static __initdata struct omap_hwmod *omap44xx_hwmods[] = {
 	&omap44xx_mcbsp4_hwmod,
 
 	/* mcpdm class */
-/*	&omap44xx_mcpdm_hwmod, */
+	&omap44xx_mcpdm_hwmod,
 
 	/* mcspi class */
 	&omap44xx_mcspi1_hwmod,
diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c
index 34c01a7de810..f49804f181d4 100644
--- a/arch/arm/mach-omap2/smartreflex.c
+++ b/arch/arm/mach-omap2/smartreflex.c
@@ -247,7 +247,7 @@ static void sr_stop_vddautocomp(struct omap_sr *sr)
  * driver register and sr device intializtion API's. Only one call
  * will ultimately succeed.
  *
- * Currently this function registers interrrupt handler for a particular SR
+ * Currently this function registers interrupt handler for a particular SR
  * if smartreflex class driver is already registered and has
  * requested for interrupts and the SR interrupt line in present.
  */
diff --git a/arch/arm/mach-omap2/twl-common.c b/arch/arm/mach-omap2/twl-common.c
index daa056ed8738..522435772168 100644
--- a/arch/arm/mach-omap2/twl-common.c
+++ b/arch/arm/mach-omap2/twl-common.c
@@ -99,7 +99,7 @@ static struct regulator_init_data omap3_vdac_idata = {
 
 static struct regulator_consumer_supply omap3_vpll2_supplies[] = {
 	REGULATOR_SUPPLY("vdds_dsi", "omapdss"),
-	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi1"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi.0"),
 };
 
 static struct regulator_init_data omap3_vpll2_idata = {
@@ -235,6 +235,12 @@ static struct regulator_init_data omap4_vana_idata = {
 	},
 };
 
+static struct regulator_consumer_supply omap4_vcxio_supply[] = {
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dss"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi.0"),
+	REGULATOR_SUPPLY("vdds_dsi", "omapdss_dsi.1"),
+};
+
 static struct regulator_init_data omap4_vcxio_idata = {
 	.constraints = {
 		.min_uV			= 1800000,
@@ -243,7 +249,10 @@ static struct regulator_init_data omap4_vcxio_idata = {
 					| REGULATOR_MODE_STANDBY,
 		.valid_ops_mask		= REGULATOR_CHANGE_MODE
 					| REGULATOR_CHANGE_STATUS,
+		.always_on		= true,
 	},
+	.num_consumer_supplies	= ARRAY_SIZE(omap4_vcxio_supply),
+	.consumer_supplies	= omap4_vcxio_supply,
 };
 
 static struct regulator_init_data omap4_vusb_idata = {
diff --git a/arch/arm/mach-orion5x/Makefile.boot b/arch/arm/mach-orion5x/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-orion5x/Makefile.boot
+++ b/arch/arm/mach-orion5x/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 0ab531d047fc..22ace0bf2f92 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -308,8 +308,8 @@ void __init orion5x_init(void)
  * Many orion-based systems have buggy bootloader implementations.
  * This is a common fixup for bogus memory tags.
  */
-void __init tag_fixup_mem32(struct machine_desc *mdesc, struct tag *t,
-			    char **from, struct meminfo *meminfo)
+void __init tag_fixup_mem32(struct tag *t, char **from,
+			    struct meminfo *meminfo)
 {
 	for (; t->hdr.size; t = tag_next(t))
 		if (t->hdr.tag == ATAG_MEM &&
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 3e5499dda49a..909489f4d23e 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -53,11 +53,9 @@ int orion5x_pci_sys_setup(int nr, struct pci_sys_data *sys);
 struct pci_bus *orion5x_pci_sys_scan_bus(int nr, struct pci_sys_data *sys);
 int orion5x_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin);
 
-struct machine_desc;
 struct meminfo;
 struct tag;
-extern void __init tag_fixup_mem32(struct machine_desc *, struct tag *,
-				   char **, struct meminfo *);
+extern void __init tag_fixup_mem32(struct tag *, char **, struct meminfo *);
 
 
 #endif
diff --git a/arch/arm/mach-pnx4008/Makefile.boot b/arch/arm/mach-pnx4008/Makefile.boot
index 44c7117e20dd..9fa19baa7f2e 100644
--- a/arch/arm/mach-pnx4008/Makefile.boot
+++ b/arch/arm/mach-pnx4008/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y		:= 0x80008000
+   zreladdr-y		+= 0x80008000
 params_phys-y		:= 0x80000100
 initrd_phys-y		:= 0x80800000
 
diff --git a/arch/arm/mach-prima2/Makefile.boot b/arch/arm/mach-prima2/Makefile.boot
index d023db3ae4ff..c77a4883a4ee 100644
--- a/arch/arm/mach-prima2/Makefile.boot
+++ b/arch/arm/mach-prima2/Makefile.boot
@@ -1,3 +1,3 @@
-zreladdr-y		:= 0x00008000
+zreladdr-y		+= 0x00008000
 params_phys-y		:= 0x00000100
 initrd_phys-y		:= 0x00800000
diff --git a/arch/arm/mach-pxa/Makefile.boot b/arch/arm/mach-pxa/Makefile.boot
index 1ead67178eca..2c1ae92f2106 100644
--- a/arch/arm/mach-pxa/Makefile.boot
+++ b/arch/arm/mach-pxa/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0xa0008000
+   zreladdr-y	+= 0xa0008000
 
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index e40dc47cf2f1..3a7387f93c38 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -837,8 +837,8 @@ static void __init cm_x300_init(void)
 	cm_x300_init_bl();
 }
 
-static void __init cm_x300_fixup(struct machine_desc *mdesc, struct tag *tags,
-				 char **cmdline, struct meminfo *mi)
+static void __init cm_x300_fixup(struct tag *tags, char **cmdline,
+				 struct meminfo *mi)
 {
 	/* Make sure that mi->bank[0].start = PHYS_ADDR */
 	for (; tags->hdr.size; tags = tag_next(tags))
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 185a37cad254..3e9483b06053 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -705,8 +705,8 @@ static void __init corgi_init(void)
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init fixup_corgi(struct machine_desc *desc,
-		struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init fixup_corgi(struct tag *tags, char **cmdline,
+			       struct meminfo *mi)
 {
 	sharpsl_save_param();
 	mi->nr_banks=1;
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index e823c54057f3..8e697dd8accd 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -41,8 +41,7 @@
 #include "clock.h"
 
 /* Only e800 has 128MB RAM */
-void __init eseries_fixup(struct machine_desc *desc,
-	struct tag *tags, char **cmdline, struct meminfo *mi)
+void __init eseries_fixup(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	mi->nr_banks=1;
 	mi->bank[0].start = 0xa0000000;
diff --git a/arch/arm/mach-pxa/eseries.h b/arch/arm/mach-pxa/eseries.h
index 5930f5e2a123..be921965e91a 100644
--- a/arch/arm/mach-pxa/eseries.h
+++ b/arch/arm/mach-pxa/eseries.h
@@ -1,5 +1,4 @@
-void __init eseries_fixup(struct machine_desc *desc,
-	struct tag *tags, char **cmdline, struct meminfo *mi);
+void __init eseries_fixup(struct tag *tags, char **cmdline, struct meminfo *mi);
 
 extern struct pxa2xx_udc_mach_info e7xx_udc_mach_info;
 extern struct pxaficp_platform_data e7xx_ficp_platform_data;
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 25f29adf5114..532c5d3a97d2 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -18,6 +18,8 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 
+#include <asm/exception.h>
+
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/gpio-pxa.h>
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index a113ea9ab4ab..948ce3e729fa 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -454,8 +454,8 @@ static void __init poodle_init(void)
 	poodle_init_spi();
 }
 
-static void __init fixup_poodle(struct machine_desc *desc,
-		struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init fixup_poodle(struct tag *tags, char **cmdline,
+				struct meminfo *mi)
 {
 	sharpsl_save_param();
 	mi->nr_banks=1;
diff --git a/arch/arm/mach-pxa/saar.c b/arch/arm/mach-pxa/saar.c
index 602d70b50f81..fc2c1e05af9c 100644
--- a/arch/arm/mach-pxa/saar.c
+++ b/arch/arm/mach-pxa/saar.c
@@ -540,7 +540,7 @@ static struct mtd_partition saar_onenand_partitions[] = {
 	}, {
 		.name		= "filesystem",
 		.offset		= MTDPART_OFS_APPEND,
-		.size		= SZ_48M,
+		.size		= SZ_32M + SZ_16M,
 		.mask_flags	= 0,
 	}
 };
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 438c7b5e451f..d8dec9113aad 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -970,8 +970,8 @@ static void __init spitz_init(void)
 	spitz_i2c_init();
 }
 
-static void __init spitz_fixup(struct machine_desc *desc,
-		struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init spitz_fixup(struct tag *tags, char **cmdline,
+			       struct meminfo *mi)
 {
 	sharpsl_save_param();
 	mi->nr_banks = 1;
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 9f69a2682693..402b0c96613b 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -960,8 +960,8 @@ static void __init tosa_init(void)
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
-static void __init fixup_tosa(struct machine_desc *desc,
-		struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init fixup_tosa(struct tag *tags, char **cmdline,
+			      struct meminfo *mi)
 {
 	sharpsl_save_param();
 	mi->nr_banks=1;
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
index 54930cccbe54..70e1730ef282 100644
--- a/arch/arm/mach-pxa/xcep.c
+++ b/arch/arm/mach-pxa/xcep.c
@@ -142,8 +142,7 @@ static struct platform_device *devices[] __initdata = {
 
 /* We have to state that there are HWMON devices on the I2C bus on XCEP.
  * Drivers for HWMON verify capabilities of the adapter when loading and
- * refuse to attach if the adapter doesn't support HWMON class of devices.
- * See also Documentation/i2c/porting-clients. */
+ * refuse to attach if the adapter doesn't support HWMON class of devices. */
 static struct i2c_pxa_platform_data xcep_i2c_platform_data  = {
 	.class = I2C_CLASS_HWMON
 };
diff --git a/arch/arm/mach-realview/Makefile.boot b/arch/arm/mach-realview/Makefile.boot
index d97e003d3df4..d2c3d788f688 100644
--- a/arch/arm/mach-realview/Makefile.boot
+++ b/arch/arm/mach-realview/Makefile.boot
@@ -1,9 +1,9 @@
 ifeq ($(CONFIG_REALVIEW_HIGH_PHYS_OFFSET),y)
-   zreladdr-y	:= 0x70008000
+   zreladdr-y	+= 0x70008000
 params_phys-y	:= 0x70000100
 initrd_phys-y	:= 0x70800000
 else
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 endif
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 5c23450d2d1d..d5ed5d4f77d6 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -517,8 +517,7 @@ void __init realview_timer_init(unsigned int timer_irq)
 /*
  * Setup the memory banks.
  */
-void realview_fixup(struct machine_desc *mdesc, struct tag *tags, char **from,
-		    struct meminfo *meminfo)
+void realview_fixup(struct tag *tags, char **from, struct meminfo *meminfo)
 {
 	/*
 	 * Most RealView platforms have 512MB contiguous RAM at 0x70000000.
diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h
index 5c83d1e87a03..47259c89a75e 100644
--- a/arch/arm/mach-realview/core.h
+++ b/arch/arm/mach-realview/core.h
@@ -63,8 +63,8 @@ extern int realview_flash_register(struct resource *res, u32 num);
 extern int realview_eth_register(const char *name, struct resource *res);
 extern int realview_usb_register(struct resource *res);
 extern void realview_init_early(void);
-extern void realview_fixup(struct machine_desc *mdesc, struct tag *tags,
-			   char **from, struct meminfo *meminfo);
+extern void realview_fixup(struct tag *tags, char **from,
+			   struct meminfo *meminfo);
 extern void (*realview_reset)(char);
 
 #endif
diff --git a/arch/arm/mach-realview/include/mach/board-pb1176.h b/arch/arm/mach-realview/include/mach/board-pb1176.h
index 002ab5d8c11c..2a15fef94730 100644
--- a/arch/arm/mach-realview/include/mach/board-pb1176.h
+++ b/arch/arm/mach-realview/include/mach/board-pb1176.h
@@ -70,6 +70,7 @@
 
 #define REALVIEW_DC1176_GIC_CPU_BASE		0x10120000 /* GIC CPU interface, on devchip */
 #define REALVIEW_DC1176_GIC_DIST_BASE		0x10121000 /* GIC distributor, on devchip */
+#define REALVIEW_DC1176_ROM_BASE		0x10200000 /* 16KiB NRAM preudo-ROM, on devchip */
 #define REALVIEW_PB1176_GIC_CPU_BASE		0x10040000 /* GIC CPU interface, on FPGA */
 #define REALVIEW_PB1176_GIC_DIST_BASE		0x10041000 /* GIC distributor, on FPGA */
 #define REALVIEW_PB1176_L220_BASE		0x10110000 /* L220 registers */
diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c
index 4ae943bafa92..e83c654a58d0 100644
--- a/arch/arm/mach-realview/platsmp.c
+++ b/arch/arm/mach-realview/platsmp.c
@@ -52,12 +52,10 @@ void __init smp_init_cpus(void)
 	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores > NR_CPUS) {
-		printk(KERN_WARNING
-		       "Realview: no. of cores (%d) greater than configured "
-		       "maximum of %d - clipping\n",
-		       ncores, NR_CPUS);
-		ncores = NR_CPUS;
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
 	}
 
 	for (i = 0; i < ncores; i++)
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index 7263dea77779..c057540ec776 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -26,6 +26,8 @@
 #include <linux/amba/pl061.h>
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
+#include <linux/mtd/physmap.h>
+#include <linux/mtd/partitions.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -204,22 +206,48 @@ static struct amba_device *amba_devs[] __initdata = {
  * RealView PB1176 platform devices
  */
 static struct resource realview_pb1176_flash_resources[] = {
-	[0] = {
+	{
 		.start		= REALVIEW_PB1176_FLASH_BASE,
 		.end		= REALVIEW_PB1176_FLASH_BASE + REALVIEW_PB1176_FLASH_SIZE - 1,
 		.flags		= IORESOURCE_MEM,
 	},
-	[1] = {
+#ifdef CONFIG_REALVIEW_PB1176_SECURE_FLASH
+	{
 		.start		= REALVIEW_PB1176_SEC_FLASH_BASE,
 		.end		= REALVIEW_PB1176_SEC_FLASH_BASE + REALVIEW_PB1176_SEC_FLASH_SIZE - 1,
 		.flags		= IORESOURCE_MEM,
 	},
-};
-#ifdef CONFIG_REALVIEW_PB1176_SECURE_FLASH
-#define PB1176_FLASH_BLOCKS	2
-#else
-#define PB1176_FLASH_BLOCKS	1
 #endif
+};
+
+static struct physmap_flash_data pb1176_rom_pdata = {
+	.probe_type	= "map_rom",
+	.width		= 4,
+	.nr_parts	= 0,
+};
+
+static struct resource pb1176_rom_resources[] = {
+	/*
+	 * This exposes the PB1176 DevChip ROM as an MTD ROM mapping.
+	 * The reference manual states that this is actually a pseudo-ROM
+	 * programmed in NVRAM.
+	 */
+	{
+		.start		= REALVIEW_DC1176_ROM_BASE,
+		.end		= REALVIEW_DC1176_ROM_BASE + SZ_16K - 1,
+		.flags		= IORESOURCE_MEM,
+	}
+};
+
+static struct platform_device pb1176_rom_device = {
+	.name		= "physmap-flash",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(pb1176_rom_resources),
+	.resource	= pb1176_rom_resources,
+	.dev = {
+		.platform_data = &pb1176_rom_pdata,
+	},
+};
 
 static struct resource realview_pb1176_smsc911x_resources[] = {
 	[0] = {
@@ -316,8 +344,7 @@ static void realview_pb1176_reset(char mode)
 	__raw_writel(REALVIEW_PB1176_SYS_SOFT_RESET, reset_ctrl);
 }
 
-static void realview_pb1176_fixup(struct machine_desc *mdesc,
-				  struct tag *tags, char **from,
+static void realview_pb1176_fixup(struct tag *tags, char **from,
 				  struct meminfo *meminfo)
 {
 	/*
@@ -338,7 +365,8 @@ static void __init realview_pb1176_init(void)
 #endif
 
 	realview_flash_register(realview_pb1176_flash_resources,
-				PB1176_FLASH_BLOCKS);
+				ARRAY_SIZE(realview_pb1176_flash_resources));
+	platform_device_register(&pb1176_rom_device);
 	realview_eth_register(NULL, realview_pb1176_smsc911x_resources);
 	platform_device_register(&realview_i2c_device);
 	realview_usb_register(realview_pb1176_isp1761_resources);
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index 8ec7e52618b4..63c4114afae9 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -319,8 +319,8 @@ static struct sys_timer realview_pbx_timer = {
 	.init		= realview_pbx_timer_init,
 };
 
-static void realview_pbx_fixup(struct machine_desc *mdesc, struct tag *tags,
-			       char **from, struct meminfo *meminfo)
+static void realview_pbx_fixup(struct tag *tags, char **from,
+			       struct meminfo *meminfo)
 {
 #ifdef CONFIG_SPARSEMEM
 	/*
@@ -335,7 +335,7 @@ static void realview_pbx_fixup(struct machine_desc *mdesc, struct tag *tags,
 	meminfo->bank[2].size = SZ_256M;
 	meminfo->nr_banks = 3;
 #else
-	realview_fixup(mdesc, tags, from, meminfo);
+	realview_fixup(tags, from, meminfo);
 #endif
 }
 
diff --git a/arch/arm/mach-rpc/Makefile.boot b/arch/arm/mach-rpc/Makefile.boot
index 9c9e7685ec7c..ae2df0d7d037 100644
--- a/arch/arm/mach-rpc/Makefile.boot
+++ b/arch/arm/mach-rpc/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x10008000
+   zreladdr-y	+= 0x10008000
 params_phys-y	:= 0x10000100
 initrd_phys-y	:= 0x18000000
 
diff --git a/arch/arm/mach-rpc/include/mach/hardware.h b/arch/arm/mach-rpc/include/mach/hardware.h
index dde6b3c0e299..050d63c74cc1 100644
--- a/arch/arm/mach-rpc/include/mach/hardware.h
+++ b/arch/arm/mach-rpc/include/mach/hardware.h
@@ -36,7 +36,7 @@
 
 #define EASI_SIZE		0x08000000	/* EASI I/O */
 #define EASI_START		0x08000000
-#define EASI_BASE		0xe5000000
+#define EASI_BASE		IOMEM(0xe5000000)
 
 #define IO_START		0x03000000	/* I/O */
 #define IO_SIZE			0x01000000
@@ -51,21 +51,20 @@
 /*
  * IO Addresses
  */
-#define VIDC_BASE		IOMEM(0xe0400000)
-#define EXPMASK_BASE		0xe0360000
-#define IOMD_BASE		IOMEM(0xe0200000)
-#define IOC_BASE		IOMEM(0xe0200000)
-#define PCIO_BASE		IOMEM(0xe0010000)
-#define FLOPPYDMA_BASE		IOMEM(0xe002a000)
+#define ECARD_EASI_BASE		(EASI_BASE)
+#define VIDC_BASE		(IO_BASE + 0x00400000)
+#define EXPMASK_BASE		(IO_BASE + 0x00360000)
+#define ECARD_IOC4_BASE		(IO_BASE + 0x00270000)
+#define ECARD_IOC_BASE		(IO_BASE + 0x00240000)
+#define IOMD_BASE		(IO_BASE + 0x00200000)
+#define IOC_BASE		(IO_BASE + 0x00200000)
+#define ECARD_MEMC8_BASE	(IO_BASE + 0x0002b000)
+#define FLOPPYDMA_BASE		(IO_BASE + 0x0002a000)
+#define PCIO_BASE		(IO_BASE + 0x00010000)
+#define ECARD_MEMC_BASE		(IO_BASE + 0x00000000)
 
 #define vidc_writel(val)	__raw_writel(val, VIDC_BASE)
 
-#define IO_EC_EASI_BASE		0x81400000
-#define IO_EC_IOC4_BASE		0x8009c000
-#define IO_EC_IOC_BASE		0x80090000
-#define IO_EC_MEMC8_BASE	0x8000ac00
-#define IO_EC_MEMC_BASE		0x80000000
-
 #define NETSLOT_BASE		0x0302b000
 #define NETSLOT_SIZE		0x00001000
 
diff --git a/arch/arm/mach-rpc/include/mach/io.h b/arch/arm/mach-rpc/include/mach/io.h
index 20da7f486e51..695f4ed2e11b 100644
--- a/arch/arm/mach-rpc/include/mach/io.h
+++ b/arch/arm/mach-rpc/include/mach/io.h
@@ -15,195 +15,18 @@
 
 #include <mach/hardware.h>
 
-#define IO_SPACE_LIMIT 0xffffffff
+#define IO_SPACE_LIMIT 0xffff
 
 /*
- * We use two different types of addressing - PC style addresses, and ARM
- * addresses.  PC style accesses the PC hardware with the normal PC IO
- * addresses, eg 0x3f8 for serial#1.  ARM addresses are 0x80000000+
- * and are translated to the start of IO.  Note that all addresses are
- * shifted left!
- */
-#define __PORT_PCIO(x)	(!((x) & 0x80000000))
-
-/*
- * Dynamic IO functions.
- */
-static inline void __outb (unsigned int value, unsigned int port)
-{
-	unsigned long temp;
-	__asm__ __volatile__(
-	"tst	%2, #0x80000000\n\t"
-	"mov	%0, %4\n\t"
-	"addeq	%0, %0, %3\n\t"
-	"strb	%1, [%0, %2, lsl #2]	@ outb"
-	: "=&r" (temp)
-	: "r" (value), "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)
-	: "cc");
-}
-
-static inline void __outw (unsigned int value, unsigned int port)
-{
-	unsigned long temp;
-	__asm__ __volatile__(
-	"tst	%2, #0x80000000\n\t"
-	"mov	%0, %4\n\t"
-	"addeq	%0, %0, %3\n\t"
-	"str	%1, [%0, %2, lsl #2]	@ outw"
-	: "=&r" (temp)
-	: "r" (value|value<<16), "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)
-	: "cc");
-}
-
-static inline void __outl (unsigned int value, unsigned int port)
-{
-	unsigned long temp;
-	__asm__ __volatile__(
-	"tst	%2, #0x80000000\n\t"
-	"mov	%0, %4\n\t"
-	"addeq	%0, %0, %3\n\t"
-	"str	%1, [%0, %2, lsl #2]	@ outl"
-	: "=&r" (temp)
-	: "r" (value), "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)
-	: "cc");
-}
-
-#define DECLARE_DYN_IN(sz,fnsuffix,instr)					\
-static inline unsigned sz __in##fnsuffix (unsigned int port)		\
-{										\
-	unsigned long temp, value;						\
-	__asm__ __volatile__(							\
-	"tst	%2, #0x80000000\n\t"						\
-	"mov	%0, %4\n\t"							\
-	"addeq	%0, %0, %3\n\t"							\
-	"ldr" instr "	%1, [%0, %2, lsl #2]	@ in" #fnsuffix			\
-	: "=&r" (temp), "=r" (value)						\
-	: "r" (port), "Ir" (PCIO_BASE - IO_BASE), "Ir" (IO_BASE)		\
-	: "cc");								\
-	return (unsigned sz)value;						\
-}
-
-static inline void __iomem *__deprecated __ioaddr(unsigned int port)
-{
-	void __iomem *ret;
-	if (__PORT_PCIO(port))
-		ret = PCIO_BASE;
-	else
-		ret = IO_BASE;
-	return ret + (port << 2);
-}
-
-#define DECLARE_IO(sz,fnsuffix,instr)	\
-	DECLARE_DYN_IN(sz,fnsuffix,instr)
-
-DECLARE_IO(char,b,"b")
-DECLARE_IO(short,w,"")
-DECLARE_IO(int,l,"")
-
-#undef DECLARE_IO
-#undef DECLARE_DYN_IN
-
-/*
- * Constant address IO functions
+ * We need PC style IO addressing for:
+ *  - floppy (at 0x3f2,0x3f4,0x3f5,0x3f7)
+ *  - parport (at 0x278-0x27a, 0x27b-0x27f, 0x778-0x77a)
+ *  - 8250 serial (only for compile)
  *
- * These have to be macros for the 'J' constraint to work -
- * +/-4096 immediate operand.
+ * These peripherals are found in an area of MMIO which looks very much
+ * like an ISA bus, but with registers at the low byte of each word.
  */
-#define __outbc(value,port)							\
-({										\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"strb	%0, [%1, %2]	@ outbc"				\
-		: : "r" (value), "r" (PCIO_BASE), "Jr" ((port) << 2));		\
-	else									\
-		__asm__ __volatile__(						\
-		"strb	%0, [%1, %2]	@ outbc"				\
-		: : "r" (value), "r" (IO_BASE), "r" ((port) << 2));		\
-})
-
-#define __inbc(port)								\
-({										\
-	unsigned char result;							\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldrb	%0, [%1, %2]	@ inbc"					\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" ((port) << 2));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldrb	%0, [%1, %2]	@ inbc"					\
-		: "=r" (result) : "r" (IO_BASE), "r" ((port) << 2));		\
-	result;									\
-})
-
-#define __outwc(value,port)							\
-({										\
-	unsigned long __v = value;						\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]	@ outwc"				\
-		: : "r" (__v|__v<<16), "r" (PCIO_BASE), "Jr" ((port) << 2));	\
-	else									\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]	@ outwc"				\
-		: : "r" (__v|__v<<16), "r" (IO_BASE), "r" ((port) << 2));		\
-})
-
-#define __inwc(port)								\
-({										\
-	unsigned short result;							\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]	@ inwc"					\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" ((port) << 2));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]	@ inwc"					\
-		: "=r" (result) : "r" (IO_BASE), "r" ((port) << 2));		\
-	result & 0xffff;							\
-})
-
-#define __outlc(value,port)							\
-({										\
-	unsigned long __v = value;						\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]	@ outlc"				\
-		: : "r" (__v), "r" (PCIO_BASE), "Jr" ((port) << 2));		\
-	else									\
-		__asm__ __volatile__(						\
-		"str	%0, [%1, %2]	@ outlc"				\
-		: : "r" (__v), "r" (IO_BASE), "r" ((port) << 2));		\
-})
-
-#define __inlc(port)								\
-({										\
-	unsigned long result;							\
-	if (__PORT_PCIO((port)))						\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]	@ inlc"					\
-		: "=r" (result) : "r" (PCIO_BASE), "Jr" ((port) << 2));		\
-	else									\
-		__asm__ __volatile__(						\
-		"ldr	%0, [%1, %2]	@ inlc"					\
-		: "=r" (result) : "r" (IO_BASE), "r" ((port) << 2));		\
-	result;									\
-})
-
-#define inb(p)	 	(__builtin_constant_p((p)) ? __inbc(p)    : __inb(p))
-#define inw(p)	 	(__builtin_constant_p((p)) ? __inwc(p)    : __inw(p))
-#define inl(p)	 	(__builtin_constant_p((p)) ? __inlc(p)    : __inl(p))
-#define outb(v,p)	(__builtin_constant_p((p)) ? __outbc(v,p) : __outb(v,p))
-#define outw(v,p)	(__builtin_constant_p((p)) ? __outwc(v,p) : __outw(v,p))
-#define outl(v,p)	(__builtin_constant_p((p)) ? __outlc(v,p) : __outl(v,p))
-
-/* the following macro is deprecated */
-#define ioaddr(port)	((unsigned long)__ioaddr((port)))
-
-#define insb(p,d,l)	__raw_readsb(__ioaddr(p),d,l)
-#define insw(p,d,l)	__raw_readsw(__ioaddr(p),d,l)
-
-#define outsb(p,d,l)	__raw_writesb(__ioaddr(p),d,l)
-#define outsw(p,d,l)	__raw_writesw(__ioaddr(p),d,l)
+#define __io(a)		(PCIO_BASE + ((a) << 2))
 
 /*
  * 1:1 mapping for ioremapped regions.
diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c
index a9241eb87724..8559598ab767 100644
--- a/arch/arm/mach-rpc/riscpc.c
+++ b/arch/arm/mach-rpc/riscpc.c
@@ -74,7 +74,7 @@ static struct map_desc rpc_io_desc[] __initdata = {
 		.length		= 	IO_SIZE	 ,
 		.type		= MT_DEVICE
 	}, {	/* EASI space	*/
-		.virtual	= EASI_BASE,
+		.virtual	= (unsigned long)EASI_BASE,
 		.pfn		= __phys_to_pfn(EASI_START),
 		.length		= EASI_SIZE,
 		.type		= MT_DEVICE
diff --git a/arch/arm/mach-s3c2410/Makefile.boot b/arch/arm/mach-s3c2410/Makefile.boot
index 58c1dd7f8e1d..4457605ba04a 100644
--- a/arch/arm/mach-s3c2410/Makefile.boot
+++ b/arch/arm/mach-s3c2410/Makefile.boot
@@ -1,7 +1,7 @@
 ifeq ($(CONFIG_PM_H1940),y)
-	zreladdr-y		:= 0x30108000
+	zreladdr-y	+= 0x30108000
 	params_phys-y	:= 0x30100100
 else
-	zreladdr-y		:= 0x30008000
+	zreladdr-y	+= 0x30008000
 	params_phys-y	:= 0x30000100
 endif
diff --git a/arch/arm/mach-s3c2410/include/mach/io.h b/arch/arm/mach-s3c2410/include/mach/io.h
index 9813dbf2ae4f..118749f37c4c 100644
--- a/arch/arm/mach-s3c2410/include/mach/io.h
+++ b/arch/arm/mach-s3c2410/include/mach/io.h
@@ -199,8 +199,6 @@ DECLARE_IO(int,l,"")
 #define outw(v,p)	(__builtin_constant_p((p)) ? __outwc(v,p) : __outw(v,p))
 #define outl(v,p)	(__builtin_constant_p((p)) ? __outlc(v,p) : __outl(v,p))
 #define __ioaddr(p)	(__builtin_constant_p((p)) ? __ioaddr(p)  : __ioaddrc(p))
-/* the following macro is deprecated */
-#define ioaddr(port)	__ioaddr((port))
 
 #define insb(p,d,l)	__raw_readsb(__ioaddr(p),d,l)
 #define insw(p,d,l)	__raw_readsw(__ioaddr(p),d,l)
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index f1d3bd8f6f17..343a540d86a9 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -170,7 +170,9 @@ int __init s3c2410_init(void)
 {
 	printk("S3C2410: Initialising architecture\n");
 
+#ifdef CONFIG_PM
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
+#endif
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 
 	return sysdev_register(&s3c2410_sysdev);
diff --git a/arch/arm/mach-s3c2412/mach-smdk2413.c b/arch/arm/mach-s3c2412/mach-smdk2413.c
index d6325ede9f29..f1eec1b54932 100644
--- a/arch/arm/mach-s3c2412/mach-smdk2413.c
+++ b/arch/arm/mach-s3c2412/mach-smdk2413.c
@@ -92,8 +92,7 @@ static struct platform_device *smdk2413_devices[] __initdata = {
 	&s3c_device_usbgadget,
 };
 
-static void __init smdk2413_fixup(struct machine_desc *desc,
-				  struct tag *tags, char **cmdline,
+static void __init smdk2413_fixup(struct tag *tags, char **cmdline,
 				  struct meminfo *mi)
 {
 	if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
diff --git a/arch/arm/mach-s3c2412/mach-vstms.c b/arch/arm/mach-s3c2412/mach-vstms.c
index 5955c15018b4..1bbb1ef5f4ff 100644
--- a/arch/arm/mach-s3c2412/mach-vstms.c
+++ b/arch/arm/mach-s3c2412/mach-vstms.c
@@ -129,9 +129,8 @@ static struct platform_device *vstms_devices[] __initdata = {
 	&s3c_device_nand,
 };
 
-static void __init vstms_fixup(struct machine_desc *desc,
-				  struct tag *tags, char **cmdline,
-				  struct meminfo *mi)
+static void __init vstms_fixup(struct tag *tags, char **cmdline,
+			       struct meminfo *mi)
 {
 	if (tags != phys_to_virt(S3C2410_SDRAM_PA + 0x100)) {
 		mi->nr_banks=1;
diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c
index ef0958d3e5c6..57a1e01e4e50 100644
--- a/arch/arm/mach-s3c2412/s3c2412.c
+++ b/arch/arm/mach-s3c2412/s3c2412.c
@@ -245,7 +245,9 @@ int __init s3c2412_init(void)
 {
 	printk("S3C2412: Initialising architecture\n");
 
+#ifdef CONFIG_PM
 	register_syscore_ops(&s3c2412_pm_syscore_ops);
+#endif
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 
 	return sysdev_register(&s3c2412_sysdev);
diff --git a/arch/arm/mach-s3c2416/s3c2416.c b/arch/arm/mach-s3c2416/s3c2416.c
index 494ce913dc95..20b3fdfb3051 100644
--- a/arch/arm/mach-s3c2416/s3c2416.c
+++ b/arch/arm/mach-s3c2416/s3c2416.c
@@ -97,7 +97,9 @@ int __init s3c2416_init(void)
 
 	s3c_fb_setname("s3c2443-fb");
 
+#ifdef CONFIG_PM
 	register_syscore_ops(&s3c2416_pm_syscore_ops);
+#endif
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 
 	return sysdev_register(&s3c2416_sysdev);
diff --git a/arch/arm/mach-s3c2440/s3c2440.c b/arch/arm/mach-s3c2440/s3c2440.c
index ce99ff72838d..2270d3360216 100644
--- a/arch/arm/mach-s3c2440/s3c2440.c
+++ b/arch/arm/mach-s3c2440/s3c2440.c
@@ -55,7 +55,9 @@ int __init s3c2440_init(void)
 
 	/* register suspend/resume handlers */
 
+#ifdef CONFIG_PM
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
+#endif
 	register_syscore_ops(&s3c244x_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 
diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c
index 9ad99f8016a1..6f2b65e6e068 100644
--- a/arch/arm/mach-s3c2440/s3c2442.c
+++ b/arch/arm/mach-s3c2440/s3c2442.c
@@ -169,7 +169,9 @@ int __init s3c2442_init(void)
 {
 	printk("S3C2442: Initialising architecture\n");
 
+#ifdef CONFIG_PM
 	register_syscore_ops(&s3c2410_pm_syscore_ops);
+#endif
 	register_syscore_ops(&s3c244x_pm_syscore_ops);
 	register_syscore_ops(&s3c24xx_irq_syscore_ops);
 
diff --git a/arch/arm/mach-s3c64xx/Makefile.boot b/arch/arm/mach-s3c64xx/Makefile.boot
index ba41fdc0a586..c642333af3ed 100644
--- a/arch/arm/mach-s3c64xx/Makefile.boot
+++ b/arch/arm/mach-s3c64xx/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x50008000
+   zreladdr-y	+= 0x50008000
 params_phys-y	:= 0x50000100
diff --git a/arch/arm/mach-s3c64xx/dev-uart.c b/arch/arm/mach-s3c64xx/dev-uart.c
index f797f748b999..c681b99eda08 100644
--- a/arch/arm/mach-s3c64xx/dev-uart.c
+++ b/arch/arm/mach-s3c64xx/dev-uart.c
@@ -37,21 +37,10 @@ static struct resource s3c64xx_uart0_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S3CUART_RX0,
-		.end	= IRQ_S3CUART_RX0,
+		.start	= IRQ_UART0,
+		.end	= IRQ_UART0,
 		.flags	= IORESOURCE_IRQ,
 	},
-	[2] = {
-		.start	= IRQ_S3CUART_TX0,
-		.end	= IRQ_S3CUART_TX0,
-		.flags	= IORESOURCE_IRQ,
-
-	},
-	[3] = {
-		.start	= IRQ_S3CUART_ERR0,
-		.end	= IRQ_S3CUART_ERR0,
-		.flags	= IORESOURCE_IRQ,
-	}
 };
 
 static struct resource s3c64xx_uart1_resource[] = {
@@ -61,19 +50,8 @@ static struct resource s3c64xx_uart1_resource[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S3CUART_RX1,
-		.end	= IRQ_S3CUART_RX1,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S3CUART_TX1,
-		.end	= IRQ_S3CUART_TX1,
-		.flags	= IORESOURCE_IRQ,
-
-	},
-	[3] = {
-		.start	= IRQ_S3CUART_ERR1,
-		.end	= IRQ_S3CUART_ERR1,
+		.start	= IRQ_UART1,
+		.end	= IRQ_UART1,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
@@ -85,19 +63,8 @@ static struct resource s3c6xx_uart2_resource[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S3CUART_RX2,
-		.end	= IRQ_S3CUART_RX2,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S3CUART_TX2,
-		.end	= IRQ_S3CUART_TX2,
-		.flags	= IORESOURCE_IRQ,
-
-	},
-	[3] = {
-		.start	= IRQ_S3CUART_ERR2,
-		.end	= IRQ_S3CUART_ERR2,
+		.start	= IRQ_UART2,
+		.end	= IRQ_UART2,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
@@ -109,19 +76,8 @@ static struct resource s3c64xx_uart3_resource[] = {
 		.flags = IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S3CUART_RX3,
-		.end	= IRQ_S3CUART_RX3,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S3CUART_TX3,
-		.end	= IRQ_S3CUART_TX3,
-		.flags	= IORESOURCE_IRQ,
-
-	},
-	[3] = {
-		.start	= IRQ_S3CUART_ERR3,
-		.end	= IRQ_S3CUART_ERR3,
+		.start	= IRQ_UART3,
+		.end	= IRQ_UART3,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
diff --git a/arch/arm/mach-s3c64xx/include/mach/irqs.h b/arch/arm/mach-s3c64xx/include/mach/irqs.h
index c026f67a80de..443f85b3c203 100644
--- a/arch/arm/mach-s3c64xx/include/mach/irqs.h
+++ b/arch/arm/mach-s3c64xx/include/mach/irqs.h
@@ -27,36 +27,6 @@
 #define IRQ_VIC0_BASE	S3C_IRQ(0)
 #define IRQ_VIC1_BASE	S3C_IRQ(32)
 
-/* UART interrupts, each UART has 4 intterupts per channel so
- * use the space between the ISA and S3C main interrupts. Note, these
- * are not in the same order as the S3C24XX series! */
-
-#define IRQ_S3CUART_BASE0	(16)
-#define IRQ_S3CUART_BASE1	(20)
-#define IRQ_S3CUART_BASE2	(24)
-#define IRQ_S3CUART_BASE3	(28)
-
-#define UART_IRQ_RXD		(0)
-#define UART_IRQ_ERR		(1)
-#define UART_IRQ_TXD		(2)
-#define UART_IRQ_MODEM		(3)
-
-#define IRQ_S3CUART_RX0		(IRQ_S3CUART_BASE0 + UART_IRQ_RXD)
-#define IRQ_S3CUART_TX0		(IRQ_S3CUART_BASE0 + UART_IRQ_TXD)
-#define IRQ_S3CUART_ERR0	(IRQ_S3CUART_BASE0 + UART_IRQ_ERR)
-
-#define IRQ_S3CUART_RX1		(IRQ_S3CUART_BASE1 + UART_IRQ_RXD)
-#define IRQ_S3CUART_TX1		(IRQ_S3CUART_BASE1 + UART_IRQ_TXD)
-#define IRQ_S3CUART_ERR1	(IRQ_S3CUART_BASE1 + UART_IRQ_ERR)
-
-#define IRQ_S3CUART_RX2		(IRQ_S3CUART_BASE2 + UART_IRQ_RXD)
-#define IRQ_S3CUART_TX2		(IRQ_S3CUART_BASE2 + UART_IRQ_TXD)
-#define IRQ_S3CUART_ERR2	(IRQ_S3CUART_BASE2 + UART_IRQ_ERR)
-
-#define IRQ_S3CUART_RX3		(IRQ_S3CUART_BASE3 + UART_IRQ_RXD)
-#define IRQ_S3CUART_TX3		(IRQ_S3CUART_BASE3 + UART_IRQ_TXD)
-#define IRQ_S3CUART_ERR3	(IRQ_S3CUART_BASE3 + UART_IRQ_ERR)
-
 /* VIC based IRQs */
 
 #define S3C64XX_IRQ_VIC0(x)	(IRQ_VIC0_BASE + (x))
diff --git a/arch/arm/mach-s3c64xx/irq.c b/arch/arm/mach-s3c64xx/irq.c
index 75d9a0e49193..b07357e94958 100644
--- a/arch/arm/mach-s3c64xx/irq.c
+++ b/arch/arm/mach-s3c64xx/irq.c
@@ -25,29 +25,6 @@
 #include <plat/irq-uart.h>
 #include <plat/cpu.h>
 
-static struct s3c_uart_irq uart_irqs[] = {
-	[0] = {
-		.regs		= S3C_VA_UART0,
-		.base_irq	= IRQ_S3CUART_BASE0,
-		.parent_irq	= IRQ_UART0,
-	},
-	[1] = {
-		.regs		= S3C_VA_UART1,
-		.base_irq	= IRQ_S3CUART_BASE1,
-		.parent_irq	= IRQ_UART1,
-	},
-	[2] = {
-		.regs		= S3C_VA_UART2,
-		.base_irq	= IRQ_S3CUART_BASE2,
-		.parent_irq	= IRQ_UART2,
-	},
-	[3] = {
-		.regs		= S3C_VA_UART3,
-		.base_irq	= IRQ_S3CUART_BASE3,
-		.parent_irq	= IRQ_UART3,
-	},
-};
-
 /* setup the sources the vic should advertise resume for, even though it
  * is not doing the wake (set_irq_wake needs to be valid) */
 #define IRQ_VIC0_RESUME (1 << (IRQ_RTC_TIC - IRQ_VIC0_BASE))
@@ -67,6 +44,4 @@ void __init s3c64xx_init_irq(u32 vic0_valid, u32 vic1_valid)
 
 	/* add the timer sub-irqs */
 	s3c_init_vic_timer_irq(5, IRQ_TIMER0);
-
-	s3c_init_uart_irqs(uart_irqs, ARRAY_SIZE(uart_irqs));
 }
diff --git a/arch/arm/mach-s5p64x0/Makefile.boot b/arch/arm/mach-s5p64x0/Makefile.boot
index ff90aa13bd67..79ece4055b02 100644
--- a/arch/arm/mach-s5p64x0/Makefile.boot
+++ b/arch/arm/mach-s5p64x0/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x20008000
+   zreladdr-y	+= 0x20008000
 params_phys-y	:= 0x20000100
diff --git a/arch/arm/mach-s5pc100/Makefile.boot b/arch/arm/mach-s5pc100/Makefile.boot
index ff90aa13bd67..79ece4055b02 100644
--- a/arch/arm/mach-s5pc100/Makefile.boot
+++ b/arch/arm/mach-s5pc100/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x20008000
+   zreladdr-y	+= 0x20008000
 params_phys-y	:= 0x20000100
diff --git a/arch/arm/mach-s5pv210/Makefile.boot b/arch/arm/mach-s5pv210/Makefile.boot
index ff90aa13bd67..79ece4055b02 100644
--- a/arch/arm/mach-s5pv210/Makefile.boot
+++ b/arch/arm/mach-s5pv210/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x20008000
+   zreladdr-y	+= 0x20008000
 params_phys-y	:= 0x20000100
diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile
index 73a5c6431792..ed7408d3216c 100644
--- a/arch/arm/mach-sa1100/Makefile
+++ b/arch/arm/mach-sa1100/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_SA1100_PLEB)		+= pleb.o
 obj-$(CONFIG_SA1100_SHANNON)		+= shannon.o
 
 obj-$(CONFIG_SA1100_SIMPAD)		+= simpad.o
-led-$(CONFIG_SA1100_SIMPAD)		+= leds-simpad.o
 
 # LEDs support
 obj-$(CONFIG_LEDS) += $(led-y)
diff --git a/arch/arm/mach-sa1100/Makefile.boot b/arch/arm/mach-sa1100/Makefile.boot
index a56ad0417cf2..5a616f6e5612 100644
--- a/arch/arm/mach-sa1100/Makefile.boot
+++ b/arch/arm/mach-sa1100/Makefile.boot
@@ -1,6 +1,7 @@
-   zreladdr-y	:= 0xc0008000
 ifeq ($(CONFIG_ARCH_SA1100),y)
-   zreladdr-$(CONFIG_SA1111)		:= 0xc0208000
+   zreladdr-$(CONFIG_SA1111)		+= 0xc0208000
+else
+   zreladdr-y	+= 0xc0008000
 endif
 params_phys-y	:= 0xc0000100
 initrd_phys-y	:= 0xc0800000
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d40da5f1f37b..3dd133f18415 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -301,8 +301,7 @@ static void __init get_assabet_scr(void)
 }
 
 static void __init
-fixup_assabet(struct machine_desc *desc, struct tag *tags,
-	      char **cmdline, struct meminfo *mi)
+fixup_assabet(struct tag *tags, char **cmdline, struct meminfo *mi)
 {
 	/* This must be done before any call to machine_has_neponset() */
 	map_sa1100_gpio_regs();
diff --git a/arch/arm/mach-sa1100/include/mach/io.h b/arch/arm/mach-sa1100/include/mach/io.h
index d8b43f3dcd2d..dfc27ff08344 100644
--- a/arch/arm/mach-sa1100/include/mach/io.h
+++ b/arch/arm/mach-sa1100/include/mach/io.h
@@ -10,11 +10,9 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define IO_SPACE_LIMIT 0xffffffff
-
 /*
- * We don't actually have real ISA nor PCI buses, but there is so many 
- * drivers out there that might just work if we fake them...
+ * __io() is required to be an equivalent mapping to __mem_pci() for
+ * SOC_COMMON to work.
  */
 #define __io(a)		__typesafe_io(a)
 #define __mem_pci(a)	(a)
diff --git a/arch/arm/mach-sa1100/include/mach/simpad.h b/arch/arm/mach-sa1100/include/mach/simpad.h
index 9296c4513ce1..db28118103eb 100644
--- a/arch/arm/mach-sa1100/include/mach/simpad.h
+++ b/arch/arm/mach-sa1100/include/mach/simpad.h
@@ -48,32 +48,80 @@
 #define GPIO_SMART_CARD		GPIO_GPIO10
 #define IRQ_GPIO_SMARD_CARD	IRQ_GPIO10
 
-// CS3 Latch is write only, a shadow is necessary
+/*--- ucb1x00 GPIO ---*/
+#define SIMPAD_UCB1X00_GPIO_BASE	(GPIO_MAX + 1)
+#define SIMPAD_UCB1X00_GPIO_PROG1	(SIMPAD_UCB1X00_GPIO_BASE)
+#define SIMPAD_UCB1X00_GPIO_PROG2	(SIMPAD_UCB1X00_GPIO_BASE + 1)
+#define SIMPAD_UCB1X00_GPIO_UP		(SIMPAD_UCB1X00_GPIO_BASE + 2)
+#define SIMPAD_UCB1X00_GPIO_DOWN	(SIMPAD_UCB1X00_GPIO_BASE + 3)
+#define SIMPAD_UCB1X00_GPIO_LEFT	(SIMPAD_UCB1X00_GPIO_BASE + 4)
+#define SIMPAD_UCB1X00_GPIO_RIGHT	(SIMPAD_UCB1X00_GPIO_BASE + 5)
+#define SIMPAD_UCB1X00_GPIO_6		(SIMPAD_UCB1X00_GPIO_BASE + 6)
+#define SIMPAD_UCB1X00_GPIO_7		(SIMPAD_UCB1X00_GPIO_BASE + 7)
+#define SIMPAD_UCB1X00_GPIO_HEADSET	(SIMPAD_UCB1X00_GPIO_BASE + 8)
+#define SIMPAD_UCB1X00_GPIO_SPEAKER	(SIMPAD_UCB1X00_GPIO_BASE + 9)
+
+/*--- CS3 Latch ---*/
+#define SIMPAD_CS3_GPIO_BASE		(GPIO_MAX + 11)
+#define SIMPAD_CS3_VCC_5V_EN		(SIMPAD_CS3_GPIO_BASE)
+#define SIMPAD_CS3_VCC_3V_EN		(SIMPAD_CS3_GPIO_BASE + 1)
+#define SIMPAD_CS3_EN1			(SIMPAD_CS3_GPIO_BASE + 2)
+#define SIMPAD_CS3_EN0			(SIMPAD_CS3_GPIO_BASE + 3)
+#define SIMPAD_CS3_DISPLAY_ON		(SIMPAD_CS3_GPIO_BASE + 4)
+#define SIMPAD_CS3_PCMCIA_BUFF_DIS	(SIMPAD_CS3_GPIO_BASE + 5)
+#define SIMPAD_CS3_MQ_RESET		(SIMPAD_CS3_GPIO_BASE + 6)
+#define SIMPAD_CS3_PCMCIA_RESET		(SIMPAD_CS3_GPIO_BASE + 7)
+#define SIMPAD_CS3_DECT_POWER_ON	(SIMPAD_CS3_GPIO_BASE + 8)
+#define SIMPAD_CS3_IRDA_SD		(SIMPAD_CS3_GPIO_BASE + 9)
+#define SIMPAD_CS3_RS232_ON		(SIMPAD_CS3_GPIO_BASE + 10)
+#define SIMPAD_CS3_SD_MEDIAQ		(SIMPAD_CS3_GPIO_BASE + 11)
+#define SIMPAD_CS3_LED2_ON		(SIMPAD_CS3_GPIO_BASE + 12)
+#define SIMPAD_CS3_IRDA_MODE		(SIMPAD_CS3_GPIO_BASE + 13)
+#define SIMPAD_CS3_ENABLE_5V		(SIMPAD_CS3_GPIO_BASE + 14)
+#define SIMPAD_CS3_RESET_SIMCARD	(SIMPAD_CS3_GPIO_BASE + 15)
+
+#define SIMPAD_CS3_PCMCIA_BVD1		(SIMPAD_CS3_GPIO_BASE + 16)
+#define SIMPAD_CS3_PCMCIA_BVD2		(SIMPAD_CS3_GPIO_BASE + 17)
+#define SIMPAD_CS3_PCMCIA_VS1		(SIMPAD_CS3_GPIO_BASE + 18)
+#define SIMPAD_CS3_PCMCIA_VS2		(SIMPAD_CS3_GPIO_BASE + 19)
+#define SIMPAD_CS3_LOCK_IND		(SIMPAD_CS3_GPIO_BASE + 20)
+#define SIMPAD_CS3_CHARGING_STATE	(SIMPAD_CS3_GPIO_BASE + 21)
+#define SIMPAD_CS3_PCMCIA_SHORT		(SIMPAD_CS3_GPIO_BASE + 22)
+#define SIMPAD_CS3_GPIO_23		(SIMPAD_CS3_GPIO_BASE + 23)
 
-#define CS3BUSTYPE unsigned volatile long
 #define CS3_BASE        0xf1000000
 
-#define VCC_5V_EN       0x0001 // For 5V PCMCIA
-#define VCC_3V_EN       0x0002 // FOR 3.3V PCMCIA
-#define EN1             0x0004 // This is only for EPROM's
-#define EN0             0x0008 // Both should be enable for 3.3V or 5V
-#define DISPLAY_ON      0x0010
-#define PCMCIA_BUFF_DIS 0x0020
-#define MQ_RESET        0x0040
-#define PCMCIA_RESET    0x0080
-#define DECT_POWER_ON   0x0100
-#define IRDA_SD         0x0200 // Shutdown for powersave
-#define RS232_ON        0x0400
-#define SD_MEDIAQ       0x0800 // Shutdown for powersave
-#define LED2_ON         0x1000
-#define IRDA_MODE       0x2000 // Fast/Slow IrDA mode
-#define ENABLE_5V       0x4000 // Enable 5V circuit
-#define RESET_SIMCARD   0x8000
-
-#define RS232_ENABLE    0x0440
-#define PCMCIAMASK      0x402f
-
-
+long simpad_get_cs3_ro(void);
+long simpad_get_cs3_shadow(void);
+void simpad_set_cs3_bit(int value);
+void simpad_clear_cs3_bit(int value);
+
+#define VCC_5V_EN	0x0001 /* For 5V PCMCIA */
+#define VCC_3V_EN	0x0002 /* FOR 3.3V PCMCIA */
+#define EN1		0x0004 /* This is only for EPROM's */
+#define EN0		0x0008 /* Both should be enable for 3.3V or 5V */
+#define DISPLAY_ON	0x0010
+#define PCMCIA_BUFF_DIS	0x0020
+#define MQ_RESET	0x0040
+#define PCMCIA_RESET	0x0080
+#define DECT_POWER_ON	0x0100
+#define IRDA_SD		0x0200 /* Shutdown for powersave */
+#define RS232_ON	0x0400
+#define SD_MEDIAQ	0x0800 /* Shutdown for powersave */
+#define LED2_ON		0x1000
+#define IRDA_MODE	0x2000 /* Fast/Slow IrDA mode */
+#define ENABLE_5V	0x4000 /* Enable 5V circuit */
+#define RESET_SIMCARD	0x8000
+
+#define PCMCIA_BVD1	0x01
+#define PCMCIA_BVD2	0x02
+#define PCMCIA_VS1	0x04
+#define PCMCIA_VS2	0x08
+#define LOCK_IND	0x10
+#define CHARGING_STATE	0x20
+#define PCMCIA_SHORT	0x40
+
+/*--- Battery ---*/
 struct simpad_battery {
 	unsigned char ac_status;	/* line connected yes/no */
 	unsigned char status;		/* battery loading yes/no */
diff --git a/arch/arm/mach-sa1100/leds-simpad.c b/arch/arm/mach-sa1100/leds-simpad.c
deleted file mode 100644
index d50f4eeaa12e..000000000000
--- a/arch/arm/mach-sa1100/leds-simpad.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * linux/arch/arm/mach-sa1100/leds-simpad.c
- *
- * Author: Juergen Messerer <juergen.messerer@siemens.ch>
- */
-#include <linux/init.h>
-
-#include <mach/hardware.h>
-#include <asm/leds.h>
-#include <asm/system.h>
-#include <mach/simpad.h>
-
-#include "leds.h"
-
-
-#define LED_STATE_ENABLED	1
-#define LED_STATE_CLAIMED	2
-
-static unsigned int led_state;
-static unsigned int hw_led_state;
-
-#define	LED_GREEN	(1)
-#define	LED_MASK	(1)
-
-extern void set_cs3_bit(int value);
-extern void clear_cs3_bit(int value);     
-
-void simpad_leds_event(led_event_t evt)
-{
-	switch (evt)
-	{
-	case led_start:
-	        hw_led_state = LED_GREEN;
-		led_state = LED_STATE_ENABLED;
-		break;
-
-	case led_stop:
-		led_state &= ~LED_STATE_ENABLED;
-		break;
-
-	case led_claim:
-		led_state |= LED_STATE_CLAIMED;
-		hw_led_state = LED_GREEN;
-		break;
-
-	case led_release:
-		led_state &= ~LED_STATE_CLAIMED;
-		hw_led_state = LED_GREEN;
-		break;
-
-#ifdef CONFIG_LEDS_TIMER
-	case led_timer:
-		if (!(led_state & LED_STATE_CLAIMED))
-			hw_led_state ^= LED_GREEN;
-		break;
-#endif
-
-#ifdef CONFIG_LEDS_CPU
-	case led_idle_start:
-		break;
-
-	case led_idle_end:
-		break;
-#endif
-
-	case led_halted:
-		break;
-
-	case led_green_on:
-		if (led_state & LED_STATE_CLAIMED)
-			hw_led_state |= LED_GREEN;
-		break;
-
-	case led_green_off:
-		if (led_state & LED_STATE_CLAIMED)
-			hw_led_state &= ~LED_GREEN;
-		break;
-
-	case led_amber_on:
-		break;
-
-	case led_amber_off:
-		break;
-
-	case led_red_on:
-		break;
-
-	case led_red_off:
-		break;
-
-	default:
-		break;
-	}
-
-	if  (led_state & LED_STATE_ENABLED)
-		set_cs3_bit(LED2_ON);
-	else 
-	        clear_cs3_bit(LED2_ON);
-}
-
diff --git a/arch/arm/mach-sa1100/leds.c b/arch/arm/mach-sa1100/leds.c
index bbfe197fb4d6..5fe71a0f1053 100644
--- a/arch/arm/mach-sa1100/leds.c
+++ b/arch/arm/mach-sa1100/leds.c
@@ -42,8 +42,6 @@ sa1100_leds_init(void)
 		leds_event = adsbitsy_leds_event;
 	if (machine_is_pt_system3())
 		leds_event = system3_leds_event;
-	if (machine_is_simpad())
-		leds_event = simpad_leds_event; /* what about machine registry? including led, apm... -zecke */
 
 	leds_event(led_start);
 	return 0;
diff --git a/arch/arm/mach-sa1100/leds.h b/arch/arm/mach-sa1100/leds.h
index 68cc9f773d6d..776b6020f550 100644
--- a/arch/arm/mach-sa1100/leds.h
+++ b/arch/arm/mach-sa1100/leds.h
@@ -11,4 +11,3 @@ extern void pfs168_leds_event(led_event_t evt);
 extern void graphicsmaster_leds_event(led_event_t evt);
 extern void adsbitsy_leds_event(led_event_t evt);
 extern void system3_leds_event(led_event_t evt);
-extern void simpad_leds_event(led_event_t evt);
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index a1c2427655da..4790f3f3d008 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -13,6 +13,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
+#include <linux/gpio.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -28,35 +29,92 @@
 
 #include <linux/serial_core.h>
 #include <linux/ioport.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/leds.h>
+#include <linux/i2c-gpio.h>
 
 #include "generic.h"
 
-long cs3_shadow;
+/*
+ * CS3 support
+ */
 
-long get_cs3_shadow(void)
+static long cs3_shadow;
+static spinlock_t cs3_lock;
+static struct gpio_chip cs3_gpio;
+
+long simpad_get_cs3_ro(void)
+{
+	return readl(CS3_BASE);
+}
+EXPORT_SYMBOL(simpad_get_cs3_ro);
+
+long simpad_get_cs3_shadow(void)
 {
 	return cs3_shadow;
 }
+EXPORT_SYMBOL(simpad_get_cs3_shadow);
 
-void set_cs3(long value)
+static void __simpad_write_cs3(void)
 {
-	*(CS3BUSTYPE *)(CS3_BASE) = cs3_shadow = value;
+	writel(cs3_shadow, CS3_BASE);
 }
 
-void set_cs3_bit(int value)
+void simpad_set_cs3_bit(int value)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&cs3_lock, flags);
 	cs3_shadow |= value;
-	*(CS3BUSTYPE *)(CS3_BASE) = cs3_shadow;
+	__simpad_write_cs3();
+	spin_unlock_irqrestore(&cs3_lock, flags);
 }
+EXPORT_SYMBOL(simpad_set_cs3_bit);
 
-void clear_cs3_bit(int value)
+void simpad_clear_cs3_bit(int value)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&cs3_lock, flags);
 	cs3_shadow &= ~value;
-	*(CS3BUSTYPE *)(CS3_BASE) = cs3_shadow;
+	__simpad_write_cs3();
+	spin_unlock_irqrestore(&cs3_lock, flags);
 }
+EXPORT_SYMBOL(simpad_clear_cs3_bit);
+
+static void cs3_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	if (offset > 15)
+		return;
+	if (value)
+		simpad_set_cs3_bit(1 << offset);
+	else
+		simpad_clear_cs3_bit(1 << offset);
+};
+
+static int cs3_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	if (offset > 15)
+		return simpad_get_cs3_ro() & (1 << (offset - 16));
+	return simpad_get_cs3_shadow() & (1 << offset);
+};
 
-EXPORT_SYMBOL(set_cs3_bit);
-EXPORT_SYMBOL(clear_cs3_bit);
+static int cs3_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	if (offset > 15)
+		return 0;
+	return -EINVAL;
+};
+
+static int cs3_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
+	int value)
+{
+	if (offset > 15)
+		return -EINVAL;
+	cs3_gpio_set(chip, offset, value);
+	return 0;
+};
 
 static struct map_desc simpad_io_desc[] __initdata = {
 	{	/* MQ200 */
@@ -64,9 +122,9 @@ static struct map_desc simpad_io_desc[] __initdata = {
 		.pfn		= __phys_to_pfn(0x4b800000),
 		.length		= 0x00800000,
 		.type		= MT_DEVICE
-	}, {	/* Paules CS3, write only */
-		.virtual	=  0xf1000000,
-		.pfn		= __phys_to_pfn(0x18000000),
+	}, {	/* Simpad CS3 */
+		.virtual	= CS3_BASE,
+		.pfn		= __phys_to_pfn(SA1100_CS3_PHYS),
 		.length		= 0x00100000,
 		.type		= MT_DEVICE
 	},
@@ -78,12 +136,12 @@ static void simpad_uart_pm(struct uart_port *port, u_int state, u_int oldstate)
 	if (port->mapbase == (u_int)&Ser1UTCR0) {
 		if (state)
 		{
-			clear_cs3_bit(RS232_ON);
-			clear_cs3_bit(DECT_POWER_ON);
+			simpad_clear_cs3_bit(RS232_ON);
+			simpad_clear_cs3_bit(DECT_POWER_ON);
 		}else
 		{
-			set_cs3_bit(RS232_ON);
-			set_cs3_bit(DECT_POWER_ON);
+			simpad_set_cs3_bit(RS232_ON);
+			simpad_set_cs3_bit(DECT_POWER_ON);
 		}
 	}
 }
@@ -132,6 +190,7 @@ static struct resource simpad_flash_resources [] = {
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
 };
 
 
@@ -142,9 +201,10 @@ static void __init simpad_map_io(void)
 
 	iotable_init(simpad_io_desc, ARRAY_SIZE(simpad_io_desc));
 
-	set_cs3_bit (EN1 | EN0 | LED2_ON | DISPLAY_ON | RS232_ON |
-		      ENABLE_5V | RESET_SIMCARD | DECT_POWER_ON);
-
+	/* Initialize CS3 */
+	cs3_shadow = (EN1 | EN0 | LED2_ON | DISPLAY_ON |
+		RS232_ON | ENABLE_5V | RESET_SIMCARD | DECT_POWER_ON);
+	__simpad_write_cs3(); /* Spinlocks not yet initialized */
 
         sa1100_register_uart_fns(&simpad_port_fns);
 	sa1100_register_uart(0, 3);  /* serial interface */
@@ -170,13 +230,14 @@ static void __init simpad_map_io(void)
 
 static void simpad_power_off(void)
 {
-	local_irq_disable(); // was cli
-	set_cs3(0x800);        /* only SD_MEDIAQ */
+	local_irq_disable();
+	cs3_shadow = SD_MEDIAQ;
+	__simpad_write_cs3(); /* Bypass spinlock here */
 
 	/* disable internal oscillator, float CS lines */
 	PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS);
-	/* enable wake-up on GPIO0 (Assabet...) */
-	PWER = GFER = GRER = 1;
+	/* enable wake-up on GPIO0 */
+	PWER = GFER = GRER = PWER_GPIO0;
 	/*
 	 * set scratchpad to zero, just in case it is used as a
 	 * restart address by the bootloader.
@@ -192,6 +253,91 @@ static void simpad_power_off(void)
 
 }
 
+/*
+ * gpio_keys
+*/
+
+static struct gpio_keys_button simpad_button_table[] = {
+	{ KEY_POWER, IRQ_GPIO_POWER_BUTTON, 1, "power button" },
+};
+
+static struct gpio_keys_platform_data simpad_keys_data = {
+	.buttons = simpad_button_table,
+	.nbuttons = ARRAY_SIZE(simpad_button_table),
+};
+
+static struct platform_device simpad_keys = {
+	.name = "gpio-keys",
+	.dev = {
+		.platform_data = &simpad_keys_data,
+	},
+};
+
+static struct gpio_keys_button simpad_polled_button_table[] = {
+	{ KEY_PROG1, SIMPAD_UCB1X00_GPIO_PROG1, 1, "prog1 button" },
+	{ KEY_PROG2, SIMPAD_UCB1X00_GPIO_PROG2, 1, "prog2 button" },
+	{ KEY_UP,    SIMPAD_UCB1X00_GPIO_UP,    1, "up button" },
+	{ KEY_DOWN,  SIMPAD_UCB1X00_GPIO_DOWN,  1, "down button" },
+	{ KEY_LEFT,  SIMPAD_UCB1X00_GPIO_LEFT,  1, "left button" },
+	{ KEY_RIGHT, SIMPAD_UCB1X00_GPIO_RIGHT, 1, "right button" },
+};
+
+static struct gpio_keys_platform_data simpad_polled_keys_data = {
+	.buttons = simpad_polled_button_table,
+	.nbuttons = ARRAY_SIZE(simpad_polled_button_table),
+	.poll_interval = 50,
+};
+
+static struct platform_device simpad_polled_keys = {
+	.name = "gpio-keys-polled",
+	.dev = {
+		.platform_data = &simpad_polled_keys_data,
+	},
+};
+
+/*
+ * GPIO LEDs
+ */
+
+static struct gpio_led simpad_leds[] = {
+	{
+		.name = "simpad:power",
+		.gpio = SIMPAD_CS3_LED2_ON,
+		.active_low = 0,
+		.default_trigger = "default-on",
+	},
+};
+
+static struct gpio_led_platform_data simpad_led_data = {
+	.num_leds = ARRAY_SIZE(simpad_leds),
+	.leds = simpad_leds,
+};
+
+static struct platform_device simpad_gpio_leds = {
+	.name = "leds-gpio",
+	.id = 0,
+	.dev = {
+		.platform_data = &simpad_led_data,
+	},
+};
+
+/*
+ * i2c
+ */
+static struct i2c_gpio_platform_data simpad_i2c_data = {
+	.sda_pin = GPIO_GPIO21,
+	.scl_pin = GPIO_GPIO25,
+	.udelay = 10,
+	.timeout = HZ,
+};
+
+static struct platform_device simpad_i2c = {
+	.name = "i2c-gpio",
+	.id = 0,
+	.dev = {
+		.platform_data = &simpad_i2c_data,
+	},
+};
 
 /*
  * MediaQ Video Device
@@ -202,7 +348,11 @@ static struct platform_device simpad_mq200fb = {
 };
 
 static struct platform_device *devices[] __initdata = {
-	&simpad_mq200fb
+	&simpad_keys,
+	&simpad_polled_keys,
+	&simpad_mq200fb,
+	&simpad_gpio_leds,
+	&simpad_i2c,
 };
 
 
@@ -211,6 +361,19 @@ static int __init simpad_init(void)
 {
 	int ret;
 
+	spin_lock_init(&cs3_lock);
+
+	cs3_gpio.label = "simpad_cs3";
+	cs3_gpio.base = SIMPAD_CS3_GPIO_BASE;
+	cs3_gpio.ngpio = 24;
+	cs3_gpio.set = cs3_gpio_set;
+	cs3_gpio.get = cs3_gpio_get;
+	cs3_gpio.direction_input = cs3_gpio_direction_input;
+	cs3_gpio.direction_output = cs3_gpio_direction_output;
+	ret = gpiochip_add(&cs3_gpio);
+	if (ret)
+		printk(KERN_WARNING "simpad: Unable to register cs3 GPIO device");
+
 	pm_power_off = simpad_power_off;
 
 	sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
diff --git a/arch/arm/mach-shark/Makefile.boot b/arch/arm/mach-shark/Makefile.boot
index 4320f8b92771..e40e24e4ca34 100644
--- a/arch/arm/mach-shark/Makefile.boot
+++ b/arch/arm/mach-shark/Makefile.boot
@@ -1,2 +1,2 @@
-   zreladdr-y	:= 0x08008000
+   zreladdr-y	+= 0x08008000
 
diff --git a/arch/arm/mach-shark/leds.c b/arch/arm/mach-shark/leds.c
index c9e32de4adf9..ccd49189bbd0 100644
--- a/arch/arm/mach-shark/leds.c
+++ b/arch/arm/mach-shark/leds.c
@@ -36,7 +36,7 @@ static char led_state;
 static short hw_led_state;
 static short saved_state;
 
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
 
 short sequoia_read(int addr) {
   outw(addr,0x24);
@@ -52,7 +52,7 @@ static void sequoia_leds_event(led_event_t evt)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&leds_lock, flags);
+	raw_spin_lock_irqsave(&leds_lock, flags);
 
 	hw_led_state = sequoia_read(0x09);
 
@@ -144,7 +144,7 @@ static void sequoia_leds_event(led_event_t evt)
 	if  (led_state & LED_STATE_ENABLED)
 		sequoia_write(hw_led_state,0x09);
 
-	spin_unlock_irqrestore(&leds_lock, flags);
+	raw_spin_unlock_irqrestore(&leds_lock, flags);
 }
 
 static int __init leds_init(void)
diff --git a/arch/arm/mach-shmobile/Makefile.boot b/arch/arm/mach-shmobile/Makefile.boot
index 1c08ee9de86a..498efd99338d 100644
--- a/arch/arm/mach-shmobile/Makefile.boot
+++ b/arch/arm/mach-shmobile/Makefile.boot
@@ -1,7 +1,7 @@
 __ZRELADDR	:= $(shell /bin/bash -c 'printf "0x%08x" \
 		     $$[$(CONFIG_MEMORY_START) + 0x8000]')
 
-   zreladdr-y   := $(__ZRELADDR)
+   zreladdr-y   += $(__ZRELADDR)
 
 # Unsupported legacy stuff
 #
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 5fde49da399a..475342bcc95c 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -355,14 +355,17 @@ static struct resource sdhi0_resources[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
 		.start	= gic_spi(83),
 		.flags	= IORESOURCE_IRQ,
 	},
 	[2] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
 		.start	= gic_spi(84),
 		.flags	= IORESOURCE_IRQ,
 	},
 	[3] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
 		.start	= gic_spi(85),
 		.flags	= IORESOURCE_IRQ,
 	},
@@ -398,14 +401,17 @@ static struct resource sdhi1_resources[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
 		.start	= gic_spi(87),
 		.flags	= IORESOURCE_IRQ,
 	},
 	[2] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
 		.start	= gic_spi(88),
 		.flags	= IORESOURCE_IRQ,
 	},
 	[3] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
 		.start	= gic_spi(89),
 		.flags	= IORESOURCE_IRQ,
 	},
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index b622d8d3ab72..5b7edadf4647 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -42,6 +42,7 @@
 #include <linux/leds.h>
 #include <linux/input/sh_keysc.h>
 #include <linux/usb/r8a66597.h>
+#include <linux/pm_clock.h>
 #include <linux/dma-mapping.h>
 
 #include <media/sh_mobile_ceu.h>
@@ -1411,6 +1412,11 @@ static void __init ap4evb_init(void)
 	sh7372_add_device_to_domain(&sh7372_a4lc, &lcdc_device);
 	sh7372_add_device_to_domain(&sh7372_a4mp, &fsi_device);
 
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sh_mmcif_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi0_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &ceu_device);
+
 	hdmi_init_pm_clock();
 	fsi_init_pm_clock();
 	sh7372_pm_init();
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index de2253d7f157..3689ad2e9156 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -39,7 +39,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
-#include <linux/pm_runtime.h>
+#include <linux/pm_clock.h>
 #include <linux/smsc911x.h>
 #include <linux/sh_intc.h>
 #include <linux/tca6416_keypad.h>
@@ -811,6 +811,7 @@ static struct usbhs_private usbhs1_private = {
 		},
 		.driver_param = {
 			.buswait_bwait	= 4,
+			.has_otg	= 1,
 			.pipe_type	= usbhs1_pipe_cfg,
 			.pipe_size	= ARRAY_SIZE(usbhs1_pipe_cfg),
 			.d0_tx_id	= SHDMA_SLAVE_USB1_TX,
@@ -1071,14 +1072,17 @@ static struct resource sdhi1_resources[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
 		.start	= evt2irq(0x0e80), /* SDHI1_SDHI1I0 */
 		.flags	= IORESOURCE_IRQ,
 	},
 	[2] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
 		.start	= evt2irq(0x0ea0), /* SDHI1_SDHI1I1 */
 		.flags	= IORESOURCE_IRQ,
 	},
 	[3] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
 		.start	= evt2irq(0x0ec0), /* SDHI1_SDHI1I2 */
 		.flags	= IORESOURCE_IRQ,
 	},
@@ -1122,14 +1126,17 @@ static struct resource sdhi2_resources[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
 		.start	= evt2irq(0x1200), /* SDHI2_SDHI2I0 */
 		.flags	= IORESOURCE_IRQ,
 	},
 	[2] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
 		.start	= evt2irq(0x1220), /* SDHI2_SDHI2I1 */
 		.flags	= IORESOURCE_IRQ,
 	},
 	[3] = {
+		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
 		.start	= evt2irq(0x1240), /* SDHI2_SDHI2I2 */
 		.flags	= IORESOURCE_IRQ,
 	},
@@ -1590,7 +1597,17 @@ static void __init mackerel_init(void)
 
 	sh7372_add_device_to_domain(&sh7372_a4lc, &lcdc_device);
 	sh7372_add_device_to_domain(&sh7372_a4lc, &hdmi_lcdc_device);
+	sh7372_add_device_to_domain(&sh7372_a4lc, &meram_device);
 	sh7372_add_device_to_domain(&sh7372_a4mp, &fsi_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &usbhs0_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &usbhs1_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sh_mmcif_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi0_device);
+#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi1_device);
+#endif
+	sh7372_add_device_to_domain(&sh7372_a3sp, &sdhi2_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &ceu_device);
 
 	hdmi_init_pm_clock();
 	sh7372_pm_init();
diff --git a/arch/arm/mach-shmobile/entry-intc.S b/arch/arm/mach-shmobile/entry-intc.S
index cac0a7ae2084..1a1c00ca39a2 100644
--- a/arch/arm/mach-shmobile/entry-intc.S
+++ b/arch/arm/mach-shmobile/entry-intc.S
@@ -51,7 +51,4 @@
 	.macro  test_for_ipi, irqnr, irqstat, base, tmp
 	.endm
 
-	.macro  test_for_ltirq, irqnr, irqstat, base, tmp
-	.endm
-
 	arch_irq_handler shmobile_handle_irq_intc
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index 06aecb31d9c7..c0cdbf997c91 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -35,8 +35,8 @@ extern void sh7372_add_standard_devices(void);
 extern void sh7372_clock_init(void);
 extern void sh7372_pinmux_init(void);
 extern void sh7372_pm_init(void);
-extern void sh7372_cpu_suspend(void);
-extern void sh7372_cpu_resume(void);
+extern void sh7372_resume_core_standby_a3sm(void);
+extern int sh7372_do_idle_a3sm(unsigned long unused);
 extern struct clk sh7372_extal1_clk;
 extern struct clk sh7372_extal2_clk;
 
diff --git a/arch/arm/mach-shmobile/include/mach/entry-macro.S b/arch/arm/mach-shmobile/include/mach/entry-macro.S
index d791f10eeac7..8d4a416d4285 100644
--- a/arch/arm/mach-shmobile/include/mach/entry-macro.S
+++ b/arch/arm/mach-shmobile/include/mach/entry-macro.S
@@ -27,8 +27,5 @@
 	.macro  test_for_ipi, irqnr, irqstat, base, tmp
 	.endm
 
-	.macro  test_for_ltirq, irqnr, irqstat, base, tmp
-	.endm
-
 	.macro  arch_ret_to_user, tmp1, tmp2
 	.endm
diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h
index 24e63a85e669..84532f9629b2 100644
--- a/arch/arm/mach-shmobile/include/mach/sh7372.h
+++ b/arch/arm/mach-shmobile/include/mach/sh7372.h
@@ -479,7 +479,12 @@ struct platform_device;
 
 struct sh7372_pm_domain {
 	struct generic_pm_domain genpd;
+	struct dev_power_governor *gov;
+	void (*suspend)(void);
+	void (*resume)(void);
 	unsigned int bit_shift;
+	bool no_debug;
+	bool stay_on;
 };
 
 static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d)
@@ -491,16 +496,24 @@ static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d)
 extern struct sh7372_pm_domain sh7372_a4lc;
 extern struct sh7372_pm_domain sh7372_a4mp;
 extern struct sh7372_pm_domain sh7372_d4;
+extern struct sh7372_pm_domain sh7372_a4r;
 extern struct sh7372_pm_domain sh7372_a3rv;
 extern struct sh7372_pm_domain sh7372_a3ri;
+extern struct sh7372_pm_domain sh7372_a3sp;
 extern struct sh7372_pm_domain sh7372_a3sg;
 
 extern void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd);
 extern void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd,
 					struct platform_device *pdev);
+extern void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd,
+				    struct sh7372_pm_domain *sh7372_sd);
 #else
 #define sh7372_init_pm_domain(pd) do { } while(0)
 #define sh7372_add_device_to_domain(pd, pdev) do { } while(0)
+#define sh7372_pm_add_subdomain(pd, sd) do { } while(0)
 #endif /* CONFIG_PM */
 
+extern void sh7372_intcs_suspend(void);
+extern void sh7372_intcs_resume(void);
+
 #endif /* __ASM_SH7372_H__ */
diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c
index 739315e30eb9..29cdc0522d9c 100644
--- a/arch/arm/mach-shmobile/intc-sh7372.c
+++ b/arch/arm/mach-shmobile/intc-sh7372.c
@@ -606,9 +606,16 @@ static void intcs_demux(unsigned int irq, struct irq_desc *desc)
 	generic_handle_irq(intcs_evt2irq(evtcodeas));
 }
 
+static void __iomem *intcs_ffd2;
+static void __iomem *intcs_ffd5;
+
 void __init sh7372_init_irq(void)
 {
-	void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE);
+	void __iomem *intevtsa;
+
+	intcs_ffd2 = ioremap_nocache(0xffd20000, PAGE_SIZE);
+	intevtsa = intcs_ffd2 + 0x100;
+	intcs_ffd5 = ioremap_nocache(0xffd50000, PAGE_SIZE);
 
 	register_intc_controller(&intca_desc);
 	register_intc_controller(&intcs_desc);
@@ -617,3 +624,46 @@ void __init sh7372_init_irq(void)
 	irq_set_handler_data(evt2irq(0xf80), (void *)intevtsa);
 	irq_set_chained_handler(evt2irq(0xf80), intcs_demux);
 }
+
+static unsigned short ffd2[0x200];
+static unsigned short ffd5[0x100];
+
+void sh7372_intcs_suspend(void)
+{
+	int k;
+
+	for (k = 0x00; k <= 0x30; k += 4)
+		ffd2[k] = __raw_readw(intcs_ffd2 + k);
+
+	for (k = 0x80; k <= 0xb0; k += 4)
+		ffd2[k] = __raw_readb(intcs_ffd2 + k);
+
+	for (k = 0x180; k <= 0x188; k += 4)
+		ffd2[k] = __raw_readb(intcs_ffd2 + k);
+
+	for (k = 0x00; k <= 0x3c; k += 4)
+		ffd5[k] = __raw_readw(intcs_ffd5 + k);
+
+	for (k = 0x80; k <= 0x9c; k += 4)
+		ffd5[k] = __raw_readb(intcs_ffd5 + k);
+}
+
+void sh7372_intcs_resume(void)
+{
+	int k;
+
+	for (k = 0x00; k <= 0x30; k += 4)
+		__raw_writew(ffd2[k], intcs_ffd2 + k);
+
+	for (k = 0x80; k <= 0xb0; k += 4)
+		__raw_writeb(ffd2[k], intcs_ffd2 + k);
+
+	for (k = 0x180; k <= 0x188; k += 4)
+		__raw_writeb(ffd2[k], intcs_ffd2 + k);
+
+	for (k = 0x00; k <= 0x3c; k += 4)
+		__raw_writew(ffd5[k], intcs_ffd5 + k);
+
+	for (k = 0x80; k <= 0x9c; k += 4)
+		__raw_writeb(ffd5[k], intcs_ffd5 + k);
+}
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index 66f980625a33..e4e485fa2532 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -56,6 +56,12 @@ void __init smp_init_cpus(void)
 	unsigned int ncores = shmobile_smp_get_core_count();
 	unsigned int i;
 
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
+	}
+
 	for (i = 0; i < ncores; i++)
 		set_cpu_possible(i, true);
 
diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c
index 933fb411be0f..79612737c5b2 100644
--- a/arch/arm/mach-shmobile/pm-sh7372.c
+++ b/arch/arm/mach-shmobile/pm-sh7372.c
@@ -15,23 +15,61 @@
 #include <linux/list.h>
 #include <linux/err.h>
 #include <linux/slab.h>
-#include <linux/pm_runtime.h>
+#include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/bitrev.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/tlbflush.h>
+#include <asm/suspend.h>
 #include <mach/common.h>
 #include <mach/sh7372.h>
 
-#define SMFRAM 0xe6a70000
-#define SYSTBCR 0xe6150024
-#define SBAR 0xe6180020
-#define APARMBAREA 0xe6f10020
+/* DBG */
+#define DBGREG1 0xe6100020
+#define DBGREG9 0xe6100040
 
+/* CPGA */
+#define SYSTBCR 0xe6150024
+#define MSTPSR0 0xe6150030
+#define MSTPSR1 0xe6150038
+#define MSTPSR2 0xe6150040
+#define MSTPSR3 0xe6150048
+#define MSTPSR4 0xe615004c
+#define PLLC01STPCR 0xe61500c8
+
+/* SYSC */
 #define SPDCR 0xe6180008
 #define SWUCR 0xe6180014
+#define SBAR 0xe6180020
+#define WUPRMSK 0xe6180028
+#define WUPSMSK 0xe618002c
+#define WUPSMSK2 0xe6180048
 #define PSTR 0xe6180080
+#define WUPSFAC 0xe6180098
+#define IRQCR 0xe618022c
+#define IRQCR2 0xe6180238
+#define IRQCR3 0xe6180244
+#define IRQCR4 0xe6180248
+#define PDNSEL 0xe6180254
+
+/* INTC */
+#define ICR1A 0xe6900000
+#define ICR2A 0xe6900004
+#define ICR3A 0xe6900008
+#define ICR4A 0xe690000c
+#define INTMSK00A 0xe6900040
+#define INTMSK10A 0xe6900044
+#define INTMSK20A 0xe6900048
+#define INTMSK30A 0xe690004c
+
+/* MFIS */
+#define SMFRAM 0xe6a70000
+
+/* AP-System Core */
+#define APARMBAREA 0xe6f10020
 
 #define PSTR_RETRIES 100
 #define PSTR_DELAY_US 10
@@ -43,6 +81,12 @@ static int pd_power_down(struct generic_pm_domain *genpd)
 	struct sh7372_pm_domain *sh7372_pd = to_sh7372_pd(genpd);
 	unsigned int mask = 1 << sh7372_pd->bit_shift;
 
+	if (sh7372_pd->suspend)
+		sh7372_pd->suspend();
+
+	if (sh7372_pd->stay_on)
+		return 0;
+
 	if (__raw_readl(PSTR) & mask) {
 		unsigned int retry_count;
 
@@ -55,8 +99,9 @@ static int pd_power_down(struct generic_pm_domain *genpd)
 		}
 	}
 
-	pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n",
-		 mask, __raw_readl(PSTR));
+	if (!sh7372_pd->no_debug)
+		pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n",
+			 mask, __raw_readl(PSTR));
 
 	return 0;
 }
@@ -68,6 +113,9 @@ static int pd_power_up(struct generic_pm_domain *genpd)
 	unsigned int retry_count;
 	int ret = 0;
 
+	if (sh7372_pd->stay_on)
+		goto out;
+
 	if (__raw_readl(PSTR) & mask)
 		goto out;
 
@@ -84,66 +132,48 @@ static int pd_power_up(struct generic_pm_domain *genpd)
 	if (__raw_readl(SWUCR) & mask)
 		ret = -EIO;
 
+	if (!sh7372_pd->no_debug)
+		pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n",
+			 mask, __raw_readl(PSTR));
+
  out:
-	pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n",
-		 mask, __raw_readl(PSTR));
+	if (ret == 0 && sh7372_pd->resume)
+		sh7372_pd->resume();
 
 	return ret;
 }
 
-static int pd_power_up_a3rv(struct generic_pm_domain *genpd)
+static void sh7372_a4r_suspend(void)
 {
-	int ret = pd_power_up(genpd);
-
-	/* force A4LC on after A3RV has been requested on */
-	pm_genpd_poweron(&sh7372_a4lc.genpd);
-
-	return ret;
+	sh7372_intcs_suspend();
+	__raw_writel(0x300fffff, WUPRMSK); /* avoid wakeup */
 }
 
-static int pd_power_down_a3rv(struct generic_pm_domain *genpd)
+static bool pd_active_wakeup(struct device *dev)
 {
-	int ret = pd_power_down(genpd);
-
-	/* try to power down A4LC after A3RV is requested off */
-	genpd_queue_power_off_work(&sh7372_a4lc.genpd);
-
-	return ret;
+	return true;
 }
 
-static int pd_power_down_a4lc(struct generic_pm_domain *genpd)
+static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain)
 {
-	/* only power down A4LC if A3RV is off */
-	if (!(__raw_readl(PSTR) & (1 << sh7372_a3rv.bit_shift)))
-		return pd_power_down(genpd);
-
-	return -EBUSY;
+	return false;
 }
 
-static bool pd_active_wakeup(struct device *dev)
-{
-	return true;
-}
+struct dev_power_governor sh7372_always_on_gov = {
+	.power_down_ok = sh7372_power_down_forbidden,
+};
 
 void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd)
 {
 	struct generic_pm_domain *genpd = &sh7372_pd->genpd;
 
-	pm_genpd_init(genpd, NULL, false);
+	pm_genpd_init(genpd, sh7372_pd->gov, false);
 	genpd->stop_device = pm_clk_suspend;
 	genpd->start_device = pm_clk_resume;
+	genpd->dev_irq_safe = true;
 	genpd->active_wakeup = pd_active_wakeup;
-
-	if (sh7372_pd == &sh7372_a4lc) {
-		genpd->power_off = pd_power_down_a4lc;
-		genpd->power_on = pd_power_up;
-	} else if (sh7372_pd == &sh7372_a3rv) {
-		genpd->power_off = pd_power_down_a3rv;
-		genpd->power_on = pd_power_up_a3rv;
-	} else {
-		genpd->power_off = pd_power_down;
-		genpd->power_on = pd_power_up;
-	}
+	genpd->power_off = pd_power_down;
+	genpd->power_on = pd_power_up;
 	genpd->power_on(&sh7372_pd->genpd);
 }
 
@@ -152,11 +182,15 @@ void sh7372_add_device_to_domain(struct sh7372_pm_domain *sh7372_pd,
 {
 	struct device *dev = &pdev->dev;
 
-	if (!dev->power.subsys_data) {
-		pm_clk_init(dev);
-		pm_clk_add(dev, NULL);
-	}
 	pm_genpd_add_device(&sh7372_pd->genpd, dev);
+	if (pm_clk_no_clocks(dev))
+		pm_clk_add(dev, NULL);
+}
+
+void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd,
+			     struct sh7372_pm_domain *sh7372_sd)
+{
+	pm_genpd_add_subdomain(&sh7372_pd->genpd, &sh7372_sd->genpd);
 }
 
 struct sh7372_pm_domain sh7372_a4lc = {
@@ -171,6 +205,14 @@ struct sh7372_pm_domain sh7372_d4 = {
 	.bit_shift = 3,
 };
 
+struct sh7372_pm_domain sh7372_a4r = {
+	.bit_shift = 5,
+	.gov = &sh7372_always_on_gov,
+	.suspend = sh7372_a4r_suspend,
+	.resume = sh7372_intcs_resume,
+	.stay_on = true,
+};
+
 struct sh7372_pm_domain sh7372_a3rv = {
 	.bit_shift = 6,
 };
@@ -179,39 +221,187 @@ struct sh7372_pm_domain sh7372_a3ri = {
 	.bit_shift = 8,
 };
 
+struct sh7372_pm_domain sh7372_a3sp = {
+	.bit_shift = 11,
+	.gov = &sh7372_always_on_gov,
+	.no_debug = true,
+};
+
 struct sh7372_pm_domain sh7372_a3sg = {
 	.bit_shift = 13,
 };
 
 #endif /* CONFIG_PM */
 
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_CPU_IDLE)
+static int sh7372_do_idle_core_standby(unsigned long unused)
+{
+	cpu_do_idle(); /* WFI when SYSTBCR == 0x10 -> Core Standby */
+	return 0;
+}
+
 static void sh7372_enter_core_standby(void)
 {
-	void __iomem *smfram = (void __iomem *)SMFRAM;
+	/* set reset vector, translate 4k */
+	__raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR);
+	__raw_writel(0, APARMBAREA);
 
-	__raw_writel(0, APARMBAREA); /* translate 4k */
-	__raw_writel(__pa(sh7372_cpu_resume), SBAR); /* set reset vector */
-	__raw_writel(0x10, SYSTBCR); /* enable core standby */
+	/* enter sleep mode with SYSTBCR to 0x10 */
+	__raw_writel(0x10, SYSTBCR);
+	cpu_suspend(0, sh7372_do_idle_core_standby);
+	__raw_writel(0, SYSTBCR);
 
-	__raw_writel(0, smfram + 0x3c); /* clear page table address */
+	 /* disable reset vector translation */
+	__raw_writel(0, SBAR);
+}
+#endif
+
+#ifdef CONFIG_SUSPEND
+static void sh7372_enter_a3sm_common(int pllc0_on)
+{
+	/* set reset vector, translate 4k */
+	__raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR);
+	__raw_writel(0, APARMBAREA);
+
+	if (pllc0_on)
+		__raw_writel(0, PLLC01STPCR);
+	else
+		__raw_writel(1 << 28, PLLC01STPCR);
+
+	__raw_writel(0, PDNSEL); /* power-down A3SM only, not A4S */
+	__raw_readl(WUPSFAC); /* read wakeup int. factor before sleep */
+	cpu_suspend(0, sh7372_do_idle_a3sm);
+	__raw_readl(WUPSFAC); /* read wakeup int. factor after wakeup */
+
+	 /* disable reset vector translation */
+	__raw_writel(0, SBAR);
+}
+
+static int sh7372_a3sm_valid(unsigned long *mskp, unsigned long *msk2p)
+{
+	unsigned long mstpsr0, mstpsr1, mstpsr2, mstpsr3, mstpsr4;
+	unsigned long msk, msk2;
+
+	/* check active clocks to determine potential wakeup sources */
+
+	mstpsr0 = __raw_readl(MSTPSR0);
+	if ((mstpsr0 & 0x00000003) != 0x00000003) {
+		pr_debug("sh7372 mstpsr0 0x%08lx\n", mstpsr0);
+		return 0;
+	}
+
+	mstpsr1 = __raw_readl(MSTPSR1);
+	if ((mstpsr1 & 0xff079b7f) != 0xff079b7f) {
+		pr_debug("sh7372 mstpsr1 0x%08lx\n", mstpsr1);
+		return 0;
+	}
 
-	sh7372_cpu_suspend();
-	cpu_init();
+	mstpsr2 = __raw_readl(MSTPSR2);
+	if ((mstpsr2 & 0x000741ff) != 0x000741ff) {
+		pr_debug("sh7372 mstpsr2 0x%08lx\n", mstpsr2);
+		return 0;
+	}
 
-	/* if page table address is non-NULL then we have been powered down */
-	if (__raw_readl(smfram + 0x3c)) {
-		__raw_writel(__raw_readl(smfram + 0x40),
-			     __va(__raw_readl(smfram + 0x3c)));
+	mstpsr3 = __raw_readl(MSTPSR3);
+	if ((mstpsr3 & 0x1a60f010) != 0x1a60f010) {
+		pr_debug("sh7372 mstpsr3 0x%08lx\n", mstpsr3);
+		return 0;
+	}
 
-		flush_tlb_all();
-		set_cr(__raw_readl(smfram + 0x38));
+	mstpsr4 = __raw_readl(MSTPSR4);
+	if ((mstpsr4 & 0x00008cf0) != 0x00008cf0) {
+		pr_debug("sh7372 mstpsr4 0x%08lx\n", mstpsr4);
+		return 0;
 	}
 
-	__raw_writel(0, SYSTBCR); /* disable core standby */
-	__raw_writel(0, SBAR); /* disable reset vector translation */
+	msk = 0;
+	msk2 = 0;
+
+	/* make bitmaps of limited number of wakeup sources */
+
+	if ((mstpsr2 & (1 << 23)) == 0) /* SPU2 */
+		msk |= 1 << 31;
+
+	if ((mstpsr2 & (1 << 12)) == 0) /* MFI_MFIM */
+		msk |= 1 << 21;
+
+	if ((mstpsr4 & (1 << 3)) == 0) /* KEYSC */
+		msk |= 1 << 2;
+
+	if ((mstpsr1 & (1 << 24)) == 0) /* CMT0 */
+		msk |= 1 << 1;
+
+	if ((mstpsr3 & (1 << 29)) == 0) /* CMT1 */
+		msk |= 1 << 1;
+
+	if ((mstpsr4 & (1 << 0)) == 0) /* CMT2 */
+		msk |= 1 << 1;
+
+	if ((mstpsr2 & (1 << 13)) == 0) /* MFI_MFIS */
+		msk2 |= 1 << 17;
+
+	*mskp = msk;
+	*msk2p = msk2;
+
+	return 1;
+}
+
+static void sh7372_icr_to_irqcr(unsigned long icr, u16 *irqcr1p, u16 *irqcr2p)
+{
+	u16 tmp, irqcr1, irqcr2;
+	int k;
+
+	irqcr1 = 0;
+	irqcr2 = 0;
+
+	/* convert INTCA ICR register layout to SYSC IRQCR+IRQCR2 */
+	for (k = 0; k <= 7; k++) {
+		tmp = (icr >> ((7 - k) * 4)) & 0xf;
+		irqcr1 |= (tmp & 0x03) << (k * 2);
+		irqcr2 |= (tmp >> 2) << (k * 2);
+	}
+
+	*irqcr1p = irqcr1;
+	*irqcr2p = irqcr2;
+}
+
+static void sh7372_setup_a3sm(unsigned long msk, unsigned long msk2)
+{
+	u16 irqcrx_low, irqcrx_high, irqcry_low, irqcry_high;
+	unsigned long tmp;
+
+	/* read IRQ0A -> IRQ15A mask */
+	tmp = bitrev8(__raw_readb(INTMSK00A));
+	tmp |= bitrev8(__raw_readb(INTMSK10A)) << 8;
+
+	/* setup WUPSMSK from clocks and external IRQ mask */
+	msk = (~msk & 0xc030000f) | (tmp << 4);
+	__raw_writel(msk, WUPSMSK);
+
+	/* propage level/edge trigger for external IRQ 0->15 */
+	sh7372_icr_to_irqcr(__raw_readl(ICR1A), &irqcrx_low, &irqcry_low);
+	sh7372_icr_to_irqcr(__raw_readl(ICR2A), &irqcrx_high, &irqcry_high);
+	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR);
+	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR2);
+
+	/* read IRQ16A -> IRQ31A mask */
+	tmp = bitrev8(__raw_readb(INTMSK20A));
+	tmp |= bitrev8(__raw_readb(INTMSK30A)) << 8;
+
+	/* setup WUPSMSK2 from clocks and external IRQ mask */
+	msk2 = (~msk2 & 0x00030000) | tmp;
+	__raw_writel(msk2, WUPSMSK2);
+
+	/* propage level/edge trigger for external IRQ 16->31 */
+	sh7372_icr_to_irqcr(__raw_readl(ICR3A), &irqcrx_low, &irqcry_low);
+	sh7372_icr_to_irqcr(__raw_readl(ICR4A), &irqcrx_high, &irqcry_high);
+	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3);
+	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4);
 }
+#endif
 
 #ifdef CONFIG_CPU_IDLE
+
 static void sh7372_cpuidle_setup(struct cpuidle_device *dev)
 {
 	struct cpuidle_state *state;
@@ -239,9 +429,25 @@ static void sh7372_cpuidle_init(void) {}
 #endif
 
 #ifdef CONFIG_SUSPEND
+
 static int sh7372_enter_suspend(suspend_state_t suspend_state)
 {
-	sh7372_enter_core_standby();
+	unsigned long msk, msk2;
+
+	/* check active clocks to determine potential wakeup sources */
+	if (sh7372_a3sm_valid(&msk, &msk2)) {
+
+		/* convert INTC mask and sense to SYSC mask and sense */
+		sh7372_setup_a3sm(msk, msk2);
+
+		/* enter A3SM sleep with PLLC0 off */
+		pr_debug("entering A3SM\n");
+		sh7372_enter_a3sm_common(0);
+	} else {
+		/* default to Core Standby that supports all wakeup sources */
+		pr_debug("entering Core Standby\n");
+		sh7372_enter_core_standby();
+	}
 	return 0;
 }
 
@@ -253,9 +459,6 @@ static void sh7372_suspend_init(void)
 static void sh7372_suspend_init(void) {}
 #endif
 
-#define DBGREG1 0xe6100020
-#define DBGREG9 0xe6100040
-
 void __init sh7372_pm_init(void)
 {
 	/* enable DBG hardware block to kick SYSC */
@@ -263,6 +466,9 @@ void __init sh7372_pm_init(void)
 	__raw_writel(0x0000a501, DBGREG9);
 	__raw_writel(0x00000000, DBGREG1);
 
+	/* do not convert A3SM, A3SP, A3SG, A4R power down into A4S */
+	__raw_writel(0, PDNSEL);
+
 	sh7372_suspend_init();
 	sh7372_cpuidle_init();
 }
diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c
index 6ec454e1e063..bd5c6a3b8c55 100644
--- a/arch/arm/mach-shmobile/pm_runtime.c
+++ b/arch/arm/mach-shmobile/pm_runtime.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
+#include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index 2d9b1b1a2538..2380389e6ac5 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -30,6 +30,7 @@
 #include <linux/sh_dma.h>
 #include <linux/sh_intc.h>
 #include <linux/sh_timer.h>
+#include <linux/pm_domain.h>
 #include <mach/hardware.h>
 #include <mach/sh7372.h>
 #include <asm/mach-types.h>
@@ -990,9 +991,14 @@ void __init sh7372_add_standard_devices(void)
 	sh7372_init_pm_domain(&sh7372_a4lc);
 	sh7372_init_pm_domain(&sh7372_a4mp);
 	sh7372_init_pm_domain(&sh7372_d4);
+	sh7372_init_pm_domain(&sh7372_a4r);
 	sh7372_init_pm_domain(&sh7372_a3rv);
 	sh7372_init_pm_domain(&sh7372_a3ri);
 	sh7372_init_pm_domain(&sh7372_a3sg);
+	sh7372_init_pm_domain(&sh7372_a3sp);
+
+	sh7372_pm_add_subdomain(&sh7372_a4lc, &sh7372_a3rv);
+	sh7372_pm_add_subdomain(&sh7372_a4r, &sh7372_a4lc);
 
 	platform_add_devices(sh7372_early_devices,
 			    ARRAY_SIZE(sh7372_early_devices));
@@ -1003,6 +1009,25 @@ void __init sh7372_add_standard_devices(void)
 	sh7372_add_device_to_domain(&sh7372_a3rv, &vpu_device);
 	sh7372_add_device_to_domain(&sh7372_a4mp, &spu0_device);
 	sh7372_add_device_to_domain(&sh7372_a4mp, &spu1_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif0_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif1_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif2_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif3_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif4_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif5_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &scif6_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &iic1_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &dma0_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &dma1_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &dma2_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma0_device);
+	sh7372_add_device_to_domain(&sh7372_a3sp, &usb_dma1_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &iic0_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &veu0_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &veu1_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &veu2_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &veu3_device);
+	sh7372_add_device_to_domain(&sh7372_a4r, &jpu_device);
 }
 
 void __init sh7372_add_early_devices(void)
diff --git a/arch/arm/mach-shmobile/sleep-sh7372.S b/arch/arm/mach-shmobile/sleep-sh7372.S
index d37d3ca4d18f..f3ab3c5810ea 100644
--- a/arch/arm/mach-shmobile/sleep-sh7372.S
+++ b/arch/arm/mach-shmobile/sleep-sh7372.S
@@ -30,58 +30,20 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
 #include <asm/assembler.h>
 
-#define SMFRAM 0xe6a70000
-
-	.align
-kernel_flush:
-	.word	v7_flush_dcache_all
-
-	.align	3
-ENTRY(sh7372_cpu_suspend)
-	stmfd	sp!, {r0-r12, lr}	@ save registers on stack
-
-	ldr	r8, =SMFRAM
-
-	mov	r4, sp			@ Store sp
-	mrs	r5, spsr		@ Store spsr
-	mov	r6, lr			@ Store lr
-	stmia	r8!, {r4-r6}
-
-	mrc	p15, 0, r4, c1, c0, 2	@ Coprocessor access control register
-	mrc	p15, 0, r5, c2, c0, 0	@ TTBR0
-	mrc	p15, 0, r6, c2, c0, 1	@ TTBR1
-	mrc	p15, 0, r7, c2, c0, 2	@ TTBCR
-	stmia	r8!, {r4-r7}
-
-	mrc	p15, 0, r4, c3, c0, 0	@ Domain access Control Register
-	mrc	p15, 0, r5, c10, c2, 0	@ PRRR
-	mrc	p15, 0, r6, c10, c2, 1	@ NMRR
-	stmia	r8!,{r4-r6}
-
-	mrc	p15, 0, r4, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r5, c13, c0, 2	@ User r/w thread and process ID
-	mrc	p15, 0, r6, c12, c0, 0	@ Secure or NS vector base address
-	mrs	r7, cpsr		@ Store current cpsr
-	stmia	r8!, {r4-r7}
-
-	mrc	p15, 0, r4, c1, c0, 0	@ save control register
-	stmia	r8!, {r4}
-
-	/*
-	 * jump out to kernel flush routine
-	 *  - reuse that code is better
-	 *  - it executes in a cached space so is faster than refetch per-block
-	 *  - should be faster and will change with kernel
-	 *  - 'might' have to copy address, load and jump to it
-	 * Flush all data from the L1 data cache before disabling
-	 * SCTLR.C bit.
-	 */
-	ldr	r1, kernel_flush
-	mov	lr, pc
-	bx	r1
+#if defined(CONFIG_SUSPEND) || defined(CONFIG_CPU_IDLE)
+	.align	12
+	.text
+	.global sh7372_resume_core_standby_a3sm
+sh7372_resume_core_standby_a3sm:
+	ldr     pc, 1f
+1:	.long   cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
 
+	.global	sh7372_do_idle_a3sm
+sh7372_do_idle_a3sm:
 	/*
 	 * Clear the SCTLR.C bit to prevent further data cache
 	 * allocation. Clearing SCTLR.C would make all the data accesses
@@ -92,10 +54,13 @@ ENTRY(sh7372_cpu_suspend)
 	mcr	p15, 0, r0, c1, c0, 0
 	isb
 
+	/* disable L2 cache in the aux control register */
+	mrc     p15, 0, r10, c1, c0, 1
+	bic     r10, r10, #2
+	mcr     p15, 0, r10, c1, c0, 1
+
 	/*
-	 * Invalidate L1 data cache. Even though only invalidate is
-	 * necessary exported flush API is used here. Doing clean
-	 * on already clean cache would be almost NOP.
+	 * Invalidate data cache again.
 	 */
 	ldr	r1, kernel_flush
 	blx	r1
@@ -115,146 +80,16 @@ ENTRY(sh7372_cpu_suspend)
 	dsb
 	dmb
 
-/*
- * ===================================
- * == WFI instruction => Enter idle ==
- * ===================================
- */
-	wfi				@ wait for interrupt
-
-/*
- * ===================================
- * == Resume path for non-OFF modes ==
- * ===================================
- */
-	mrc	p15, 0, r0, c1, c0, 0
-	tst	r0, #(1 << 2)		@ Check C bit enabled?
-	orreq	r0, r0, #(1 << 2)	@ Enable the C bit if cleared
-	mcreq	p15, 0, r0, c1, c0, 0
-	isb
-
-/*
- * ===================================
- * == Exit point from non-OFF modes ==
- * ===================================
- */
-	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
+#define SPDCR 0xe6180008
+#define A3SM (1 << 12)
 
-	.pool
+	/* A3SM power down */
+	ldr     r0, =SPDCR
+	ldr     r1, =A3SM
+	str     r1, [r0]
+1:
+	b      1b
 
-	.align	12
-	.text
-	.global	sh7372_cpu_resume
-sh7372_cpu_resume:
-
-	mov	r1, #0
-	/*
-	 * Invalidate all instruction caches to PoU
-	 * and flush branch target cache
-	 */
-	mcr	p15, 0, r1, c7, c5, 0
-
-	ldr	r3, =SMFRAM
-
-	ldmia	r3!, {r4-r6}
-	mov	sp, r4			@ Restore sp
-	msr	spsr_cxsf, r5		@ Restore spsr
-	mov	lr, r6			@ Restore lr
-
-	ldmia	r3!, {r4-r7}
-	mcr	p15, 0, r4, c1, c0, 2	@ Coprocessor access Control Register
-	mcr	p15, 0, r5, c2, c0, 0	@ TTBR0
-	mcr	p15, 0, r6, c2, c0, 1	@ TTBR1
-	mcr	p15, 0, r7, c2, c0, 2	@ TTBCR
-
-	ldmia	r3!,{r4-r6}
-	mcr	p15, 0, r4, c3, c0, 0	@ Domain access Control Register
-	mcr	p15, 0, r5, c10, c2, 0	@ PRRR
-	mcr	p15, 0, r6, c10, c2, 1	@ NMRR
-
-	ldmia	r3!,{r4-r7}
-	mcr	p15, 0, r4, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r5, c13, c0, 2	@ User r/w thread and process ID
-	mrc	p15, 0, r6, c12, c0, 0	@ Secure or NS vector base address
-	msr	cpsr, r7		@ store cpsr
-
-	/* Starting to enable MMU here */
-	mrc	p15, 0, r7, c2, c0, 2 	@ Read TTBRControl
-	/* Extract N (0:2) bits and decide whether to use TTBR0 or TTBR1 */
-	and	r7, #0x7
-	cmp	r7, #0x0
-	beq	usettbr0
-ttbr_error:
-	/*
-	 * More work needs to be done to support N[0:2] value other than 0
-	 * So looping here so that the error can be detected
-	 */
-	b	ttbr_error
-
-	.align
-cache_pred_disable_mask:
-	.word	0xFFFFE7FB
-ttbrbit_mask:
-	.word	0xFFFFC000
-table_index_mask:
-	.word	0xFFF00000
-table_entry:
-	.word	0x00000C02
-usettbr0:
-
-	mrc	p15, 0, r2, c2, c0, 0
-	ldr	r5, ttbrbit_mask
-	and	r2, r5
-	mov	r4, pc
-	ldr	r5, table_index_mask
-	and	r4, r5			@ r4 = 31 to 20 bits of pc
-	/* Extract the value to be written to table entry */
-	ldr	r6, table_entry
-	/* r6 has the value to be written to table entry */
-	add	r6, r6, r4
-	/* Getting the address of table entry to modify */
-	lsr	r4, #18
-	/* r2 has the location which needs to be modified */
-	add	r2, r4
-	ldr	r4, [r2]
-	str	r6, [r2] /* modify the table entry */
-
-	mov	r7, r6
-	mov	r5, r2
-	mov	r6, r4
-	/* r5 = original page table address */
-	/* r6 = original page table data */
-
-	mov	r0, #0
-	mcr	p15, 0, r0, c7, c5, 4	@ Flush prefetch buffer
-	mcr	p15, 0, r0, c7, c5, 6	@ Invalidate branch predictor array
-	mcr	p15, 0, r0, c8, c5, 0	@ Invalidate instruction TLB
-	mcr	p15, 0, r0, c8, c6, 0	@ Invalidate data TLB
-
-	/*
-	 * Restore control register. This enables the MMU.
-	 * The caches and prediction are not enabled here, they
-	 * will be enabled after restoring the MMU table entry.
-	 */
-	ldmia	r3!, {r4}
-	stmia	r3!, {r5} /* save original page table address */
-	stmia	r3!, {r6} /* save original page table data */
-	stmia	r3!, {r7} /* save modified page table data */
-
-	ldr	r2, cache_pred_disable_mask
-	and	r4, r2
-	mcr	p15, 0, r4, c1, c0, 0
-	dsb
-	isb
-
-	ldr     r0, =restoremmu_on
-	bx      r0
-
-/*
- * ==============================
- * == Exit point from OFF mode ==
- * ==============================
- */
-restoremmu_on:
-
-	ldmfd	sp!, {r0-r12, pc}	@ restore regs and return
+kernel_flush:
+	.word v7_flush_dcache_all
+#endif
diff --git a/arch/arm/mach-spear3xx/Makefile.boot b/arch/arm/mach-spear3xx/Makefile.boot
index 7a1f3c0eadb8..4674a4c221db 100644
--- a/arch/arm/mach-spear3xx/Makefile.boot
+++ b/arch/arm/mach-spear3xx/Makefile.boot
@@ -1,3 +1,3 @@
-zreladdr-y	:= 0x00008000
+zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-spear6xx/Makefile.boot b/arch/arm/mach-spear6xx/Makefile.boot
index 7a1f3c0eadb8..4674a4c221db 100644
--- a/arch/arm/mach-spear6xx/Makefile.boot
+++ b/arch/arm/mach-spear6xx/Makefile.boot
@@ -1,3 +1,3 @@
-zreladdr-y	:= 0x00008000
+zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mach-tcc8k/Makefile.boot b/arch/arm/mach-tcc8k/Makefile.boot
index f135c9deae10..5e02d4156b04 100644
--- a/arch/arm/mach-tcc8k/Makefile.boot
+++ b/arch/arm/mach-tcc8k/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y		:= 0x20008000
+   zreladdr-y		+= 0x20008000
 params_phys-y		:= 0x20000100
 initrd_phys-y		:= 0x20800000
diff --git a/arch/arm/mach-tegra/Makefile.boot b/arch/arm/mach-tegra/Makefile.boot
index 428ad122be03..5e870d29eca1 100644
--- a/arch/arm/mach-tegra/Makefile.boot
+++ b/arch/arm/mach-tegra/Makefile.boot
@@ -1,4 +1,4 @@
-zreladdr-$(CONFIG_ARCH_TEGRA_2x_SOC)	:= 0x00008000
+zreladdr-$(CONFIG_ARCH_TEGRA_2x_SOC)	+= 0x00008000
 params_phys-$(CONFIG_ARCH_TEGRA_2x_SOC)	:= 0x00000100
 initrd_phys-$(CONFIG_ARCH_TEGRA_2x_SOC)	:= 0x00800000
 
diff --git a/arch/arm/mach-tegra/board-harmony.c b/arch/arm/mach-tegra/board-harmony.c
index a4d1980e697a..93c793f48caf 100644
--- a/arch/arm/mach-tegra/board-harmony.c
+++ b/arch/arm/mach-tegra/board-harmony.c
@@ -123,8 +123,8 @@ static struct platform_device *harmony_devices[] __initdata = {
 	&harmony_audio_device,
 };
 
-static void __init tegra_harmony_fixup(struct machine_desc *desc,
-	struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init tegra_harmony_fixup(struct tag *tags, char **cmdline,
+	struct meminfo *mi)
 {
 	mi->nr_banks = 2;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c
index 3197c4cbaa71..fbc9e0ed926e 100644
--- a/arch/arm/mach-tegra/board-paz00.c
+++ b/arch/arm/mach-tegra/board-paz00.c
@@ -84,8 +84,8 @@ static void paz00_usb_init(void)
 	platform_device_register(&tegra_ehci3_device);
 }
 
-static void __init tegra_paz00_fixup(struct machine_desc *desc,
-	struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init tegra_paz00_fixup(struct tag *tags, char **cmdline,
+	struct meminfo *mi)
 {
 	mi->nr_banks = 1;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-tegra/board-trimslice.c b/arch/arm/mach-tegra/board-trimslice.c
index 8489aa8f5154..e3d9ec2f0fe1 100644
--- a/arch/arm/mach-tegra/board-trimslice.c
+++ b/arch/arm/mach-tegra/board-trimslice.c
@@ -126,8 +126,8 @@ static void trimslice_usb_init(void)
 	platform_device_register(&tegra_ehci1_device);
 }
 
-static void __init tegra_trimslice_fixup(struct machine_desc *desc,
-	struct tag *tags, char **cmdline, struct meminfo *mi)
+static void __init tegra_trimslice_fixup(struct tag *tags, char **cmdline,
+	struct meminfo *mi)
 {
 	mi->nr_banks = 2;
 	mi->bank[0].start = PHYS_OFFSET;
diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c
index 0e1016a827ac..0e0fd4d889bd 100644
--- a/arch/arm/mach-tegra/cpu-tegra.c
+++ b/arch/arm/mach-tegra/cpu-tegra.c
@@ -32,7 +32,6 @@
 
 #include <asm/system.h>
 
-#include <mach/hardware.h>
 #include <mach/clk.h>
 
 /* Frequency table index must be sequential starting at 0 */
diff --git a/arch/arm/mach-tegra/include/mach/gpio.h b/arch/arm/mach-tegra/include/mach/gpio.h
index e69de29bb2d1..40a8c178f10d 100644
--- a/arch/arm/mach-tegra/include/mach/gpio.h
+++ b/arch/arm/mach-tegra/include/mach/gpio.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 0886cbccddee..7d2b5d03c1df 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -114,10 +114,10 @@ void __init smp_init_cpus(void)
 {
 	unsigned int i, ncores = scu_get_core_count(scu_base);
 
-	if (ncores > NR_CPUS) {
-		printk(KERN_ERR "Tegra: no. of cores (%u) greater than configured (%u), clipping\n",
-			ncores, NR_CPUS);
-		ncores = NR_CPUS;
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
 	}
 
 	for (i = 0; i < ncores; i++)
diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig
index 7b5c229dc7ea..d6e5d306557b 100644
--- a/arch/arm/mach-u300/Kconfig
+++ b/arch/arm/mach-u300/Kconfig
@@ -6,6 +6,8 @@ comment "ST-Ericsson Mobile Platform Products"
 
 config MACH_U300
 	bool "U300"
+	select PINCTRL
+	select PINMUX_U300
 	select GPIO_U300
 
 comment "ST-Ericsson U300/U330/U335/U365 Feature Selections"
diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile
index 8fd354aaf0a7..285538124e5e 100644
--- a/arch/arm/mach-u300/Makefile
+++ b/arch/arm/mach-u300/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel, U300 machine.
 #
 
-obj-y		:= core.o clock.o timer.o padmux.o
+obj-y		:= core.o clock.o timer.o
 obj-m		:=
 obj-n		:=
 obj-		:=
diff --git a/arch/arm/mach-u300/Makefile.boot b/arch/arm/mach-u300/Makefile.boot
index 6fbfc6ea2d35..69357affbd77 100644
--- a/arch/arm/mach-u300/Makefile.boot
+++ b/arch/arm/mach-u300/Makefile.boot
@@ -4,10 +4,10 @@
 #   INITRD_PHYS must be in RAM
 
 ifdef CONFIG_MACH_U300_SINGLE_RAM
-     zreladdr-y	:= 0x28E08000
+     zreladdr-y	+= 0x28E08000
   params_phys-y	:= 0x28E00100
 else
-     zreladdr-y	:= 0x48008000
+     zreladdr-y	+= 0x48008000
   params_phys-y	:= 0x48000100
 endif
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 22c5ab79a74c..f4ad6d2e26f3 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -25,6 +25,8 @@
 #include <linux/err.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/fsmc.h>
+#include <linux/pinctrl/machine.h>
+#include <linux/pinctrl/pinmux.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/types.h>
@@ -1539,6 +1541,14 @@ static struct coh901318_platform coh901318_platform = {
 	.max_channels = U300_DMA_CHANNELS,
 };
 
+static struct resource pinmux_resources[] = {
+	{
+		.start = U300_SYSCON_BASE,
+		.end   = U300_SYSCON_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
 static struct platform_device wdog_device = {
 	.name = "coh901327_wdog",
 	.id = -1,
@@ -1658,6 +1668,72 @@ static struct platform_device dma_device = {
 	},
 };
 
+static struct platform_device pinmux_device = {
+	.name = "pinmux-u300",
+	.id = -1,
+	.num_resources = ARRAY_SIZE(pinmux_resources),
+	.resource = pinmux_resources,
+};
+
+/* Pinmux settings */
+static struct pinmux_map u300_pinmux_map[] = {
+	/* anonymous maps for chip power and EMIFs */
+	PINMUX_MAP_PRIMARY_SYS_HOG("POWER", "power"),
+	PINMUX_MAP_PRIMARY_SYS_HOG("EMIF0", "emif0"),
+	PINMUX_MAP_PRIMARY_SYS_HOG("EMIF1", "emif1"),
+	/* per-device maps for MMC/SD, SPI and UART */
+	PINMUX_MAP_PRIMARY("MMCSD", "mmc0", "mmci"),
+	PINMUX_MAP_PRIMARY("SPI", "spi0", "pl022"),
+	PINMUX_MAP_PRIMARY("UART0", "uart0", "uart0"),
+};
+
+struct u300_mux_hog {
+	const char *name;
+	struct device *dev;
+	struct pinmux *pmx;
+};
+
+static struct u300_mux_hog u300_mux_hogs[] = {
+	{
+		.name = "uart0",
+		.dev = &uart0_device.dev,
+	},
+	{
+		.name = "spi0",
+		.dev = &pl022_device.dev,
+	},
+	{
+		.name = "mmc0",
+		.dev = &mmcsd_device.dev,
+	},
+};
+
+static int __init u300_pinmux_fetch(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(u300_mux_hogs); i++) {
+		struct pinmux *pmx;
+		int ret;
+
+		pmx = pinmux_get(u300_mux_hogs[i].dev, NULL);
+		if (IS_ERR(pmx)) {
+			pr_err("u300: could not get pinmux hog %s\n",
+			       u300_mux_hogs[i].name);
+			continue;
+		}
+		ret = pinmux_enable(pmx);
+		if (ret) {
+			pr_err("u300: could enable pinmux hog %s\n",
+			       u300_mux_hogs[i].name);
+			continue;
+		}
+		u300_mux_hogs[i].pmx = pmx;
+	}
+	return 0;
+}
+subsys_initcall(u300_pinmux_fetch);
+
 /*
  * Notice that AMBA devices are initialized before platform devices.
  *
@@ -1671,10 +1747,10 @@ static struct platform_device *platform_devs[] __initdata = {
 	&gpio_device,
 	&nand_device,
 	&wdog_device,
-	&ave_device
+	&ave_device,
+	&pinmux_device,
 };
 
-
 /*
  * Interrupts: the U300 platforms have two pl190 ARM PrimeCells connected
  * together so some interrupts are connected to the first one and some
@@ -1856,6 +1932,10 @@ void __init u300_init_devices(void)
 
 	u300_assign_physmem();
 
+	/* Initialize pinmuxing */
+	pinmux_register_mappings(u300_pinmux_map,
+				 ARRAY_SIZE(u300_pinmux_map));
+
 	/* Register subdevices on the I2C buses */
 	u300_i2c_register_board_devices();
 
diff --git a/arch/arm/mach-u300/include/mach/gpio.h b/arch/arm/mach-u300/include/mach/gpio.h
index e69de29bb2d1..40a8c178f10d 100644
--- a/arch/arm/mach-u300/include/mach/gpio.h
+++ b/arch/arm/mach-u300/include/mach/gpio.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/arch/arm/mach-u300/include/mach/syscon.h b/arch/arm/mach-u300/include/mach/syscon.h
index 7444f5c7da97..6e84f07a7c6f 100644
--- a/arch/arm/mach-u300/include/mach/syscon.h
+++ b/arch/arm/mach-u300/include/mach/syscon.h
@@ -234,91 +234,6 @@
 #define U300_SYSCON_ECCR_EMIF_1_RET_OUT_CLK_EN_N_DISABLE	(0x0004)
 #define U300_SYSCON_ECCR_EMIF_MEMCLK_RET_EN_N_DISABLE		(0x0002)
 #define U300_SYSCON_ECCR_EMIF_SDRCLK_RET_EN_N_DISABLE		(0x0001)
-/* PAD MUX Control register 1 (LOW) 16bit (R/W) */
-#define U300_SYSCON_PMC1LR					(0x007C)
-#define U300_SYSCON_PMC1LR_MASK					(0xFFFF)
-#define U300_SYSCON_PMC1LR_CDI_MASK				(0xC000)
-#define U300_SYSCON_PMC1LR_CDI_CDI				(0x0000)
-#define U300_SYSCON_PMC1LR_CDI_EMIF				(0x4000)
-#ifdef CONFIG_MACH_U300_BS335
-#define U300_SYSCON_PMC1LR_CDI_CDI2				(0x8000)
-#define U300_SYSCON_PMC1LR_CDI_WCDMA_APP_GPIO			(0xC000)
-#elif CONFIG_MACH_U300_BS365
-#define U300_SYSCON_PMC1LR_CDI_GPIO				(0x8000)
-#define U300_SYSCON_PMC1LR_CDI_WCDMA				(0xC000)
-#endif
-#define U300_SYSCON_PMC1LR_PDI_MASK				(0x3000)
-#define U300_SYSCON_PMC1LR_PDI_PDI				(0x0000)
-#define U300_SYSCON_PMC1LR_PDI_EGG				(0x1000)
-#define U300_SYSCON_PMC1LR_PDI_WCDMA				(0x3000)
-#define U300_SYSCON_PMC1LR_MMCSD_MASK				(0x0C00)
-#define U300_SYSCON_PMC1LR_MMCSD_MMCSD				(0x0000)
-#define U300_SYSCON_PMC1LR_MMCSD_MSPRO				(0x0400)
-#define U300_SYSCON_PMC1LR_MMCSD_DSP				(0x0800)
-#define U300_SYSCON_PMC1LR_MMCSD_WCDMA				(0x0C00)
-#define U300_SYSCON_PMC1LR_ETM_MASK				(0x0300)
-#define U300_SYSCON_PMC1LR_ETM_ACC				(0x0000)
-#define U300_SYSCON_PMC1LR_ETM_APP				(0x0100)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS2_MASK			(0x00C0)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC			(0x0000)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS2_NFIF			(0x0040)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS2_SDRAM			(0x0080)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC_2GB		(0x00C0)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS1_MASK			(0x0030)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS1_STATIC			(0x0000)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS1_NFIF			(0x0010)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SDRAM			(0x0020)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SEMI			(0x0030)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS0_MASK			(0x000C)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS0_STATIC			(0x0000)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS0_NFIF			(0x0004)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SDRAM			(0x0008)
-#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SEMI			(0x000C)
-#define U300_SYSCON_PMC1LR_EMIF_1_MASK				(0x0003)
-#define U300_SYSCON_PMC1LR_EMIF_1_STATIC			(0x0000)
-#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM0			(0x0001)
-#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM1			(0x0002)
-#define U300_SYSCON_PMC1LR_EMIF_1				(0x0003)
-/* PAD MUX Control register 2 (HIGH) 16bit (R/W) */
-#define U300_SYSCON_PMC1HR					(0x007E)
-#define U300_SYSCON_PMC1HR_MASK					(0xFFFF)
-#define U300_SYSCON_PMC1HR_MISC_2_MASK				(0xC000)
-#define U300_SYSCON_PMC1HR_MISC_2_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_MISC_2_MSPRO				(0x4000)
-#define U300_SYSCON_PMC1HR_MISC_2_DSP				(0x8000)
-#define U300_SYSCON_PMC1HR_MISC_2_AAIF				(0xC000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_2_MASK			(0x3000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_2_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_2_NFIF			(0x1000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_2_DSP			(0x2000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_2_AAIF			(0x3000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_1_MASK			(0x0C00)
-#define U300_SYSCON_PMC1HR_APP_GPIO_1_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_APP_GPIO_1_MMC			(0x0400)
-#define U300_SYSCON_PMC1HR_APP_GPIO_1_DSP			(0x0800)
-#define U300_SYSCON_PMC1HR_APP_GPIO_1_AAIF			(0x0C00)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK			(0x0300)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_APP_GPIO		(0x0000)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI			(0x0100)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_AAIF			(0x0300)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK			(0x00C0)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_APP_GPIO		(0x0000)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI			(0x0040)
-#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_AAIF			(0x00C0)
-#define U300_SYSCON_PMC1HR_APP_SPI_2_MASK			(0x0030)
-#define U300_SYSCON_PMC1HR_APP_SPI_2_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_APP_SPI_2_SPI			(0x0010)
-#define U300_SYSCON_PMC1HR_APP_SPI_2_DSP			(0x0020)
-#define U300_SYSCON_PMC1HR_APP_SPI_2_AAIF			(0x0030)
-#define U300_SYSCON_PMC1HR_APP_UART0_2_MASK			(0x000C)
-#define U300_SYSCON_PMC1HR_APP_UART0_2_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_APP_UART0_2_UART0			(0x0004)
-#define U300_SYSCON_PMC1HR_APP_UART0_2_NFIF_CS			(0x0008)
-#define U300_SYSCON_PMC1HR_APP_UART0_2_AAIF			(0x000C)
-#define U300_SYSCON_PMC1HR_APP_UART0_1_MASK			(0x0003)
-#define U300_SYSCON_PMC1HR_APP_UART0_1_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC1HR_APP_UART0_1_UART0			(0x0001)
-#define U300_SYSCON_PMC1HR_APP_UART0_1_AAIF			(0x0003)
 /* Step one for killing the applications system 16bit (-/W) */
 #define U300_SYSCON_KA1R					(0x0080)
 #define U300_SYSCON_KA1R_MASK					(0xFFFF)
@@ -357,57 +272,6 @@
 #define U300_SYSCON_PUCR_EMIF_1_16BIT_PU_ENABLE			(0x0080)
 #define U300_SYSCON_PUCR_EMIF_1_8BIT_PU_ENABLE			(0x0040)
 #define U300_SYSCON_PUCR_KEY_IN_PU_EN_MASK			(0x003F)
-/* Padmux 2 control */
-#define U300_SYSCON_PMC2R					(0x100)
-#define U300_SYSCON_PMC2R_APP_MISC_0_MASK			(0x00C0)
-#define U300_SYSCON_PMC2R_APP_MISC_0_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC2R_APP_MISC_0_EMIF_SDRAM			(0x0040)
-#define U300_SYSCON_PMC2R_APP_MISC_0_MMC			(0x0080)
-#define U300_SYSCON_PMC2R_APP_MISC_0_CDI2			(0x00C0)
-#define U300_SYSCON_PMC2R_APP_MISC_1_MASK			(0x0300)
-#define U300_SYSCON_PMC2R_APP_MISC_1_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC2R_APP_MISC_1_EMIF_SDRAM			(0x0100)
-#define U300_SYSCON_PMC2R_APP_MISC_1_MMC			(0x0200)
-#define U300_SYSCON_PMC2R_APP_MISC_1_CDI2			(0x0300)
-#define U300_SYSCON_PMC2R_APP_MISC_2_MASK			(0x0C00)
-#define U300_SYSCON_PMC2R_APP_MISC_2_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC2R_APP_MISC_2_EMIF_SDRAM			(0x0400)
-#define U300_SYSCON_PMC2R_APP_MISC_2_MMC			(0x0800)
-#define U300_SYSCON_PMC2R_APP_MISC_2_CDI2			(0x0C00)
-#define U300_SYSCON_PMC2R_APP_MISC_3_MASK			(0x3000)
-#define U300_SYSCON_PMC2R_APP_MISC_3_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC2R_APP_MISC_3_EMIF_SDRAM			(0x1000)
-#define U300_SYSCON_PMC2R_APP_MISC_3_MMC			(0x2000)
-#define U300_SYSCON_PMC2R_APP_MISC_3_CDI2			(0x3000)
-#define U300_SYSCON_PMC2R_APP_MISC_4_MASK			(0xC000)
-#define U300_SYSCON_PMC2R_APP_MISC_4_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC2R_APP_MISC_4_EMIF_SDRAM			(0x4000)
-#define U300_SYSCON_PMC2R_APP_MISC_4_MMC			(0x8000)
-#define U300_SYSCON_PMC2R_APP_MISC_4_ACC_GPIO			(0xC000)
-/* TODO: More SYSCON registers missing */
-#define U300_SYSCON_PMC3R					(0x10c)
-#define U300_SYSCON_PMC3R_APP_MISC_11_MASK			(0xc000)
-#define U300_SYSCON_PMC3R_APP_MISC_11_SPI			(0x4000)
-#define U300_SYSCON_PMC3R_APP_MISC_10_MASK			(0x3000)
-#define U300_SYSCON_PMC3R_APP_MISC_10_SPI			(0x1000)
-/* TODO: Missing other configs */
-#define U300_SYSCON_PMC4R					(0x168)
-#define U300_SYSCON_PMC4R_APP_MISC_12_MASK			(0x0003)
-#define U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO			(0x0000)
-#define U300_SYSCON_PMC4R_APP_MISC_13_MASK			(0x000C)
-#define U300_SYSCON_PMC4R_APP_MISC_13_CDI			(0x0000)
-#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA			(0x0004)
-#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA2			(0x0008)
-#define U300_SYSCON_PMC4R_APP_MISC_13_APP_GPIO			(0x000C)
-#define U300_SYSCON_PMC4R_APP_MISC_14_MASK			(0x0030)
-#define U300_SYSCON_PMC4R_APP_MISC_14_CDI			(0x0000)
-#define U300_SYSCON_PMC4R_APP_MISC_14_SMIA			(0x0010)
-#define U300_SYSCON_PMC4R_APP_MISC_14_CDI2			(0x0020)
-#define U300_SYSCON_PMC4R_APP_MISC_14_APP_GPIO			(0x0030)
-#define U300_SYSCON_PMC4R_APP_MISC_16_MASK			(0x0300)
-#define U300_SYSCON_PMC4R_APP_MISC_16_APP_GPIO_13		(0x0000)
-#define U300_SYSCON_PMC4R_APP_MISC_16_APP_UART1_CTS		(0x0100)
-#define U300_SYSCON_PMC4R_APP_MISC_16_EMIF_1_STATIC_CS5_N	(0x0200)
 /* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */
 #define U300_SYSCON_S0CCR					(0x120)
 #define U300_SYSCON_S0CCR_FIELD_MASK				(0x43FF)
diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c
index d9a5c92db74a..4d482aacc272 100644
--- a/arch/arm/mach-u300/mmc.c
+++ b/arch/arm/mach-u300/mmc.c
@@ -21,7 +21,6 @@
 #include <mach/gpio-u300.h>
 
 #include "mmc.h"
-#include "padmux.h"
 
 static struct mmci_platform_data mmc0_plat_data = {
 	/*
@@ -45,24 +44,9 @@ static struct mmci_platform_data mmc0_plat_data = {
 int __devinit mmc_init(struct amba_device *adev)
 {
 	struct device *mmcsd_device = &adev->dev;
-	struct pmx *pmx;
 	int ret = 0;
 
 	mmcsd_device->platform_data = &mmc0_plat_data;
 
-	/*
-	 * Setup padmuxing for MMC. Since this must always be
-	 * compiled into the kernel, pmx is never released.
-	 */
-	pmx = pmx_get(mmcsd_device, U300_APP_PMX_MMC_SETTING);
-
-	if (IS_ERR(pmx))
-		pr_warning("Could not get padmux handle\n");
-	else {
-		ret = pmx_activate(mmcsd_device, pmx);
-		if (IS_ERR_VALUE(ret))
-			pr_warning("Could not activate padmuxing\n");
-	}
-
 	return ret;
 }
diff --git a/arch/arm/mach-u300/padmux.c b/arch/arm/mach-u300/padmux.c
deleted file mode 100644
index 4c93c6cefd37..000000000000
--- a/arch/arm/mach-u300/padmux.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- *
- * arch/arm/mach-u300/padmux.c
- *
- *
- * Copyright (C) 2009 ST-Ericsson AB
- * License terms: GNU General Public License (GPL) version 2
- * U300 PADMUX functions
- * Author: Martin Persson <martin.persson@stericsson.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-#include <linux/string.h>
-#include <linux/bug.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <mach/u300-regs.h>
-#include <mach/syscon.h>
-#include "padmux.h"
-
-static DEFINE_MUTEX(pmx_mutex);
-
-const u32 pmx_registers[] = {
-	(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR),
-	(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR),
-	(U300_SYSCON_VBASE + U300_SYSCON_PMC2R),
-	(U300_SYSCON_VBASE + U300_SYSCON_PMC3R),
-	(U300_SYSCON_VBASE + U300_SYSCON_PMC4R)
-};
-
-/* High level functionality */
-
-/* Lazy dog:
- * onmask = {
- *   {"PMC1LR" mask, "PMC1LR" value},
- *   {"PMC1HR" mask, "PMC1HR" value},
- *   {"PMC2R"  mask, "PMC2R"  value},
- *   {"PMC3R"  mask, "PMC3R"  value},
- *   {"PMC4R"  mask, "PMC4R"  value}
- * }
- */
-static struct pmx mmc_setting = {
-	.setting = U300_APP_PMX_MMC_SETTING,
-	.default_on = false,
-	.activated = false,
-	.name = "MMC",
-	.onmask = {
-		   {U300_SYSCON_PMC1LR_MMCSD_MASK,
-		    U300_SYSCON_PMC1LR_MMCSD_MMCSD},
-		   {0, 0},
-		   {0, 0},
-		   {0, 0},
-		   {U300_SYSCON_PMC4R_APP_MISC_12_MASK,
-		    U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO}
-		   },
-};
-
-static struct pmx spi_setting = {
-	.setting = U300_APP_PMX_SPI_SETTING,
-	.default_on = false,
-	.activated = false,
-	.name = "SPI",
-	.onmask = {{0, 0},
-		   {U300_SYSCON_PMC1HR_APP_SPI_2_MASK |
-		    U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK |
-		    U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK,
-		    U300_SYSCON_PMC1HR_APP_SPI_2_SPI |
-		    U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI |
-		    U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI},
-		   {0, 0},
-		   {0, 0},
-		   {0, 0}
-		   },
-};
-
-/* Available padmux settings */
-static struct pmx *pmx_settings[] = {
-	&mmc_setting,
-	&spi_setting,
-};
-
-static void update_registers(struct pmx *pmx, bool activate)
-{
-	u16 regval, val, mask;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(pmx_registers); i++) {
-		if (activate)
-			val = pmx->onmask[i].val;
-		else
-			val = 0;
-
-		mask = pmx->onmask[i].mask;
-		if (mask != 0) {
-			regval = readw(pmx_registers[i]);
-			regval &= ~mask;
-			regval |= val;
-			writew(regval, pmx_registers[i]);
-		}
-	}
-}
-
-struct pmx *pmx_get(struct device *dev, enum pmx_settings setting)
-{
-	int i;
-	struct pmx *pmx = ERR_PTR(-ENOENT);
-
-	if (dev == NULL)
-		return ERR_PTR(-EINVAL);
-
-	mutex_lock(&pmx_mutex);
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-
-		if (setting == pmx_settings[i]->setting) {
-
-			if (pmx_settings[i]->dev != NULL) {
-				WARN(1, "padmux: required setting "
-				     "in use by another consumer\n");
-			} else {
-				pmx = pmx_settings[i];
-				pmx->dev = dev;
-				dev_dbg(dev, "padmux: setting nr %d is now "
-					"bound to %s and ready to use\n",
-					setting, dev_name(dev));
-				break;
-			}
-		}
-	}
-	mutex_unlock(&pmx_mutex);
-
-	return pmx;
-}
-EXPORT_SYMBOL(pmx_get);
-
-int pmx_put(struct device *dev, struct pmx *pmx)
-{
-	int i;
-	int ret = -ENOENT;
-
-	if (pmx == NULL || dev == NULL)
-		return -EINVAL;
-
-	mutex_lock(&pmx_mutex);
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-
-		if (pmx->setting == pmx_settings[i]->setting) {
-
-			if (dev != pmx->dev) {
-				WARN(1, "padmux: cannot release handle as "
-					"it is bound to another consumer\n");
-				ret = -EINVAL;
-				break;
-			} else {
-				pmx_settings[i]->dev = NULL;
-				ret = 0;
-				break;
-			}
-		}
-	}
-	mutex_unlock(&pmx_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL(pmx_put);
-
-int pmx_activate(struct device *dev, struct pmx *pmx)
-{
-	int i, j, ret;
-	ret = 0;
-
-	if (pmx == NULL || dev == NULL)
-		return -EINVAL;
-
-	mutex_lock(&pmx_mutex);
-
-	/* Make sure the required bits are not used */
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-
-		if (pmx_settings[i]->dev == NULL || pmx_settings[i] == pmx)
-			continue;
-
-		for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
-
-			if (pmx_settings[i]->onmask[j].mask & pmx->
-				onmask[j].mask) {
-				/* More than one entry on the same bits */
-				WARN(1, "padmux: cannot activate "
-					"setting. Bit conflict with "
-					"an active setting\n");
-
-				ret = -EUSERS;
-				goto exit;
-			}
-		}
-	}
-	update_registers(pmx, true);
-	pmx->activated = true;
-	dev_dbg(dev, "padmux: setting nr %d is activated\n",
-		pmx->setting);
-
-exit:
-	mutex_unlock(&pmx_mutex);
-	return ret;
-}
-EXPORT_SYMBOL(pmx_activate);
-
-int pmx_deactivate(struct device *dev, struct pmx *pmx)
-{
-	int i;
-	int ret = -ENOENT;
-
-	if (pmx == NULL || dev == NULL)
-		return -EINVAL;
-
-	mutex_lock(&pmx_mutex);
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-
-		if (pmx_settings[i]->dev == NULL)
-			continue;
-
-		if (pmx->setting == pmx_settings[i]->setting) {
-
-			if (dev != pmx->dev) {
-				WARN(1, "padmux: cannot deactivate "
-				     "pmx setting as it was activated "
-				     "by another consumer\n");
-
-				ret = -EBUSY;
-				continue;
-			} else {
-				update_registers(pmx, false);
-				pmx_settings[i]->dev = NULL;
-				pmx->activated = false;
-				ret = 0;
-				dev_dbg(dev, "padmux: setting nr %d is deactivated",
-					pmx->setting);
-				break;
-			}
-		}
-	}
-	mutex_unlock(&pmx_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL(pmx_deactivate);
-
-/*
- * For internal use only. If it is to be exported,
- * it should be reentrant. Notice that pmx_activate
- * (i.e. runtime settings) always override default settings.
- */
-static int pmx_set_default(void)
-{
-	/* Used to identify several entries on the same bits */
-	u16 modbits[ARRAY_SIZE(pmx_registers)];
-
-	int i, j;
-
-	memset(modbits, 0, ARRAY_SIZE(pmx_registers) * sizeof(u16));
-
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-
-		if (!pmx_settings[i]->default_on)
-			continue;
-
-		for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
-
-			/* Make sure there is only one entry on the same bits */
-			if (modbits[j] & pmx_settings[i]->onmask[j].mask) {
-				BUG();
-				return -EUSERS;
-			}
-			modbits[j] |= pmx_settings[i]->onmask[j].mask;
-		}
-		update_registers(pmx_settings[i], true);
-	}
-	return 0;
-}
-
-#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
-static int pmx_show(struct seq_file *s, void *data)
-{
-	int i;
-	seq_printf(s, "-------------------------------------------------\n");
-	seq_printf(s, "SETTING     BOUND TO DEVICE               STATE\n");
-	seq_printf(s, "-------------------------------------------------\n");
-	mutex_lock(&pmx_mutex);
-	for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
-		/* Format pmx and device name nicely */
-		char cdp[33];
-		int chars;
-
-		chars = snprintf(&cdp[0], 17, "%s", pmx_settings[i]->name);
-		while (chars < 16) {
-			cdp[chars] = ' ';
-			chars++;
-		}
-		chars = snprintf(&cdp[16], 17, "%s", pmx_settings[i]->dev ?
-				dev_name(pmx_settings[i]->dev) : "N/A");
-		while (chars < 16) {
-			cdp[chars+16] = ' ';
-			chars++;
-		}
-		cdp[32] = '\0';
-
-		seq_printf(s,
-			"%s\t%s\n",
-			&cdp[0],
-			pmx_settings[i]->activated ?
-			"ACTIVATED" : "DEACTIVATED"
-			);
-
-	}
-	mutex_unlock(&pmx_mutex);
-	return 0;
-}
-
-static int pmx_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pmx_show, NULL);
-}
-
-static const struct file_operations pmx_operations = {
-	.owner		= THIS_MODULE,
-	.open		= pmx_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init init_pmx_read_debugfs(void)
-{
-	/* Expose a simple debugfs interface to view pmx settings */
-	(void) debugfs_create_file("padmux", S_IFREG | S_IRUGO,
-				   NULL, NULL,
-				   &pmx_operations);
-	return 0;
-}
-
-/*
- * This needs to come in after the core_initcall(),
- * because debugfs is not available until
- * the subsystems come up.
- */
-module_init(init_pmx_read_debugfs);
-#endif
-
-static int __init pmx_init(void)
-{
-	int ret;
-
-	ret = pmx_set_default();
-
-	if (IS_ERR_VALUE(ret))
-		pr_crit("padmux: default settings could not be set\n");
-
-	return 0;
-}
-
-/* Should be initialized before consumers */
-core_initcall(pmx_init);
diff --git a/arch/arm/mach-u300/padmux.h b/arch/arm/mach-u300/padmux.h
deleted file mode 100644
index 6e8b86064097..000000000000
--- a/arch/arm/mach-u300/padmux.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *
- * arch/arm/mach-u300/padmux.h
- *
- *
- * Copyright (C) 2009 ST-Ericsson AB
- * License terms: GNU General Public License (GPL) version 2
- * U300 PADMUX API
- * Author: Martin Persson <martin.persson@stericsson.com>
- */
-
-#ifndef __MACH_U300_PADMUX_H
-#define __MACH_U300_PADMUX_H
-
-enum pmx_settings {
-	U300_APP_PMX_MMC_SETTING,
-	U300_APP_PMX_SPI_SETTING
-};
-
-struct pmx_onmask {
-	u16 mask;		/* Mask bits */
-	u16 val;		/* Value when active */
-};
-
-struct pmx {
-	struct device *dev;
-	enum pmx_settings setting;
-	char *name;
-	bool activated;
-	bool default_on;
-	struct pmx_onmask onmask[];
-};
-
-struct pmx *pmx_get(struct device *dev, enum pmx_settings setting);
-int pmx_put(struct device *dev, struct pmx *pmx);
-int pmx_activate(struct device *dev, struct pmx *pmx);
-int pmx_deactivate(struct device *dev, struct pmx *pmx);
-
-#endif
diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c
index 7b597e2b19e2..a1affacfa59c 100644
--- a/arch/arm/mach-u300/spi.c
+++ b/arch/arm/mach-u300/spi.c
@@ -14,8 +14,6 @@
 #include <mach/coh901318.h>
 #include <mach/dma_channels.h>
 
-#include "padmux.h"
-
 /*
  * The following is for the actual devices on the SSP/SPI bus
  */
@@ -95,25 +93,7 @@ static struct pl022_ssp_controller ssp_platform_data = {
 
 void __init u300_spi_init(struct amba_device *adev)
 {
-	struct pmx *pmx;
-
 	adev->dev.platform_data = &ssp_platform_data;
-	/*
-	 * Setup padmuxing for SPI. Since this must always be
-	 * compiled into the kernel, pmx is never released.
-	 */
-	pmx = pmx_get(&adev->dev, U300_APP_PMX_SPI_SETTING);
-
-	if (IS_ERR(pmx))
-		dev_warn(&adev->dev, "Could not get padmux handle\n");
-	else {
-		int ret;
-
-		ret = pmx_activate(&adev->dev, pmx);
-		if (IS_ERR_VALUE(ret))
-			dev_warn(&adev->dev, "Could not activate padmuxing\n");
-	}
-
 }
 
 void __init u300_spi_register_board_devices(void)
diff --git a/arch/arm/mach-ux500/Makefile.boot b/arch/arm/mach-ux500/Makefile.boot
index c7e75acfe6c9..ff0a4b5b0a82 100644
--- a/arch/arm/mach-ux500/Makefile.boot
+++ b/arch/arm/mach-ux500/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
index a33df5f4c27a..eb5199102cfa 100644
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -156,12 +156,10 @@ void __init smp_init_cpus(void)
 	ncores = scu_base ? scu_get_core_count(scu_base) : 1;
 
 	/* sanity check */
-	if (ncores > NR_CPUS) {
-		printk(KERN_WARNING
-		       "U8500: no. of cores (%d) greater than configured "
-		       "maximum of %d - clipping\n",
-		       ncores, NR_CPUS);
-		ncores = NR_CPUS;
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
 	}
 
 	for (i = 0; i < ncores; i++)
diff --git a/arch/arm/mach-versatile/Makefile.boot b/arch/arm/mach-versatile/Makefile.boot
index c7e75acfe6c9..ff0a4b5b0a82 100644
--- a/arch/arm/mach-versatile/Makefile.boot
+++ b/arch/arm/mach-versatile/Makefile.boot
@@ -1,4 +1,4 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
 
diff --git a/arch/arm/mach-vexpress/Makefile.boot b/arch/arm/mach-vexpress/Makefile.boot
index 07c2d9c457ec..8630b3d10a4d 100644
--- a/arch/arm/mach-vexpress/Makefile.boot
+++ b/arch/arm/mach-vexpress/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x60008000
+   zreladdr-y	+= 0x60008000
 params_phys-y	:= 0x60000100
 initrd_phys-y	:= 0x60800000
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index bfd32f52c2db..2b1e836a76ed 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -221,6 +221,12 @@ static void ct_ca9x4_init_cpu_map(void)
 {
 	int i, ncores = scu_get_core_count(MMIO_P2V(A9_MPCORE_SCU));
 
+	if (ncores > nr_cpu_ids) {
+		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
+			ncores, nr_cpu_ids);
+		ncores = nr_cpu_ids;
+	}
+
 	for (i = 0; i < ncores; ++i)
 		set_cpu_possible(i, true);
 
diff --git a/arch/arm/mach-vexpress/hotplug.c b/arch/arm/mach-vexpress/hotplug.c
index ea4cbfb90a66..3668cf91d2de 100644
--- a/arch/arm/mach-vexpress/hotplug.c
+++ b/arch/arm/mach-vexpress/hotplug.c
@@ -13,6 +13,7 @@
 #include <linux/smp.h>
 
 #include <asm/cacheflush.h>
+#include <asm/system.h>
 
 extern volatile int pen_release;
 
@@ -62,13 +63,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
 	 * code will have already disabled interrupts
 	 */
 	for (;;) {
-		/*
-		 * here's the WFI
-		 */
-		asm(".word	0xe320f003\n"
-		    :
-		    :
-		    : "memory", "cc");
+		wfi();
 
 		if (pen_release == cpu) {
 			/*
diff --git a/arch/arm/mach-vexpress/include/mach/gpio.h b/arch/arm/mach-vexpress/include/mach/gpio.h
new file mode 100644
index 000000000000..40a8c178f10d
--- /dev/null
+++ b/arch/arm/mach-vexpress/include/mach/gpio.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/arch/arm/mach-vexpress/include/mach/io.h b/arch/arm/mach-vexpress/include/mach/io.h
index 748bb524ee71..13522d86685e 100644
--- a/arch/arm/mach-vexpress/include/mach/io.h
+++ b/arch/arm/mach-vexpress/include/mach/io.h
@@ -20,8 +20,6 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define IO_SPACE_LIMIT 0xffffffff
-
 #define __io(a)		__typesafe_io(a)
 #define __mem_pci(a)	(a)
 
diff --git a/arch/arm/mach-vt8500/Makefile.boot b/arch/arm/mach-vt8500/Makefile.boot
index a8acc4e24902..b79c41cdfdff 100644
--- a/arch/arm/mach-vt8500/Makefile.boot
+++ b/arch/arm/mach-vt8500/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x01000000
diff --git a/arch/arm/mach-vt8500/include/mach/io.h b/arch/arm/mach-vt8500/include/mach/io.h
index 9077239f78c9..46181eecf273 100644
--- a/arch/arm/mach-vt8500/include/mach/io.h
+++ b/arch/arm/mach-vt8500/include/mach/io.h
@@ -20,8 +20,6 @@
 #ifndef __ASM_ARM_ARCH_IO_H
 #define __ASM_ARM_ARCH_IO_H
 
-#define IO_SPACE_LIMIT 0xffff
-
 #define __io(a)		__typesafe_io((a) + 0xf0000000)
 #define __mem_pci(a)	(a)
 
diff --git a/arch/arm/mach-w90x900/Makefile.boot b/arch/arm/mach-w90x900/Makefile.boot
index a057b546b6e5..6c3d421c2d11 100644
--- a/arch/arm/mach-w90x900/Makefile.boot
+++ b/arch/arm/mach-w90x900/Makefile.boot
@@ -1,3 +1,3 @@
-zreladdr-y	:= 0x00008000
+zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 
diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c
index 83c56324a472..0a235e502330 100644
--- a/arch/arm/mach-w90x900/cpu.c
+++ b/arch/arm/mach-w90x900/cpu.c
@@ -60,7 +60,7 @@ static DEFINE_CLK(emc, 7);
 static DEFINE_SUBCLK(rmii, 2);
 static DEFINE_CLK(usbd, 8);
 static DEFINE_CLK(usbh, 9);
-static DEFINE_CLK(g2d, 10);;
+static DEFINE_CLK(g2d, 10);
 static DEFINE_CLK(pwm, 18);
 static DEFINE_CLK(ps2, 24);
 static DEFINE_CLK(kpi, 25);
diff --git a/arch/arm/mach-zynq/Makefile.boot b/arch/arm/mach-zynq/Makefile.boot
index 67039c3e0c48..760a0efe7580 100644
--- a/arch/arm/mach-zynq/Makefile.boot
+++ b/arch/arm/mach-zynq/Makefile.boot
@@ -1,3 +1,3 @@
-   zreladdr-y	:= 0x00008000
+   zreladdr-y	+= 0x00008000
 params_phys-y	:= 0x00000100
 initrd_phys-y	:= 0x00800000
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index cfbcf8b95599..c335c76e0d88 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -86,16 +86,6 @@ core_param(alignment, ai_usermode, int, 0600);
 #define UM_FIXUP	(1 << 1)
 #define UM_SIGNAL	(1 << 2)
 
-#ifdef CONFIG_PROC_FS
-static const char *usermode_action[] = {
-	"ignored",
-	"warn",
-	"fixup",
-	"fixup+warn",
-	"signal",
-	"signal+warn"
-};
-
 /* Return true if and only if the ARMv6 unaligned access model is in use. */
 static bool cpu_is_v6_unaligned(void)
 {
@@ -123,6 +113,16 @@ static int safe_usermode(int new_usermode, bool warn)
 	return new_usermode;
 }
 
+#ifdef CONFIG_PROC_FS
+static const char *usermode_action[] = {
+	"ignored",
+	"warn",
+	"fixup",
+	"fixup+warn",
+	"signal",
+	"signal+warn"
+};
+
 static int alignment_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "User:\t\t%lu\n", ai_user);
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 9ecfdb511951..8ac9e9f84790 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -16,9 +16,12 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -26,15 +29,23 @@
 #define CACHE_LINE_SIZE		32
 
 static void __iomem *l2x0_base;
-static DEFINE_SPINLOCK(l2x0_lock);
+static DEFINE_RAW_SPINLOCK(l2x0_lock);
 static uint32_t l2x0_way_mask;	/* Bitmask of active ways */
 static uint32_t l2x0_size;
 
+struct l2x0_regs l2x0_saved_regs;
+
+struct l2x0_of_data {
+	void (*setup)(const struct device_node *, __u32 *, __u32 *);
+	void (*save)(void);
+	void (*resume)(void);
+};
+
 static inline void cache_wait_way(void __iomem *reg, unsigned long mask)
 {
 	/* wait for cache operation by line or way to complete */
 	while (readl_relaxed(reg) & mask)
-		;
+		cpu_relax();
 }
 
 #ifdef CONFIG_CACHE_PL310
@@ -115,9 +126,9 @@ static void l2x0_cache_sync(void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void __l2x0_flush_all(void)
@@ -134,9 +145,9 @@ static void l2x0_flush_all(void)
 	unsigned long flags;
 
 	/* clean all ways */
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	__l2x0_flush_all();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_clean_all(void)
@@ -144,11 +155,11 @@ static void l2x0_clean_all(void)
 	unsigned long flags;
 
 	/* clean all ways */
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY);
 	cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_inv_all(void)
@@ -156,13 +167,13 @@ static void l2x0_inv_all(void)
 	unsigned long flags;
 
 	/* invalidate all ways */
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	/* Invalidating when L2 is enabled is a nono */
 	BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1);
 	writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY);
 	cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_inv_range(unsigned long start, unsigned long end)
@@ -170,7 +181,7 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
 	void __iomem *base = l2x0_base;
 	unsigned long flags;
 
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	if (start & (CACHE_LINE_SIZE - 1)) {
 		start &= ~(CACHE_LINE_SIZE - 1);
 		debug_writel(0x03);
@@ -195,13 +206,13 @@ static void l2x0_inv_range(unsigned long start, unsigned long end)
 		}
 
 		if (blk_end < end) {
-			spin_unlock_irqrestore(&l2x0_lock, flags);
-			spin_lock_irqsave(&l2x0_lock, flags);
+			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+			raw_spin_lock_irqsave(&l2x0_lock, flags);
 		}
 	}
 	cache_wait(base + L2X0_INV_LINE_PA, 1);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_clean_range(unsigned long start, unsigned long end)
@@ -214,7 +225,7 @@ static void l2x0_clean_range(unsigned long start, unsigned long end)
 		return;
 	}
 
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
 		unsigned long blk_end = start + min(end - start, 4096UL);
@@ -225,13 +236,13 @@ static void l2x0_clean_range(unsigned long start, unsigned long end)
 		}
 
 		if (blk_end < end) {
-			spin_unlock_irqrestore(&l2x0_lock, flags);
-			spin_lock_irqsave(&l2x0_lock, flags);
+			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+			raw_spin_lock_irqsave(&l2x0_lock, flags);
 		}
 	}
 	cache_wait(base + L2X0_CLEAN_LINE_PA, 1);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_flush_range(unsigned long start, unsigned long end)
@@ -244,7 +255,7 @@ static void l2x0_flush_range(unsigned long start, unsigned long end)
 		return;
 	}
 
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	start &= ~(CACHE_LINE_SIZE - 1);
 	while (start < end) {
 		unsigned long blk_end = start + min(end - start, 4096UL);
@@ -257,27 +268,27 @@ static void l2x0_flush_range(unsigned long start, unsigned long end)
 		debug_writel(0x00);
 
 		if (blk_end < end) {
-			spin_unlock_irqrestore(&l2x0_lock, flags);
-			spin_lock_irqsave(&l2x0_lock, flags);
+			raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+			raw_spin_lock_irqsave(&l2x0_lock, flags);
 		}
 	}
 	cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1);
 	cache_sync();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
 static void l2x0_disable(void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&l2x0_lock, flags);
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
 	__l2x0_flush_all();
 	writel_relaxed(0, l2x0_base + L2X0_CTRL);
 	dsb();
-	spin_unlock_irqrestore(&l2x0_lock, flags);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
 }
 
-static void __init l2x0_unlock(__u32 cache_id)
+static void l2x0_unlock(__u32 cache_id)
 {
 	int lockregs;
 	int i;
@@ -353,6 +364,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 		/* l2x0 controller is disabled */
 		writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
 
+		l2x0_saved_regs.aux_ctrl = aux;
+
 		l2x0_inv_all();
 
 		/* enable L2X0 */
@@ -372,3 +385,202 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
 	printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n",
 			ways, cache_id, aux, l2x0_size);
 }
+
+#ifdef CONFIG_OF
+static void __init l2x0_of_setup(const struct device_node *np,
+				 __u32 *aux_val, __u32 *aux_mask)
+{
+	u32 data[2] = { 0, 0 };
+	u32 tag = 0;
+	u32 dirty = 0;
+	u32 val = 0, mask = 0;
+
+	of_property_read_u32(np, "arm,tag-latency", &tag);
+	if (tag) {
+		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+	}
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1]) {
+		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+	}
+
+	of_property_read_u32(np, "arm,dirty-latency", &dirty);
+	if (dirty) {
+		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static void __init pl310_of_setup(const struct device_node *np,
+				  __u32 *aux_val, __u32 *aux_mask)
+{
+	u32 data[3] = { 0, 0, 0 };
+	u32 tag[3] = { 0, 0, 0 };
+	u32 filter[2] = { 0, 0 };
+
+	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+	if (tag[0] && tag[1] && tag[2])
+		writel_relaxed(
+			((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
+			((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
+			((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
+			l2x0_base + L2X0_TAG_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1] && data[2])
+		writel_relaxed(
+			((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) |
+			((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) |
+			((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT),
+			l2x0_base + L2X0_DATA_LATENCY_CTRL);
+
+	of_property_read_u32_array(np, "arm,filter-ranges",
+				   filter, ARRAY_SIZE(filter));
+	if (filter[1]) {
+		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
+			       l2x0_base + L2X0_ADDR_FILTER_END);
+		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN,
+			       l2x0_base + L2X0_ADDR_FILTER_START);
+	}
+}
+
+static void __init pl310_save(void)
+{
+	u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
+		L2X0_CACHE_ID_RTL_MASK;
+
+	l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base +
+		L2X0_TAG_LATENCY_CTRL);
+	l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base +
+		L2X0_DATA_LATENCY_CTRL);
+	l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base +
+		L2X0_ADDR_FILTER_END);
+	l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base +
+		L2X0_ADDR_FILTER_START);
+
+	if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
+		/*
+		 * From r2p0, there is Prefetch offset/control register
+		 */
+		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base +
+			L2X0_PREFETCH_CTRL);
+		/*
+		 * From r3p0, there is Power control register
+		 */
+		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
+			l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base +
+				L2X0_POWER_CTRL);
+	}
+}
+
+static void l2x0_resume(void)
+{
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* restore aux ctrl and enable l2 */
+		l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID));
+
+		writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base +
+			L2X0_AUX_CTRL);
+
+		l2x0_inv_all();
+
+		writel_relaxed(1, l2x0_base + L2X0_CTRL);
+	}
+}
+
+static void pl310_resume(void)
+{
+	u32 l2x0_revision;
+
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		/* restore pl310 setup */
+		writel_relaxed(l2x0_saved_regs.tag_latency,
+			l2x0_base + L2X0_TAG_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.data_latency,
+			l2x0_base + L2X0_DATA_LATENCY_CTRL);
+		writel_relaxed(l2x0_saved_regs.filter_end,
+			l2x0_base + L2X0_ADDR_FILTER_END);
+		writel_relaxed(l2x0_saved_regs.filter_start,
+			l2x0_base + L2X0_ADDR_FILTER_START);
+
+		l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) &
+			L2X0_CACHE_ID_RTL_MASK;
+
+		if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) {
+			writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+				l2x0_base + L2X0_PREFETCH_CTRL);
+			if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0)
+				writel_relaxed(l2x0_saved_regs.pwr_ctrl,
+					l2x0_base + L2X0_POWER_CTRL);
+		}
+	}
+
+	l2x0_resume();
+}
+
+static const struct l2x0_of_data pl310_data = {
+	pl310_of_setup,
+	pl310_save,
+	pl310_resume,
+};
+
+static const struct l2x0_of_data l2x0_data = {
+	l2x0_of_setup,
+	NULL,
+	l2x0_resume,
+};
+
+static const struct of_device_id l2x0_ids[] __initconst = {
+	{ .compatible = "arm,pl310-cache", .data = (void *)&pl310_data },
+	{ .compatible = "arm,l220-cache", .data = (void *)&l2x0_data },
+	{ .compatible = "arm,l210-cache", .data = (void *)&l2x0_data },
+	{}
+};
+
+int __init l2x0_of_init(__u32 aux_val, __u32 aux_mask)
+{
+	struct device_node *np;
+	struct l2x0_of_data *data;
+	struct resource res;
+
+	np = of_find_matching_node(NULL, l2x0_ids);
+	if (!np)
+		return -ENODEV;
+
+	if (of_address_to_resource(np, 0, &res))
+		return -ENODEV;
+
+	l2x0_base = ioremap(res.start, resource_size(&res));
+	if (!l2x0_base)
+		return -ENOMEM;
+
+	l2x0_saved_regs.phy_base = res.start;
+
+	data = of_match_node(l2x0_ids, np)->data;
+
+	/* L2 configuration can only be changed if the cache is disabled */
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+		if (data->setup)
+			data->setup(np, &aux_val, &aux_mask);
+	}
+
+	if (data->save)
+		data->save();
+
+	l2x0_init(l2x0_base, aux_val, aux_mask);
+
+	outer_cache.resume = data->resume;
+	return 0;
+}
+#endif
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index b0ee9ba3cfab..93aac068da94 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -16,7 +16,7 @@
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
-static DEFINE_SPINLOCK(cpu_asid_lock);
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 unsigned int cpu_last_asid = ASID_FIRST_VERSION;
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(struct mm_struct *, current_mm);
@@ -31,7 +31,7 @@ DEFINE_PER_CPU(struct mm_struct *, current_mm);
 void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	mm->context.id = 0;
-	spin_lock_init(&mm->context.id_lock);
+	raw_spin_lock_init(&mm->context.id_lock);
 }
 
 static void flush_context(void)
@@ -58,7 +58,7 @@ static void set_mm_context(struct mm_struct *mm, unsigned int asid)
 	 * the broadcast. This function is also called via IPI so the
 	 * mm->context.id_lock has to be IRQ-safe.
 	 */
-	spin_lock_irqsave(&mm->context.id_lock, flags);
+	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
 	if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) {
 		/*
 		 * Old version of ASID found. Set the new one and
@@ -67,7 +67,7 @@ static void set_mm_context(struct mm_struct *mm, unsigned int asid)
 		mm->context.id = asid;
 		cpumask_clear(mm_cpumask(mm));
 	}
-	spin_unlock_irqrestore(&mm->context.id_lock, flags);
+	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
 
 	/*
 	 * Set the mm_cpumask(mm) bit for the current CPU.
@@ -117,7 +117,7 @@ void __new_context(struct mm_struct *mm)
 {
 	unsigned int asid;
 
-	spin_lock(&cpu_asid_lock);
+	raw_spin_lock(&cpu_asid_lock);
 #ifdef CONFIG_SMP
 	/*
 	 * Check the ASID again, in case the change was broadcast from
@@ -125,7 +125,7 @@ void __new_context(struct mm_struct *mm)
 	 */
 	if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) {
 		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-		spin_unlock(&cpu_asid_lock);
+		raw_spin_unlock(&cpu_asid_lock);
 		return;
 	}
 #endif
@@ -153,5 +153,5 @@ void __new_context(struct mm_struct *mm)
 	}
 
 	set_mm_context(mm, asid);
-	spin_unlock(&cpu_asid_lock);
+	raw_spin_unlock(&cpu_asid_lock);
 }
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index b8061519ce77..7d0a8c230342 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -30,7 +30,7 @@
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
 				  L_PTE_MT_MINICACHE)
 
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
 
 /*
  * ARMv4 mini-dcache optimised copy_user_highpage
@@ -76,14 +76,14 @@ void v4_mc_copy_user_highpage(struct page *to, struct page *from,
 	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
 		__flush_dcache_page(page_mapping(from), from);
 
-	spin_lock(&minicache_lock);
+	raw_spin_lock(&minicache_lock);
 
 	set_pte_ext(TOP_PTE(0xffff8000), pfn_pte(page_to_pfn(from), minicache_pgprot), 0);
 	flush_tlb_kernel_page(0xffff8000);
 
 	mc_copy_user_page((void *)0xffff8000, kto);
 
-	spin_unlock(&minicache_lock);
+	raw_spin_unlock(&minicache_lock);
 
 	kunmap_atomic(kto, KM_USER1);
 }
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 63cca0097130..3d9a1552cef6 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -27,7 +27,7 @@
 #define from_address	(0xffff8000)
 #define to_address	(0xffffc000)
 
-static DEFINE_SPINLOCK(v6_lock);
+static DEFINE_RAW_SPINLOCK(v6_lock);
 
 /*
  * Copy the user page.  No aliasing to deal with so we can just
@@ -88,7 +88,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to,
 	 * Now copy the page using the same cache colour as the
 	 * pages ultimate destination.
 	 */
-	spin_lock(&v6_lock);
+	raw_spin_lock(&v6_lock);
 
 	set_pte_ext(TOP_PTE(from_address) + offset, pfn_pte(page_to_pfn(from), PAGE_KERNEL), 0);
 	set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(to), PAGE_KERNEL), 0);
@@ -101,7 +101,7 @@ static void v6_copy_user_highpage_aliasing(struct page *to,
 
 	copy_page((void *)kto, (void *)kfrom);
 
-	spin_unlock(&v6_lock);
+	raw_spin_unlock(&v6_lock);
 }
 
 /*
@@ -121,13 +121,13 @@ static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vad
 	 * Now clear the page using the same cache colour as
 	 * the pages ultimate destination.
 	 */
-	spin_lock(&v6_lock);
+	raw_spin_lock(&v6_lock);
 
 	set_pte_ext(TOP_PTE(to_address) + offset, pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0);
 	flush_tlb_kernel_page(to);
 	clear_page((void *)to);
 
-	spin_unlock(&v6_lock);
+	raw_spin_unlock(&v6_lock);
 }
 
 struct cpu_user_fns v6_user_fns __initdata = {
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 649bbcd325bf..610c24ced310 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -32,7 +32,7 @@
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
 				  L_PTE_MT_MINICACHE)
 
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
 
 /*
  * XScale mini-dcache optimised copy_user_highpage
@@ -98,14 +98,14 @@ void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
 	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
 		__flush_dcache_page(page_mapping(from), from);
 
-	spin_lock(&minicache_lock);
+	raw_spin_lock(&minicache_lock);
 
 	set_pte_ext(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(page_to_pfn(from), minicache_pgprot), 0);
 	flush_tlb_kernel_page(COPYPAGE_MINICACHE);
 
 	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
 
-	spin_unlock(&minicache_lock);
+	raw_spin_unlock(&minicache_lock);
 
 	kunmap_atomic(kto, KM_USER1);
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 01f5987eb1ad..e4e7f6cba1ab 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -120,9 +120,8 @@ static void __dma_free_buffer(struct page *page, size_t size)
 
 #ifdef CONFIG_MMU
 
-
 #define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - consistent_base) >> PAGE_SHIFT)
-#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PGDIR_SHIFT)
+#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT)
 
 /*
  * These are the page tables (2MB each) covering uncached, DMA consistent allocations
@@ -206,7 +205,7 @@ static int __init consistent_init(void)
 		}
 
 		consistent_pte[i++] = pte;
-		base += (1 << PGDIR_SHIFT);
+		base += PMD_SIZE;
 	} while (base < CONSISTENT_END);
 
 	return ret;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 3b5ea68acbb8..aa33949fef60 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -20,6 +20,7 @@
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
 
+#include <asm/exception.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 34409a08ba0d..04e9a92bb47a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -496,6 +496,13 @@ static void __init free_unused_memmap(struct meminfo *mi)
 		 */
 		bank_start = min(bank_start,
 				 ALIGN(prev_bank_end, PAGES_PER_SECTION));
+#else
+		/*
+		 * Align down here since the VM subsystem insists that the
+		 * memmap entries are valid from the bank start aligned to
+		 * MAX_ORDER_NR_PAGES.
+		 */
+		bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES);
 #endif
 		/*
 		 * If we had a previous bank, and there is a space
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index ab506272b2d3..bdb248c4f55c 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -289,6 +289,27 @@ __arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
 }
 EXPORT_SYMBOL(__arm_ioremap);
 
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space as memory. Needed when the kernel wants to execute
+ * code in external memory. This is needed for reprogramming source
+ * clocks that would affect normal memory for example. Please see
+ * CONFIG_GENERIC_ALLOCATOR for allocating external memory.
+ */
+void __iomem *
+__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached)
+{
+	unsigned int mtype;
+
+	if (cached)
+		mtype = MT_MEMORY;
+	else
+		mtype = MT_MEMORY_NONCACHED;
+
+	return __arm_ioremap_caller(phys_addr, size, mtype,
+			__builtin_return_address(0));
+}
+
 void __iounmap(volatile void __iomem *io_addr)
 {
 	void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 010566799c80..ad7cce3bc431 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -12,8 +12,8 @@ static inline pmd_t *pmd_off_k(unsigned long virt)
 
 struct mem_type {
 	pteval_t prot_pte;
-	unsigned int prot_l1;
-	unsigned int prot_sect;
+	pmdval_t prot_l1;
+	pmdval_t prot_sect;
 	unsigned int domain;
 };
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 594d677b92c8..dc8c550e6cbd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(pgprot_kernel);
 struct cachepolicy {
 	const char	policy[16];
 	unsigned int	cr_mask;
-	unsigned int	pmd;
+	pmdval_t	pmd;
 	pteval_t	pte;
 };
 
@@ -273,6 +273,14 @@ static struct mem_type mem_types[] = {
 		.prot_l1   = PMD_TYPE_TABLE,
 		.domain    = DOMAIN_KERNEL,
 	},
+	[MT_MEMORY_SO] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_MT_UNCACHED,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
+				PMD_SECT_UNCACHED | PMD_SECT_XN,
+		.domain    = DOMAIN_KERNEL,
+	},
 };
 
 const struct mem_type *get_mem_type(unsigned int type)
@@ -288,7 +296,7 @@ static void __init build_mem_type_table(void)
 {
 	struct cachepolicy *cp;
 	unsigned int cr = get_cr();
-	unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
+	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
 	int cpu_arch = cpu_architecture();
 	int i;
 
@@ -863,14 +871,14 @@ static inline void prepare_page_table(void)
 	/*
 	 * Clear out all the mappings below the kernel image.
 	 */
-	for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
+	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
 
 #ifdef CONFIG_XIP_KERNEL
 	/* The XIP kernel is mapped in the module area -- skip over it */
-	addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
+	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
 #endif
-	for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
+	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
 
 	/*
@@ -885,10 +893,12 @@ static inline void prepare_page_table(void)
 	 * memory bank, up to the end of the vmalloc region.
 	 */
 	for (addr = __phys_to_virt(end);
-	     addr < VMALLOC_END; addr += PGDIR_SIZE)
+	     addr < VMALLOC_END; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
 }
 
+#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+
 /*
  * Reserve the special regions of memory
  */
@@ -898,7 +908,7 @@ void __init arm_mm_memblock_reserve(void)
 	 * Reserve the page tables.  These are already in use,
 	 * and can only be in node 0.
 	 */
-	memblock_reserve(__pa(swapper_pg_dir), PTRS_PER_PGD * sizeof(pgd_t));
+	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
 
 #ifdef CONFIG_SA1111
 	/*
@@ -926,7 +936,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
 	 */
 	vectors_page = early_alloc(PAGE_SIZE);
 
-	for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
+	for (addr = VMALLOC_END; addr; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
 
 	/*
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 2e6849b41f66..88fb3d9e0640 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -379,31 +379,26 @@ ENTRY(cpu_arm920_set_pte_ext)
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm920_suspend_size
-.equ	cpu_arm920_suspend_size, 4 * 4
+.equ	cpu_arm920_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm920_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
 	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r6, c2, c0, 0	@ TTB address
-	mrc	p15, 0, r7, c1, c0, 0	@ Control register
-	stmia	r0, {r4 - r7}
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_arm920_do_suspend)
 
 ENTRY(cpu_arm920_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
-	ldmia	r0, {r4 - r7}
+	ldmia	r0, {r4 - r6}
 	mcr	p15, 0, r4, c13, c0, 0	@ PID
 	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r6, c2, c0, 0	@ TTB address
-	mov	r0, r7			@ control register
-	mov	r2, r6, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_BIT4 | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_arm920_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index cd8f79c3a282..9f8fd91f918a 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -394,31 +394,26 @@ ENTRY(cpu_arm926_set_pte_ext)
 
 /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
 .globl	cpu_arm926_suspend_size
-.equ	cpu_arm926_suspend_size, 4 * 4
+.equ	cpu_arm926_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_arm926_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ PID
 	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r6, c2, c0, 0	@ TTB address
-	mrc	p15, 0, r7, c1, c0, 0	@ Control register
-	stmia	r0, {r4 - r7}
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_arm926_do_suspend)
 
 ENTRY(cpu_arm926_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
-	ldmia	r0, {r4 - r7}
+	ldmia	r0, {r4 - r6}
 	mcr	p15, 0, r4, c13, c0, 0	@ PID
 	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r6, c2, c0, 0	@ TTB address
-	mov	r0, r7			@ control register
-	mov	r2, r6, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_BIT4 | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_arm926_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 69e7f2ef7384..7d91545d089b 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -168,20 +168,19 @@ ENTRY(cpu_sa1100_set_pte_ext)
 	mov	pc, lr
 
 .globl	cpu_sa1100_suspend_size
-.equ	cpu_sa1100_suspend_size, 4*4
+.equ	cpu_sa1100_suspend_size, 4 * 3
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_sa1100_do_suspend)
-	stmfd	sp!, {r4 - r7, lr}
+	stmfd	sp!, {r4 - r6, lr}
 	mrc	p15, 0, r4, c3, c0, 0		@ domain ID
-	mrc	p15, 0, r5, c2, c0, 0		@ translation table base addr
-	mrc	p15, 0, r6, c13, c0, 0		@ PID
-	mrc	p15, 0, r7, c1, c0, 0		@ control reg
-	stmia	r0, {r4 - r7}			@ store cp regs
-	ldmfd	sp!, {r4 - r7, pc}
+	mrc	p15, 0, r5, c13, c0, 0		@ PID
+	mrc	p15, 0, r6, c1, c0, 0		@ control reg
+	stmia	r0, {r4 - r6}			@ store cp regs
+	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(cpu_sa1100_do_suspend)
 
 ENTRY(cpu_sa1100_do_resume)
-	ldmia	r0, {r4 - r7}			@ load cp regs
+	ldmia	r0, {r4 - r6}			@ load cp regs
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0		@ flush I+D TLBs
 	mcr	p15, 0, ip, c7, c7, 0		@ flush I&D cache
@@ -189,13 +188,9 @@ ENTRY(cpu_sa1100_do_resume)
 	mcr	p15, 0, ip, c9, c0, 5		@ allow user space to use RB
 
 	mcr	p15, 0, r4, c3, c0, 0		@ domain ID
-	mcr	p15, 0, r5, c2, c0, 0		@ translation table base addr
-	mcr	p15, 0, r6, c13, c0, 0		@ PID
-	mov	r0, r7				@ control register
-	mov	r2, r5, lsr #14			@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0		@ translation table base addr
+	mcr	p15, 0, r5, c13, c0, 0		@ PID
+	mov	r0, r6				@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_sa1100_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index a923aa0fd00d..d061d2fa5506 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -128,20 +128,18 @@ ENTRY(cpu_v6_set_pte_ext)
 
 /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
 .globl	cpu_v6_suspend_size
-.equ	cpu_v6_suspend_size, 4 * 8
+.equ	cpu_v6_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_v6_do_suspend)
-	stmfd	sp!, {r4 - r11, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mrc	p15, 0, r5, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r7, c2, c0, 0	@ Translation table base 0
-	mrc	p15, 0, r8, c2, c0, 1	@ Translation table base 1
-	mrc	p15, 0, r9, c1, c0, 1	@ auxiliary control register
-	mrc	p15, 0, r10, c1, c0, 2	@ co-processor access control
-	mrc	p15, 0, r11, c1, c0, 0	@ control register
-	stmia	r0, {r4 - r11}
-	ldmfd	sp!, {r4- r11, pc}
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mrc	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mrc	p15, 0, r8, c1, c0, 2	@ co-processor access control
+	mrc	p15, 0, r9, c1, c0, 0	@ control register
+	stmia	r0, {r4 - r9}
+	ldmfd	sp!, {r4- r9, pc}
 ENDPROC(cpu_v6_do_suspend)
 
 ENTRY(cpu_v6_do_resume)
@@ -150,25 +148,21 @@ ENTRY(cpu_v6_do_resume)
 	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
 	mcr	p15, 0, ip, c7, c15, 0	@ clean+invalidate cache
 	mcr	p15, 0, ip, c7, c10, 4	@ drain write buffer
-	ldmia	r0, {r4 - r11}
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0, {r4 - r9}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mcr	p15, 0, r5, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r7, c2, c0, 0	@ Translation table base 0
-	mcr	p15, 0, r8, c2, c0, 1	@ Translation table base 1
-	mcr	p15, 0, r9, c1, c0, 1	@ auxiliary control register
-	mcr	p15, 0, r10, c1, c0, 2	@ co-processor access control
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ Translation table base 0
+	mcr	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mcr	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mcr	p15, 0, r8, c1, c0, 2	@ co-processor access control
 	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
 	mcr	p15, 0, ip, c7, c5, 4	@ ISB
-	mov	r0, r11			@ control register
-	mov	r2, r7, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, cpu_resume_l1_flags
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_v6_do_resume)
-cpu_resume_l1_flags:
-	ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP)
-	ALT_UP(.long  PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP)
 #endif
 
 	string	cpu_v6_name, "ARMv6-compatible processor"
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 9049c0764db2..2c559ac38142 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -217,56 +217,50 @@ ENDPROC(cpu_v7_set_pte_ext)
 
 /* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
 .globl	cpu_v7_suspend_size
-.equ	cpu_v7_suspend_size, 4 * 9
-#ifdef CONFIG_PM_SLEEP
+.equ	cpu_v7_suspend_size, 4 * 7
+#ifdef CONFIG_ARM_CPU_SUSPEND
 ENTRY(cpu_v7_do_suspend)
-	stmfd	sp!, {r4 - r11, lr}
+	stmfd	sp!, {r4 - r10, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mrc	p15, 0, r5, c13, c0, 1	@ Context ID
-	mrc	p15, 0, r6, c13, c0, 3	@ User r/o thread ID
-	stmia	r0!, {r4 - r6}
+	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	stmia	r0!, {r4 - r5}
 	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mrc	p15, 0, r7, c2, c0, 0	@ TTB 0
-	mrc	p15, 0, r8, c2, c0, 1	@ TTB 1
-	mrc	p15, 0, r9, c1, c0, 0	@ Control register
-	mrc	p15, 0, r10, c1, c0, 1	@ Auxiliary control register
-	mrc	p15, 0, r11, c1, c0, 2	@ Co-processor access control
-	stmia	r0, {r6 - r11}
-	ldmfd	sp!, {r4 - r11, pc}
+	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1
+	mrc	p15, 0, r8, c1, c0, 0	@ Control register
+	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
+	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
+	stmia	r0, {r6 - r10}
+	ldmfd	sp!, {r4 - r10, pc}
 ENDPROC(cpu_v7_do_suspend)
 
 ENTRY(cpu_v7_do_resume)
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
 	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
-	ldmia	r0!, {r4 - r6}
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0!, {r4 - r5}
 	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
-	mcr	p15, 0, r5, c13, c0, 1	@ Context ID
-	mcr	p15, 0, r6, c13, c0, 3	@ User r/o thread ID
-	ldmia	r0, {r6 - r11}
+	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	ldmia	r0, {r6 - r10}
 	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
-	mcr	p15, 0, r7, c2, c0, 0	@ TTB 0
-	mcr	p15, 0, r8, c2, c0, 1	@ TTB 1
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0
+	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
 	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
 	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
-	teq	r4, r10			@ Is it already set?
-	mcrne	p15, 0, r10, c1, c0, 1	@ No, so write it
-	mcr	p15, 0, r11, c1, c0, 2	@ Co-processor access control
+	teq	r4, r9			@ Is it already set?
+	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
+	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
 	ldr	r4, =PRRR		@ PRRR
 	ldr	r5, =NMRR		@ NMRR
 	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
 	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
 	isb
 	dsb
-	mov	r0, r9			@ control register
-	mov	r2, r7, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, cpu_resume_l1_flags
+	mov	r0, r8			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_v7_do_resume)
-cpu_resume_l1_flags:
-	ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_SMP)
-	ALT_UP(.long  PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_FLAGS_UP)
 #endif
 
 	__CPUINIT
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 755e1bf22681..abf0507a08ae 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -406,24 +406,23 @@ ENTRY(cpu_xsc3_set_pte_ext)
 	.align
 
 .globl	cpu_xsc3_suspend_size
-.equ	cpu_xsc3_suspend_size, 4 * 7
+.equ	cpu_xsc3_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xsc3_do_suspend)
-	stmfd	sp!, {r4 - r10, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
 	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mrc	p15, 0, r6, c13, c0, 0	@ PID
 	mrc 	p15, 0, r7, c3, c0, 0	@ domain ID
-	mrc 	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mrc	p15, 0, r9, c1, c0, 1	@ auxiliary control reg
-	mrc 	p15, 0, r10, c1, c0, 0	@ control reg
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mrc 	p15, 0, r9, c1, c0, 0	@ control reg
 	bic	r4, r4, #2		@ clear frequency change bit
-	stmia	r0, {r4 - r10}		@ store cp regs
-	ldmia	sp!, {r4 - r10, pc}
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmia	sp!, {r4 - r9, pc}
 ENDPROC(cpu_xsc3_do_suspend)
 
 ENTRY(cpu_xsc3_do_resume)
-	ldmia	r0, {r4 - r10}		@ load cp regs
+	ldmia	r0, {r4 - r9}		@ load cp regs
 	mov	ip, #0
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
 	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
@@ -433,15 +432,10 @@ ENTRY(cpu_xsc3_do_resume)
 	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mcr	p15, 0, r6, c13, c0, 0	@ PID
 	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
-	mcr	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mcr	p15, 0, r9, c1, c0, 1	@ auxiliary control reg
-
-	@ temporarily map resume_turn_on_mmu into the page table,
-	@ otherwise prefetch abort occurs after MMU is turned on
-	mov	r0, r10			@ control register
-	mov	r2, r8, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =0x542e		@ section flags
+	orr	r1, r1, #0x18		@ cache the page table in L2
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_xsc3_do_resume)
 #endif
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index fbc06e55b87a..3277904bebaf 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -520,24 +520,23 @@ ENTRY(cpu_xscale_set_pte_ext)
 	.align
 
 .globl	cpu_xscale_suspend_size
-.equ	cpu_xscale_suspend_size, 4 * 7
+.equ	cpu_xscale_suspend_size, 4 * 6
 #ifdef CONFIG_PM_SLEEP
 ENTRY(cpu_xscale_do_suspend)
-	stmfd	sp!, {r4 - r10, lr}
+	stmfd	sp!, {r4 - r9, lr}
 	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
 	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mrc	p15, 0, r6, c13, c0, 0	@ PID
 	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
-	mrc	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mrc	p15, 0, r9, c1, c1, 0	@ auxiliary control reg
-	mrc	p15, 0, r10, c1, c0, 0	@ control reg
+	mrc	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mrc	p15, 0, r9, c1, c0, 0	@ control reg
 	bic	r4, r4, #2		@ clear frequency change bit
-	stmia	r0, {r4 - r10}		@ store cp regs
-	ldmfd	sp!, {r4 - r10, pc}
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmfd	sp!, {r4 - r9, pc}
 ENDPROC(cpu_xscale_do_suspend)
 
 ENTRY(cpu_xscale_do_resume)
-	ldmia	r0, {r4 - r10}		@ load cp regs
+	ldmia	r0, {r4 - r9}		@ load cp regs
 	mov	ip, #0
 	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
 	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
@@ -545,13 +544,9 @@ ENTRY(cpu_xscale_do_resume)
 	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
 	mcr	p15, 0, r6, c13, c0, 0	@ PID
 	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
-	mcr	p15, 0, r8, c2, c0, 0	@ translation table base addr
-	mcr	p15, 0, r9, c1, c1, 0	@ auxiliary control reg
-	mov	r0, r10			@ control register
-	mov	r2, r8, lsr #14		@ get TTB0 base
-	mov	r2, r2, lsl #14
-	ldr	r3, =PMD_TYPE_SECT | PMD_SECT_BUFFERABLE | \
-		     PMD_SECT_CACHEABLE | PMD_SECT_AP_WRITE
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c1, 0	@ auxiliary control reg
+	mov	r0, r9			@ control register
 	b	cpu_resume_mmu
 ENDPROC(cpu_xscale_do_resume)
 #endif
diff --git a/arch/arm/plat-mxc/Kconfig b/arch/arm/plat-mxc/Kconfig
index a5353fc0793f..4c8fdbcc9467 100644
--- a/arch/arm/plat-mxc/Kconfig
+++ b/arch/arm/plat-mxc/Kconfig
@@ -39,7 +39,7 @@ config ARCH_MX503
 	select ARCH_MX50_SUPPORTED
 	select ARCH_MX53_SUPPORTED
 	help
-	  This enables support for machines using Freescale's i.MX50 and i.MX51
+	  This enables support for machines using Freescale's i.MX50 and i.MX53
 	  processors.
 
 config ARCH_MX51
diff --git a/arch/arm/plat-mxc/devices.c b/arch/arm/plat-mxc/devices.c
index 0d6ed31bdbf2..a34b2ae895f2 100644
--- a/arch/arm/plat-mxc/devices.c
+++ b/arch/arm/plat-mxc/devices.c
@@ -37,59 +37,6 @@ int __init mxc_register_device(struct platform_device *pdev, void *data)
 	return ret;
 }
 
-struct platform_device *__init imx_add_platform_device_dmamask(
-		const char *name, int id,
-		const struct resource *res, unsigned int num_resources,
-		const void *data, size_t size_data, u64 dmamask)
-{
-	int ret = -ENOMEM;
-	struct platform_device *pdev;
-
-	pdev = platform_device_alloc(name, id);
-	if (!pdev)
-		goto err;
-
-	if (dmamask) {
-		/*
-		 * This memory isn't freed when the device is put,
-		 * I don't have a nice idea for that though.  Conceptually
-		 * dma_mask in struct device should not be a pointer.
-		 * See http://thread.gmane.org/gmane.linux.kernel.pci/9081
-		 */
-		pdev->dev.dma_mask =
-			kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
-		if (!pdev->dev.dma_mask)
-			/* ret is still -ENOMEM; */
-			goto err;
-
-		*pdev->dev.dma_mask = dmamask;
-		pdev->dev.coherent_dma_mask = dmamask;
-	}
-
-	if (res) {
-		ret = platform_device_add_resources(pdev, res, num_resources);
-		if (ret)
-			goto err;
-	}
-
-	if (data) {
-		ret = platform_device_add_data(pdev, data, size_data);
-		if (ret)
-			goto err;
-	}
-
-	ret = platform_device_add(pdev);
-	if (ret) {
-err:
-		if (dmamask)
-			kfree(pdev->dev.dma_mask);
-		platform_device_put(pdev);
-		return ERR_PTR(ret);
-	}
-
-	return pdev;
-}
-
 struct device mxc_aips_bus = {
 	.init_name	= "mxc_aips",
 	.parent		= &platform_bus,
diff --git a/arch/arm/plat-mxc/include/mach/devices-common.h b/arch/arm/plat-mxc/include/mach/devices-common.h
index 524538aabc4b..543525d76a60 100644
--- a/arch/arm/plat-mxc/include/mach/devices-common.h
+++ b/arch/arm/plat-mxc/include/mach/devices-common.h
@@ -14,10 +14,22 @@
 extern struct device mxc_aips_bus;
 extern struct device mxc_ahb_bus;
 
-struct platform_device *imx_add_platform_device_dmamask(
+static inline struct platform_device *imx_add_platform_device_dmamask(
 		const char *name, int id,
 		const struct resource *res, unsigned int num_resources,
-		const void *data, size_t size_data, u64 dmamask);
+		const void *data, size_t size_data, u64 dmamask)
+{
+	struct platform_device_info pdevinfo = {
+		.name = name,
+		.id = id,
+		.res = res,
+		.num_res = num_resources,
+		.data = data,
+		.size_data = size_data,
+		.dma_mask = dmamask,
+	};
+	return platform_device_register_full(&pdevinfo);
+}
 
 static inline struct platform_device *imx_add_platform_device(
 		const char *name, int id,
diff --git a/arch/arm/plat-mxc/include/mach/i2c.h b/arch/arm/plat-mxc/include/mach/i2c.h
index 4a5dc5c6d8e8..375cdd0cf876 100644
--- a/arch/arm/plat-mxc/include/mach/i2c.h
+++ b/arch/arm/plat-mxc/include/mach/i2c.h
@@ -11,14 +11,10 @@
 
 /**
  * struct imxi2c_platform_data - structure of platform data for MXC I2C driver
- * @init:	Initialise gpio's and other board specific things
- * @exit:	Free everything initialised by @init
  * @bitrate:	Bus speed measured in Hz
  *
  **/
 struct imxi2c_platform_data {
-	int (*init)(struct device *dev);
-	void (*exit)(struct device *dev);
 	int bitrate;
 };
 
diff --git a/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h b/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
index 3ba4d8f8073b..9605bf227df9 100644
--- a/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
+++ b/arch/arm/plat-nomadik/include/plat/gpio-nomadik.h
@@ -67,6 +67,9 @@ extern int nmk_gpio_get_mode(int gpio);
 extern void nmk_gpio_wakeups_suspend(void);
 extern void nmk_gpio_wakeups_resume(void);
 
+extern void nmk_gpio_clocks_enable(void);
+extern void nmk_gpio_clocks_disable(void);
+
 extern void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up);
 
 /*
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 95732af7b208..aa59f4247dc5 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -14,6 +14,7 @@ config ARCH_OMAP1
 	select CLKDEV_LOOKUP
 	select CLKSRC_MMIO
 	select GENERIC_IRQ_CHIP
+	select HAVE_IDE
 	select NEED_MACH_MEMORY_H
 	help
 	  "Systems based on omap7xx, omap15xx or omap16xx"
@@ -133,18 +134,6 @@ config OMAP_MBOX_KFIFO_SIZE
 	  This can also be changed at runtime (via the mbox_kfifo_size
 	  module parameter).
 
-config OMAP_IOMMU
-	tristate
-
-config OMAP_IOMMU_DEBUG
-       tristate "Export OMAP IOMMU internals in DebugFS"
-       depends on OMAP_IOMMU && DEBUG_FS
-       help
-         Select this to see extensive information about
-         the internal state of OMAP IOMMU in debugfs.
-
-         Say N unless you know you need this.
-
 config OMAP_IOMMU_IVA2
 	bool
 
diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile
index f0233e6abcdf..985262242f25 100644
--- a/arch/arm/plat-omap/Makefile
+++ b/arch/arm/plat-omap/Makefile
@@ -18,8 +18,6 @@ obj-$(CONFIG_ARCH_OMAP3) += omap_device.o
 obj-$(CONFIG_ARCH_OMAP4) += omap_device.o
 
 obj-$(CONFIG_OMAP_MCBSP) += mcbsp.o
-obj-$(CONFIG_OMAP_IOMMU) += iommu.o iovmm.o
-obj-$(CONFIG_OMAP_IOMMU_DEBUG) += iommu-debug.o
 
 obj-$(CONFIG_CPU_FREQ) += cpu-omap.o
 obj-$(CONFIG_OMAP_DM_TIMER) += dmtimer.o
diff --git a/arch/arm/plat-omap/devices.c b/arch/arm/plat-omap/devices.c
index 64c3bd4aa54e..c46c47afa090 100644
--- a/arch/arm/plat-omap/devices.c
+++ b/arch/arm/plat-omap/devices.c
@@ -73,41 +73,6 @@ void omap_mcbsp_register_board_cfg(struct resource *res, int res_count,
 
 /*-------------------------------------------------------------------------*/
 
-#if defined(CONFIG_SND_OMAP_SOC_MCPDM) || \
-		defined(CONFIG_SND_OMAP_SOC_MCPDM_MODULE)
-
-static struct resource mcpdm_resources[] = {
-	{
-		.name		= "mcpdm_mem",
-		.start		= OMAP44XX_MCPDM_BASE,
-		.end		= OMAP44XX_MCPDM_BASE + SZ_4K,
-		.flags		= IORESOURCE_MEM,
-	},
-	{
-		.name		= "mcpdm_irq",
-		.start		= OMAP44XX_IRQ_MCPDM,
-		.end		= OMAP44XX_IRQ_MCPDM,
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device omap_mcpdm_device = {
-	.name		= "omap-mcpdm",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(mcpdm_resources),
-	.resource	= mcpdm_resources,
-};
-
-static void omap_init_mcpdm(void)
-{
-	(void) platform_device_register(&omap_mcpdm_device);
-}
-#else
-static inline void omap_init_mcpdm(void) {}
-#endif
-
-/*-------------------------------------------------------------------------*/
-
 #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) || \
 	defined(CONFIG_MMC_OMAP_HS) || defined(CONFIG_MMC_OMAP_HS_MODULE)
 
@@ -290,7 +255,6 @@ static int __init omap_init_devices(void)
 	 * in alphabetical order so they're easier to sort through.
 	 */
 	omap_init_rng();
-	omap_init_mcpdm();
 	omap_init_uwire();
 	return 0;
 }
diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index 3341ca4703e9..2388b8eebaaa 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -108,6 +108,22 @@ static inline int omap1_i2c_add_bus(int bus_id)
 	res[1].start = INT_I2C;
 	pdata = &i2c_pdata[bus_id - 1];
 
+	/* all OMAP1 have IP version 1 register set */
+	pdata->rev = OMAP_I2C_IP_VERSION_1;
+
+	/* all OMAP1 I2C are implemented like this */
+	pdata->flags = OMAP_I2C_FLAG_NO_FIFO |
+		       OMAP_I2C_FLAG_SIMPLE_CLOCK |
+		       OMAP_I2C_FLAG_16BIT_DATA_REG |
+		       OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK;
+
+	/* how the cpu bus is wired up differs for 7xx only */
+
+	if (cpu_is_omap7xx())
+		pdata->flags |= OMAP_I2C_FLAG_BUS_SHIFT_1;
+	else
+		pdata->flags |= OMAP_I2C_FLAG_BUS_SHIFT_2;
+
 	return platform_device_register(pdev);
 }
 
@@ -138,6 +154,7 @@ static inline int omap2_i2c_add_bus(int bus_id)
 	struct omap_device *od;
 	char oh_name[MAX_OMAP_I2C_HWMOD_NAME_LEN];
 	struct omap_i2c_bus_platform_data *pdata;
+	struct omap_i2c_dev_attr *dev_attr;
 
 	omap2_i2c_mux_pins(bus_id);
 
@@ -152,6 +169,16 @@ static inline int omap2_i2c_add_bus(int bus_id)
 
 	pdata = &i2c_pdata[bus_id - 1];
 	/*
+	 * pass the hwmod class's CPU-specific knowledge of I2C IP revision in
+	 * use, and functionality implementation flags, up to the OMAP I2C
+	 * driver via platform data
+	 */
+	pdata->rev = oh->class->rev;
+
+	dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr;
+	pdata->flags = dev_attr->flags;
+
+	/*
 	 * When waiting for completion of a i2c transfer, we need to
 	 * set a wake up latency constraint for the MPU. This is to
 	 * ensure quick enough wakeup from idle, when transfer
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
index 174f1b9c8c03..a1d79ee19250 100644
--- a/arch/arm/plat-omap/include/plat/iommu.h
+++ b/arch/arm/plat-omap/include/plat/iommu.h
@@ -25,16 +25,17 @@ struct iotlb_entry {
 	};
 };
 
-struct iommu {
+struct omap_iommu {
 	const char	*name;
 	struct module	*owner;
 	struct clk	*clk;
 	void __iomem	*regbase;
 	struct device	*dev;
 	void		*isr_priv;
+	struct iommu_domain *domain;
 
 	unsigned int	refcount;
-	struct mutex	iommu_lock;	/* global for this whole object */
+	spinlock_t	iommu_lock;	/* global for this whole object */
 
 	/*
 	 * We don't change iopgd for a situation like pgd for a task,
@@ -48,8 +49,6 @@ struct iommu {
 	struct list_head	mmap;
 	struct mutex		mmap_lock; /* protect mmap */
 
-	int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs, void *priv);
-
 	void *ctx; /* iommu context: registres saved area */
 	u32 da_start;
 	u32 da_end;
@@ -81,25 +80,27 @@ struct iotlb_lock {
 struct iommu_functions {
 	unsigned long	version;
 
-	int (*enable)(struct iommu *obj);
-	void (*disable)(struct iommu *obj);
-	void (*set_twl)(struct iommu *obj, bool on);
-	u32 (*fault_isr)(struct iommu *obj, u32 *ra);
+	int (*enable)(struct omap_iommu *obj);
+	void (*disable)(struct omap_iommu *obj);
+	void (*set_twl)(struct omap_iommu *obj, bool on);
+	u32 (*fault_isr)(struct omap_iommu *obj, u32 *ra);
 
-	void (*tlb_read_cr)(struct iommu *obj, struct cr_regs *cr);
-	void (*tlb_load_cr)(struct iommu *obj, struct cr_regs *cr);
+	void (*tlb_read_cr)(struct omap_iommu *obj, struct cr_regs *cr);
+	void (*tlb_load_cr)(struct omap_iommu *obj, struct cr_regs *cr);
 
-	struct cr_regs *(*alloc_cr)(struct iommu *obj, struct iotlb_entry *e);
+	struct cr_regs *(*alloc_cr)(struct omap_iommu *obj,
+							struct iotlb_entry *e);
 	int (*cr_valid)(struct cr_regs *cr);
 	u32 (*cr_to_virt)(struct cr_regs *cr);
 	void (*cr_to_e)(struct cr_regs *cr, struct iotlb_entry *e);
-	ssize_t (*dump_cr)(struct iommu *obj, struct cr_regs *cr, char *buf);
+	ssize_t (*dump_cr)(struct omap_iommu *obj, struct cr_regs *cr,
+							char *buf);
 
 	u32 (*get_pte_attr)(struct iotlb_entry *e);
 
-	void (*save_ctx)(struct iommu *obj);
-	void (*restore_ctx)(struct iommu *obj);
-	ssize_t (*dump_ctx)(struct iommu *obj, char *buf, ssize_t len);
+	void (*save_ctx)(struct omap_iommu *obj);
+	void (*restore_ctx)(struct omap_iommu *obj);
+	ssize_t (*dump_ctx)(struct omap_iommu *obj, char *buf, ssize_t len);
 };
 
 struct iommu_platform_data {
@@ -150,40 +151,31 @@ struct iommu_platform_data {
 /*
  * global functions
  */
-extern u32 iommu_arch_version(void);
-
-extern void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e);
-extern u32 iotlb_cr_to_virt(struct cr_regs *cr);
-
-extern int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e);
-extern void iommu_set_twl(struct iommu *obj, bool on);
-extern void flush_iotlb_page(struct iommu *obj, u32 da);
-extern void flush_iotlb_range(struct iommu *obj, u32 start, u32 end);
-extern void flush_iotlb_all(struct iommu *obj);
-
-extern int iopgtable_store_entry(struct iommu *obj, struct iotlb_entry *e);
-extern void iopgtable_lookup_entry(struct iommu *obj, u32 da, u32 **ppgd,
-				   u32 **ppte);
-extern size_t iopgtable_clear_entry(struct iommu *obj, u32 iova);
-
-extern int iommu_set_da_range(struct iommu *obj, u32 start, u32 end);
-extern struct iommu *iommu_get(const char *name);
-extern void iommu_put(struct iommu *obj);
-extern int iommu_set_isr(const char *name,
-			 int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs,
+extern u32 omap_iommu_arch_version(void);
+
+extern void omap_iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e);
+
+extern int
+omap_iopgtable_store_entry(struct omap_iommu *obj, struct iotlb_entry *e);
+
+extern int omap_iommu_set_isr(const char *name,
+		 int (*isr)(struct omap_iommu *obj, u32 da, u32 iommu_errs,
 				    void *priv),
 			 void *isr_priv);
 
-extern void iommu_save_ctx(struct iommu *obj);
-extern void iommu_restore_ctx(struct iommu *obj);
+extern void omap_iommu_save_ctx(struct omap_iommu *obj);
+extern void omap_iommu_restore_ctx(struct omap_iommu *obj);
 
-extern int install_iommu_arch(const struct iommu_functions *ops);
-extern void uninstall_iommu_arch(const struct iommu_functions *ops);
+extern int omap_install_iommu_arch(const struct iommu_functions *ops);
+extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
 
-extern int foreach_iommu_device(void *data,
+extern int omap_foreach_iommu_device(void *data,
 				int (*fn)(struct device *, void *));
 
-extern ssize_t iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t len);
-extern size_t dump_tlb_entries(struct iommu *obj, char *buf, ssize_t len);
+extern ssize_t
+omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
+extern size_t
+omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
+struct device *omap_find_iommu_device(const char *name);
 
 #endif /* __MACH_IOMMU_H */
diff --git a/arch/arm/plat-omap/include/plat/iommu2.h b/arch/arm/plat-omap/include/plat/iommu2.h
index 10ad05f410e9..d4116b595e40 100644
--- a/arch/arm/plat-omap/include/plat/iommu2.h
+++ b/arch/arm/plat-omap/include/plat/iommu2.h
@@ -83,12 +83,12 @@
 /*
  * register accessors
  */
-static inline u32 iommu_read_reg(struct iommu *obj, size_t offs)
+static inline u32 iommu_read_reg(struct omap_iommu *obj, size_t offs)
 {
 	return __raw_readl(obj->regbase + offs);
 }
 
-static inline void iommu_write_reg(struct iommu *obj, u32 val, size_t offs)
+static inline void iommu_write_reg(struct omap_iommu *obj, u32 val, size_t offs)
 {
 	__raw_writel(val, obj->regbase + offs);
 }
diff --git a/arch/arm/plat-omap/iopgtable.h b/arch/arm/plat-omap/include/plat/iopgtable.h
index c3e93bb0911f..66a813977d52 100644
--- a/arch/arm/plat-omap/iopgtable.h
+++ b/arch/arm/plat-omap/include/plat/iopgtable.h
@@ -56,6 +56,19 @@
 
 #define IOPAGE_MASK		IOPTE_MASK
 
+/**
+ * omap_iommu_translate() - va to pa translation
+ * @d:		omap iommu descriptor
+ * @va:		virtual address
+ * @mask:	omap iommu descriptor mask
+ *
+ * va to pa translation
+ */
+static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask)
+{
+	return (d & mask) | (va & (~mask));
+}
+
 /*
  * some descriptor attributes.
  */
@@ -64,10 +77,15 @@
 #define IOPGD_SUPER		(1 << 18 | 2 << 0)
 
 #define iopgd_is_table(x)	(((x) & 3) == IOPGD_TABLE)
+#define iopgd_is_section(x)	(((x) & (1 << 18 | 3)) == IOPGD_SECTION)
+#define iopgd_is_super(x)	(((x) & (1 << 18 | 3)) == IOPGD_SUPER)
 
 #define IOPTE_SMALL		(2 << 0)
 #define IOPTE_LARGE		(1 << 0)
 
+#define iopte_is_small(x)	(((x) & 2) == IOPTE_SMALL)
+#define iopte_is_large(x)	(((x) & 3) == IOPTE_LARGE)
+
 /* to find an entry in a page-table-directory */
 #define iopgd_index(da)		(((da) >> IOPGD_SHIFT) & (PTRS_PER_IOPGD - 1))
 #define iopgd_offset(obj, da)	((obj)->iopgd + iopgd_index(da))
@@ -97,6 +115,6 @@ static inline u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa,
 }
 
 #define to_iommu(dev)							\
-	(struct iommu *)platform_get_drvdata(to_platform_device(dev))
+	(struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))
 
 #endif /* __PLAT_OMAP_IOMMU_H */
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
index e992b9655fbc..6af1a91c0f36 100644
--- a/arch/arm/plat-omap/include/plat/iovmm.h
+++ b/arch/arm/plat-omap/include/plat/iovmm.h
@@ -13,8 +13,10 @@
 #ifndef __IOMMU_MMAP_H
 #define __IOMMU_MMAP_H
 
+#include <linux/iommu.h>
+
 struct iovm_struct {
-	struct iommu		*iommu;	/* iommu object which this belongs to */
+	struct omap_iommu	*iommu;	/* iommu object which this belongs to */
 	u32			da_start; /* area definition */
 	u32			da_end;
 	u32			flags; /* IOVMF_: see below */
@@ -70,20 +72,18 @@ struct iovm_struct {
 #define IOVMF_DA_FIXED		(1 << (4 + IOVMF_SW_SHIFT))
 
 
-extern struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da);
-extern u32 iommu_vmap(struct iommu *obj, u32 da,
+extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da);
+extern u32
+omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
 			const struct sg_table *sgt, u32 flags);
-extern struct sg_table *iommu_vunmap(struct iommu *obj, u32 da);
-extern u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes,
-			   u32 flags);
-extern void iommu_vfree(struct iommu *obj, const u32 da);
-extern u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes,
-			u32 flags);
-extern void iommu_kunmap(struct iommu *obj, u32 da);
-extern u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes,
-			   u32 flags);
-extern void iommu_kfree(struct iommu *obj, u32 da);
-
-extern void *da_to_va(struct iommu *obj, u32 da);
+extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
+				struct omap_iommu *obj, u32 da);
+extern u32
+omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj,
+				u32 da, size_t bytes, u32 flags);
+extern void
+omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
+				const u32 da);
+extern void *omap_da_to_va(struct omap_iommu *obj, u32 da);
 
 #endif /* __IOMMU_MMAP_H */
diff --git a/arch/arm/plat-omap/include/plat/mmc.h b/arch/arm/plat-omap/include/plat/mmc.h
index c7b874186c27..94cf70afb236 100644
--- a/arch/arm/plat-omap/include/plat/mmc.h
+++ b/arch/arm/plat-omap/include/plat/mmc.h
@@ -31,7 +31,24 @@
 
 #define OMAP_MMC_MAX_SLOTS	2
 
-#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT	BIT(1)
+/*
+ * struct omap_mmc_dev_attr.flags possibilities
+ *
+ * OMAP_HSMMC_SUPPORTS_DUAL_VOLT: Some HSMMC controller instances can
+ *    operate with either 1.8Vdc or 3.0Vdc card voltages; this flag
+ *    should be set if this is the case.  See for example Section 22.5.3
+ *    "MMC/SD/SDIO1 Bus Voltage Selection" of the OMAP34xx Multimedia
+ *    Device Silicon Revision 3.1.x Revision ZR (July 2011) (SWPU223R).
+ *
+ * OMAP_HSMMC_BROKEN_MULTIBLOCK_READ: Multiple-block read transfers
+ *    don't work correctly on some MMC controller instances on some
+ *    OMAP3 SoCs; this flag should be set if this is the case.  See
+ *    for example Advisory 2.1.1.128 "MMC: Multiple Block Read
+ *    Operation Issue" in _OMAP3530/3525/3515/3503 Silicon Errata_
+ *    Revision F (October 2010) (SPRZ278F).
+ */
+#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT		BIT(0)
+#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ	BIT(1)
 
 struct omap_mmc_dev_attr {
 	u8 flags;
diff --git a/arch/arm/plat-omap/iommu-debug.c b/arch/arm/plat-omap/iommu-debug.c
deleted file mode 100644
index f07cf2f08e09..000000000000
--- a/arch/arm/plat-omap/iommu-debug.c
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
- * omap iommu: debugfs interface
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/platform_device.h>
-#include <linux/debugfs.h>
-
-#include <plat/iommu.h>
-#include <plat/iovmm.h>
-
-#include "iopgtable.h"
-
-#define MAXCOLUMN 100 /* for short messages */
-
-static DEFINE_MUTEX(iommu_debug_lock);
-
-static struct dentry *iommu_debug_root;
-
-static ssize_t debug_read_ver(struct file *file, char __user *userbuf,
-			      size_t count, loff_t *ppos)
-{
-	u32 ver = iommu_arch_version();
-	char buf[MAXCOLUMN], *p = buf;
-
-	p += sprintf(p, "H/W version: %d.%d\n", (ver >> 4) & 0xf , ver & 0xf);
-
-	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-}
-
-static ssize_t debug_read_regs(struct file *file, char __user *userbuf,
-			       size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	char *p, *buf;
-	ssize_t bytes;
-
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	mutex_lock(&iommu_debug_lock);
-
-	bytes = iommu_dump_ctx(obj, p, count);
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, bytes);
-
-	mutex_unlock(&iommu_debug_lock);
-	kfree(buf);
-
-	return bytes;
-}
-
-static ssize_t debug_read_tlb(struct file *file, char __user *userbuf,
-			      size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	char *p, *buf;
-	ssize_t bytes, rest;
-
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	mutex_lock(&iommu_debug_lock);
-
-	p += sprintf(p, "%8s %8s\n", "cam:", "ram:");
-	p += sprintf(p, "-----------------------------------------\n");
-	rest = count - (p - buf);
-	p += dump_tlb_entries(obj, p, rest);
-
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-
-	mutex_unlock(&iommu_debug_lock);
-	kfree(buf);
-
-	return bytes;
-}
-
-static ssize_t debug_write_pagetable(struct file *file,
-		     const char __user *userbuf, size_t count, loff_t *ppos)
-{
-	struct iotlb_entry e;
-	struct cr_regs cr;
-	int err;
-	struct iommu *obj = file->private_data;
-	char buf[MAXCOLUMN], *p = buf;
-
-	count = min(count, sizeof(buf));
-
-	mutex_lock(&iommu_debug_lock);
-	if (copy_from_user(p, userbuf, count)) {
-		mutex_unlock(&iommu_debug_lock);
-		return -EFAULT;
-	}
-
-	sscanf(p, "%x %x", &cr.cam, &cr.ram);
-	if (!cr.cam || !cr.ram) {
-		mutex_unlock(&iommu_debug_lock);
-		return -EINVAL;
-	}
-
-	iotlb_cr_to_e(&cr, &e);
-	err = iopgtable_store_entry(obj, &e);
-	if (err)
-		dev_err(obj->dev, "%s: fail to store cr\n", __func__);
-
-	mutex_unlock(&iommu_debug_lock);
-	return count;
-}
-
-#define dump_ioptable_entry_one(lv, da, val)			\
-	({							\
-		int __err = 0;					\
-		ssize_t bytes;					\
-		const int maxcol = 22;				\
-		const char *str = "%d: %08x %08x\n";		\
-		bytes = snprintf(p, maxcol, str, lv, da, val);	\
-		p += bytes;					\
-		len -= bytes;					\
-		if (len < maxcol)				\
-			__err = -ENOMEM;			\
-		__err;						\
-	})
-
-static ssize_t dump_ioptable(struct iommu *obj, char *buf, ssize_t len)
-{
-	int i;
-	u32 *iopgd;
-	char *p = buf;
-
-	spin_lock(&obj->page_table_lock);
-
-	iopgd = iopgd_offset(obj, 0);
-	for (i = 0; i < PTRS_PER_IOPGD; i++, iopgd++) {
-		int j, err;
-		u32 *iopte;
-		u32 da;
-
-		if (!*iopgd)
-			continue;
-
-		if (!(*iopgd & IOPGD_TABLE)) {
-			da = i << IOPGD_SHIFT;
-
-			err = dump_ioptable_entry_one(1, da, *iopgd);
-			if (err)
-				goto out;
-			continue;
-		}
-
-		iopte = iopte_offset(iopgd, 0);
-
-		for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) {
-			if (!*iopte)
-				continue;
-
-			da = (i << IOPGD_SHIFT) + (j << IOPTE_SHIFT);
-			err = dump_ioptable_entry_one(2, da, *iopgd);
-			if (err)
-				goto out;
-		}
-	}
-out:
-	spin_unlock(&obj->page_table_lock);
-
-	return p - buf;
-}
-
-static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf,
-				    size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	char *p, *buf;
-	size_t bytes;
-
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	p += sprintf(p, "L: %8s %8s\n", "da:", "pa:");
-	p += sprintf(p, "-----------------------------------------\n");
-
-	mutex_lock(&iommu_debug_lock);
-
-	bytes = PAGE_SIZE - (p - buf);
-	p += dump_ioptable(obj, p, bytes);
-
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-
-	mutex_unlock(&iommu_debug_lock);
-	free_page((unsigned long)buf);
-
-	return bytes;
-}
-
-static ssize_t debug_read_mmap(struct file *file, char __user *userbuf,
-			       size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	char *p, *buf;
-	struct iovm_struct *tmp;
-	int uninitialized_var(i);
-	ssize_t bytes;
-
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n",
-		     "No", "start", "end", "size", "flags");
-	p += sprintf(p, "-------------------------------------------------\n");
-
-	mutex_lock(&iommu_debug_lock);
-
-	list_for_each_entry(tmp, &obj->mmap, list) {
-		size_t len;
-		const char *str = "%3d %08x-%08x %6x %8x\n";
-		const int maxcol = 39;
-
-		len = tmp->da_end - tmp->da_start;
-		p += snprintf(p, maxcol, str,
-			      i, tmp->da_start, tmp->da_end, len, tmp->flags);
-
-		if (PAGE_SIZE - (p - buf) < maxcol)
-			break;
-		i++;
-	}
-
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-
-	mutex_unlock(&iommu_debug_lock);
-	free_page((unsigned long)buf);
-
-	return bytes;
-}
-
-static ssize_t debug_read_mem(struct file *file, char __user *userbuf,
-			      size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	char *p, *buf;
-	struct iovm_struct *area;
-	ssize_t bytes;
-
-	count = min_t(ssize_t, count, PAGE_SIZE);
-
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	mutex_lock(&iommu_debug_lock);
-
-	area = find_iovm_area(obj, (u32)ppos);
-	if (IS_ERR(area)) {
-		bytes = -EINVAL;
-		goto err_out;
-	}
-	memcpy(p, area->va, count);
-	p += count;
-
-	bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
-err_out:
-	mutex_unlock(&iommu_debug_lock);
-	free_page((unsigned long)buf);
-
-	return bytes;
-}
-
-static ssize_t debug_write_mem(struct file *file, const char __user *userbuf,
-			       size_t count, loff_t *ppos)
-{
-	struct iommu *obj = file->private_data;
-	struct iovm_struct *area;
-	char *p, *buf;
-
-	count = min_t(size_t, count, PAGE_SIZE);
-
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	p = buf;
-
-	mutex_lock(&iommu_debug_lock);
-
-	if (copy_from_user(p, userbuf, count)) {
-		count =  -EFAULT;
-		goto err_out;
-	}
-
-	area = find_iovm_area(obj, (u32)ppos);
-	if (IS_ERR(area)) {
-		count = -EINVAL;
-		goto err_out;
-	}
-	memcpy(area->va, p, count);
-err_out:
-	mutex_unlock(&iommu_debug_lock);
-	free_page((unsigned long)buf);
-
-	return count;
-}
-
-static int debug_open_generic(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
-#define DEBUG_FOPS(name)						\
-	static const struct file_operations debug_##name##_fops = {	\
-		.open = debug_open_generic,				\
-		.read = debug_read_##name,				\
-		.write = debug_write_##name,				\
-		.llseek = generic_file_llseek,				\
-	};
-
-#define DEBUG_FOPS_RO(name)						\
-	static const struct file_operations debug_##name##_fops = {	\
-		.open = debug_open_generic,				\
-		.read = debug_read_##name,				\
-		.llseek = generic_file_llseek,				\
-	};
-
-DEBUG_FOPS_RO(ver);
-DEBUG_FOPS_RO(regs);
-DEBUG_FOPS_RO(tlb);
-DEBUG_FOPS(pagetable);
-DEBUG_FOPS_RO(mmap);
-DEBUG_FOPS(mem);
-
-#define __DEBUG_ADD_FILE(attr, mode)					\
-	{								\
-		struct dentry *dent;					\
-		dent = debugfs_create_file(#attr, mode, parent,		\
-					   obj, &debug_##attr##_fops);	\
-		if (!dent)						\
-			return -ENOMEM;					\
-	}
-
-#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 600)
-#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 400)
-
-static int iommu_debug_register(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct iommu *obj = platform_get_drvdata(pdev);
-	struct dentry *d, *parent;
-
-	if (!obj || !obj->dev)
-		return -EINVAL;
-
-	d = debugfs_create_dir(obj->name, iommu_debug_root);
-	if (!d)
-		return -ENOMEM;
-	parent = d;
-
-	d = debugfs_create_u8("nr_tlb_entries", 400, parent,
-			      (u8 *)&obj->nr_tlb_entries);
-	if (!d)
-		return -ENOMEM;
-
-	DEBUG_ADD_FILE_RO(ver);
-	DEBUG_ADD_FILE_RO(regs);
-	DEBUG_ADD_FILE_RO(tlb);
-	DEBUG_ADD_FILE(pagetable);
-	DEBUG_ADD_FILE_RO(mmap);
-	DEBUG_ADD_FILE(mem);
-
-	return 0;
-}
-
-static int __init iommu_debug_init(void)
-{
-	struct dentry *d;
-	int err;
-
-	d = debugfs_create_dir("iommu", NULL);
-	if (!d)
-		return -ENOMEM;
-	iommu_debug_root = d;
-
-	err = foreach_iommu_device(d, iommu_debug_register);
-	if (err)
-		goto err_out;
-	return 0;
-
-err_out:
-	debugfs_remove_recursive(iommu_debug_root);
-	return err;
-}
-module_init(iommu_debug_init)
-
-static void __exit iommu_debugfs_exit(void)
-{
-	debugfs_remove_recursive(iommu_debug_root);
-}
-module_exit(iommu_debugfs_exit)
-
-MODULE_DESCRIPTION("omap iommu: debugfs interface");
-MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-omap/iommu.c b/arch/arm/plat-omap/iommu.c
deleted file mode 100644
index 34fc31ee9081..000000000000
--- a/arch/arm/plat-omap/iommu.c
+++ /dev/null
@@ -1,1102 +0,0 @@
-/*
- * omap iommu: tlb and pagetable primitives
- *
- * Copyright (C) 2008-2010 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>,
- *		Paul Mundt and Toshihiro Kobayashi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-
-#include <asm/cacheflush.h>
-
-#include <plat/iommu.h>
-
-#include "iopgtable.h"
-
-#define for_each_iotlb_cr(obj, n, __i, cr)				\
-	for (__i = 0;							\
-	     (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true);	\
-	     __i++)
-
-/* accommodate the difference between omap1 and omap2/3 */
-static const struct iommu_functions *arch_iommu;
-
-static struct platform_driver omap_iommu_driver;
-static struct kmem_cache *iopte_cachep;
-
-/**
- * install_iommu_arch - Install archtecure specific iommu functions
- * @ops:	a pointer to architecture specific iommu functions
- *
- * There are several kind of iommu algorithm(tlb, pagetable) among
- * omap series. This interface installs such an iommu algorighm.
- **/
-int install_iommu_arch(const struct iommu_functions *ops)
-{
-	if (arch_iommu)
-		return -EBUSY;
-
-	arch_iommu = ops;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(install_iommu_arch);
-
-/**
- * uninstall_iommu_arch - Uninstall archtecure specific iommu functions
- * @ops:	a pointer to architecture specific iommu functions
- *
- * This interface uninstalls the iommu algorighm installed previously.
- **/
-void uninstall_iommu_arch(const struct iommu_functions *ops)
-{
-	if (arch_iommu != ops)
-		pr_err("%s: not your arch\n", __func__);
-
-	arch_iommu = NULL;
-}
-EXPORT_SYMBOL_GPL(uninstall_iommu_arch);
-
-/**
- * iommu_save_ctx - Save registers for pm off-mode support
- * @obj:	target iommu
- **/
-void iommu_save_ctx(struct iommu *obj)
-{
-	arch_iommu->save_ctx(obj);
-}
-EXPORT_SYMBOL_GPL(iommu_save_ctx);
-
-/**
- * iommu_restore_ctx - Restore registers for pm off-mode support
- * @obj:	target iommu
- **/
-void iommu_restore_ctx(struct iommu *obj)
-{
-	arch_iommu->restore_ctx(obj);
-}
-EXPORT_SYMBOL_GPL(iommu_restore_ctx);
-
-/**
- * iommu_arch_version - Return running iommu arch version
- **/
-u32 iommu_arch_version(void)
-{
-	return arch_iommu->version;
-}
-EXPORT_SYMBOL_GPL(iommu_arch_version);
-
-static int iommu_enable(struct iommu *obj)
-{
-	int err;
-
-	if (!obj)
-		return -EINVAL;
-
-	if (!arch_iommu)
-		return -ENODEV;
-
-	clk_enable(obj->clk);
-
-	err = arch_iommu->enable(obj);
-
-	clk_disable(obj->clk);
-	return err;
-}
-
-static void iommu_disable(struct iommu *obj)
-{
-	if (!obj)
-		return;
-
-	clk_enable(obj->clk);
-
-	arch_iommu->disable(obj);
-
-	clk_disable(obj->clk);
-}
-
-/*
- *	TLB operations
- */
-void iotlb_cr_to_e(struct cr_regs *cr, struct iotlb_entry *e)
-{
-	BUG_ON(!cr || !e);
-
-	arch_iommu->cr_to_e(cr, e);
-}
-EXPORT_SYMBOL_GPL(iotlb_cr_to_e);
-
-static inline int iotlb_cr_valid(struct cr_regs *cr)
-{
-	if (!cr)
-		return -EINVAL;
-
-	return arch_iommu->cr_valid(cr);
-}
-
-static inline struct cr_regs *iotlb_alloc_cr(struct iommu *obj,
-					     struct iotlb_entry *e)
-{
-	if (!e)
-		return NULL;
-
-	return arch_iommu->alloc_cr(obj, e);
-}
-
-u32 iotlb_cr_to_virt(struct cr_regs *cr)
-{
-	return arch_iommu->cr_to_virt(cr);
-}
-EXPORT_SYMBOL_GPL(iotlb_cr_to_virt);
-
-static u32 get_iopte_attr(struct iotlb_entry *e)
-{
-	return arch_iommu->get_pte_attr(e);
-}
-
-static u32 iommu_report_fault(struct iommu *obj, u32 *da)
-{
-	return arch_iommu->fault_isr(obj, da);
-}
-
-static void iotlb_lock_get(struct iommu *obj, struct iotlb_lock *l)
-{
-	u32 val;
-
-	val = iommu_read_reg(obj, MMU_LOCK);
-
-	l->base = MMU_LOCK_BASE(val);
-	l->vict = MMU_LOCK_VICT(val);
-
-}
-
-static void iotlb_lock_set(struct iommu *obj, struct iotlb_lock *l)
-{
-	u32 val;
-
-	val = (l->base << MMU_LOCK_BASE_SHIFT);
-	val |= (l->vict << MMU_LOCK_VICT_SHIFT);
-
-	iommu_write_reg(obj, val, MMU_LOCK);
-}
-
-static void iotlb_read_cr(struct iommu *obj, struct cr_regs *cr)
-{
-	arch_iommu->tlb_read_cr(obj, cr);
-}
-
-static void iotlb_load_cr(struct iommu *obj, struct cr_regs *cr)
-{
-	arch_iommu->tlb_load_cr(obj, cr);
-
-	iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
-	iommu_write_reg(obj, 1, MMU_LD_TLB);
-}
-
-/**
- * iotlb_dump_cr - Dump an iommu tlb entry into buf
- * @obj:	target iommu
- * @cr:		contents of cam and ram register
- * @buf:	output buffer
- **/
-static inline ssize_t iotlb_dump_cr(struct iommu *obj, struct cr_regs *cr,
-				    char *buf)
-{
-	BUG_ON(!cr || !buf);
-
-	return arch_iommu->dump_cr(obj, cr, buf);
-}
-
-/* only used in iotlb iteration for-loop */
-static struct cr_regs __iotlb_read_cr(struct iommu *obj, int n)
-{
-	struct cr_regs cr;
-	struct iotlb_lock l;
-
-	iotlb_lock_get(obj, &l);
-	l.vict = n;
-	iotlb_lock_set(obj, &l);
-	iotlb_read_cr(obj, &cr);
-
-	return cr;
-}
-
-/**
- * load_iotlb_entry - Set an iommu tlb entry
- * @obj:	target iommu
- * @e:		an iommu tlb entry info
- **/
-int load_iotlb_entry(struct iommu *obj, struct iotlb_entry *e)
-{
-	int err = 0;
-	struct iotlb_lock l;
-	struct cr_regs *cr;
-
-	if (!obj || !obj->nr_tlb_entries || !e)
-		return -EINVAL;
-
-	clk_enable(obj->clk);
-
-	iotlb_lock_get(obj, &l);
-	if (l.base == obj->nr_tlb_entries) {
-		dev_warn(obj->dev, "%s: preserve entries full\n", __func__);
-		err = -EBUSY;
-		goto out;
-	}
-	if (!e->prsvd) {
-		int i;
-		struct cr_regs tmp;
-
-		for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, tmp)
-			if (!iotlb_cr_valid(&tmp))
-				break;
-
-		if (i == obj->nr_tlb_entries) {
-			dev_dbg(obj->dev, "%s: full: no entry\n", __func__);
-			err = -EBUSY;
-			goto out;
-		}
-
-		iotlb_lock_get(obj, &l);
-	} else {
-		l.vict = l.base;
-		iotlb_lock_set(obj, &l);
-	}
-
-	cr = iotlb_alloc_cr(obj, e);
-	if (IS_ERR(cr)) {
-		clk_disable(obj->clk);
-		return PTR_ERR(cr);
-	}
-
-	iotlb_load_cr(obj, cr);
-	kfree(cr);
-
-	if (e->prsvd)
-		l.base++;
-	/* increment victim for next tlb load */
-	if (++l.vict == obj->nr_tlb_entries)
-		l.vict = l.base;
-	iotlb_lock_set(obj, &l);
-out:
-	clk_disable(obj->clk);
-	return err;
-}
-EXPORT_SYMBOL_GPL(load_iotlb_entry);
-
-/**
- * flush_iotlb_page - Clear an iommu tlb entry
- * @obj:	target iommu
- * @da:		iommu device virtual address
- *
- * Clear an iommu tlb entry which includes 'da' address.
- **/
-void flush_iotlb_page(struct iommu *obj, u32 da)
-{
-	int i;
-	struct cr_regs cr;
-
-	clk_enable(obj->clk);
-
-	for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
-		u32 start;
-		size_t bytes;
-
-		if (!iotlb_cr_valid(&cr))
-			continue;
-
-		start = iotlb_cr_to_virt(&cr);
-		bytes = iopgsz_to_bytes(cr.cam & 3);
-
-		if ((start <= da) && (da < start + bytes)) {
-			dev_dbg(obj->dev, "%s: %08x<=%08x(%x)\n",
-				__func__, start, da, bytes);
-			iotlb_load_cr(obj, &cr);
-			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
-		}
-	}
-	clk_disable(obj->clk);
-
-	if (i == obj->nr_tlb_entries)
-		dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
-}
-EXPORT_SYMBOL_GPL(flush_iotlb_page);
-
-/**
- * flush_iotlb_range - Clear an iommu tlb entries
- * @obj:	target iommu
- * @start:	iommu device virtual address(start)
- * @end:	iommu device virtual address(end)
- *
- * Clear an iommu tlb entry which includes 'da' address.
- **/
-void flush_iotlb_range(struct iommu *obj, u32 start, u32 end)
-{
-	u32 da = start;
-
-	while (da < end) {
-		flush_iotlb_page(obj, da);
-		/* FIXME: Optimize for multiple page size */
-		da += IOPTE_SIZE;
-	}
-}
-EXPORT_SYMBOL_GPL(flush_iotlb_range);
-
-/**
- * flush_iotlb_all - Clear all iommu tlb entries
- * @obj:	target iommu
- **/
-void flush_iotlb_all(struct iommu *obj)
-{
-	struct iotlb_lock l;
-
-	clk_enable(obj->clk);
-
-	l.base = 0;
-	l.vict = 0;
-	iotlb_lock_set(obj, &l);
-
-	iommu_write_reg(obj, 1, MMU_GFLUSH);
-
-	clk_disable(obj->clk);
-}
-EXPORT_SYMBOL_GPL(flush_iotlb_all);
-
-/**
- * iommu_set_twl - enable/disable table walking logic
- * @obj:	target iommu
- * @on:		enable/disable
- *
- * Function used to enable/disable TWL. If one wants to work
- * exclusively with locked TLB entries and receive notifications
- * for TLB miss then call this function to disable TWL.
- */
-void iommu_set_twl(struct iommu *obj, bool on)
-{
-	clk_enable(obj->clk);
-	arch_iommu->set_twl(obj, on);
-	clk_disable(obj->clk);
-}
-EXPORT_SYMBOL_GPL(iommu_set_twl);
-
-#if defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
-
-ssize_t iommu_dump_ctx(struct iommu *obj, char *buf, ssize_t bytes)
-{
-	if (!obj || !buf)
-		return -EINVAL;
-
-	clk_enable(obj->clk);
-
-	bytes = arch_iommu->dump_ctx(obj, buf, bytes);
-
-	clk_disable(obj->clk);
-
-	return bytes;
-}
-EXPORT_SYMBOL_GPL(iommu_dump_ctx);
-
-static int __dump_tlb_entries(struct iommu *obj, struct cr_regs *crs, int num)
-{
-	int i;
-	struct iotlb_lock saved;
-	struct cr_regs tmp;
-	struct cr_regs *p = crs;
-
-	clk_enable(obj->clk);
-	iotlb_lock_get(obj, &saved);
-
-	for_each_iotlb_cr(obj, num, i, tmp) {
-		if (!iotlb_cr_valid(&tmp))
-			continue;
-		*p++ = tmp;
-	}
-
-	iotlb_lock_set(obj, &saved);
-	clk_disable(obj->clk);
-
-	return  p - crs;
-}
-
-/**
- * dump_tlb_entries - dump cr arrays to given buffer
- * @obj:	target iommu
- * @buf:	output buffer
- **/
-size_t dump_tlb_entries(struct iommu *obj, char *buf, ssize_t bytes)
-{
-	int i, num;
-	struct cr_regs *cr;
-	char *p = buf;
-
-	num = bytes / sizeof(*cr);
-	num = min(obj->nr_tlb_entries, num);
-
-	cr = kcalloc(num, sizeof(*cr), GFP_KERNEL);
-	if (!cr)
-		return 0;
-
-	num = __dump_tlb_entries(obj, cr, num);
-	for (i = 0; i < num; i++)
-		p += iotlb_dump_cr(obj, cr + i, p);
-	kfree(cr);
-
-	return p - buf;
-}
-EXPORT_SYMBOL_GPL(dump_tlb_entries);
-
-int foreach_iommu_device(void *data, int (*fn)(struct device *, void *))
-{
-	return driver_for_each_device(&omap_iommu_driver.driver,
-				      NULL, data, fn);
-}
-EXPORT_SYMBOL_GPL(foreach_iommu_device);
-
-#endif /* CONFIG_OMAP_IOMMU_DEBUG_MODULE */
-
-/*
- *	H/W pagetable operations
- */
-static void flush_iopgd_range(u32 *first, u32 *last)
-{
-	/* FIXME: L2 cache should be taken care of if it exists */
-	do {
-		asm("mcr	p15, 0, %0, c7, c10, 1 @ flush_pgd"
-		    : : "r" (first));
-		first += L1_CACHE_BYTES / sizeof(*first);
-	} while (first <= last);
-}
-
-static void flush_iopte_range(u32 *first, u32 *last)
-{
-	/* FIXME: L2 cache should be taken care of if it exists */
-	do {
-		asm("mcr	p15, 0, %0, c7, c10, 1 @ flush_pte"
-		    : : "r" (first));
-		first += L1_CACHE_BYTES / sizeof(*first);
-	} while (first <= last);
-}
-
-static void iopte_free(u32 *iopte)
-{
-	/* Note: freed iopte's must be clean ready for re-use */
-	kmem_cache_free(iopte_cachep, iopte);
-}
-
-static u32 *iopte_alloc(struct iommu *obj, u32 *iopgd, u32 da)
-{
-	u32 *iopte;
-
-	/* a table has already existed */
-	if (*iopgd)
-		goto pte_ready;
-
-	/*
-	 * do the allocation outside the page table lock
-	 */
-	spin_unlock(&obj->page_table_lock);
-	iopte = kmem_cache_zalloc(iopte_cachep, GFP_KERNEL);
-	spin_lock(&obj->page_table_lock);
-
-	if (!*iopgd) {
-		if (!iopte)
-			return ERR_PTR(-ENOMEM);
-
-		*iopgd = virt_to_phys(iopte) | IOPGD_TABLE;
-		flush_iopgd_range(iopgd, iopgd);
-
-		dev_vdbg(obj->dev, "%s: a new pte:%p\n", __func__, iopte);
-	} else {
-		/* We raced, free the reduniovant table */
-		iopte_free(iopte);
-	}
-
-pte_ready:
-	iopte = iopte_offset(iopgd, da);
-
-	dev_vdbg(obj->dev,
-		 "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
-		 __func__, da, iopgd, *iopgd, iopte, *iopte);
-
-	return iopte;
-}
-
-static int iopgd_alloc_section(struct iommu *obj, u32 da, u32 pa, u32 prot)
-{
-	u32 *iopgd = iopgd_offset(obj, da);
-
-	if ((da | pa) & ~IOSECTION_MASK) {
-		dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n",
-			__func__, da, pa, IOSECTION_SIZE);
-		return -EINVAL;
-	}
-
-	*iopgd = (pa & IOSECTION_MASK) | prot | IOPGD_SECTION;
-	flush_iopgd_range(iopgd, iopgd);
-	return 0;
-}
-
-static int iopgd_alloc_super(struct iommu *obj, u32 da, u32 pa, u32 prot)
-{
-	u32 *iopgd = iopgd_offset(obj, da);
-	int i;
-
-	if ((da | pa) & ~IOSUPER_MASK) {
-		dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n",
-			__func__, da, pa, IOSUPER_SIZE);
-		return -EINVAL;
-	}
-
-	for (i = 0; i < 16; i++)
-		*(iopgd + i) = (pa & IOSUPER_MASK) | prot | IOPGD_SUPER;
-	flush_iopgd_range(iopgd, iopgd + 15);
-	return 0;
-}
-
-static int iopte_alloc_page(struct iommu *obj, u32 da, u32 pa, u32 prot)
-{
-	u32 *iopgd = iopgd_offset(obj, da);
-	u32 *iopte = iopte_alloc(obj, iopgd, da);
-
-	if (IS_ERR(iopte))
-		return PTR_ERR(iopte);
-
-	*iopte = (pa & IOPAGE_MASK) | prot | IOPTE_SMALL;
-	flush_iopte_range(iopte, iopte);
-
-	dev_vdbg(obj->dev, "%s: da:%08x pa:%08x pte:%p *pte:%08x\n",
-		 __func__, da, pa, iopte, *iopte);
-
-	return 0;
-}
-
-static int iopte_alloc_large(struct iommu *obj, u32 da, u32 pa, u32 prot)
-{
-	u32 *iopgd = iopgd_offset(obj, da);
-	u32 *iopte = iopte_alloc(obj, iopgd, da);
-	int i;
-
-	if ((da | pa) & ~IOLARGE_MASK) {
-		dev_err(obj->dev, "%s: %08x:%08x should aligned on %08lx\n",
-			__func__, da, pa, IOLARGE_SIZE);
-		return -EINVAL;
-	}
-
-	if (IS_ERR(iopte))
-		return PTR_ERR(iopte);
-
-	for (i = 0; i < 16; i++)
-		*(iopte + i) = (pa & IOLARGE_MASK) | prot | IOPTE_LARGE;
-	flush_iopte_range(iopte, iopte + 15);
-	return 0;
-}
-
-static int iopgtable_store_entry_core(struct iommu *obj, struct iotlb_entry *e)
-{
-	int (*fn)(struct iommu *, u32, u32, u32);
-	u32 prot;
-	int err;
-
-	if (!obj || !e)
-		return -EINVAL;
-
-	switch (e->pgsz) {
-	case MMU_CAM_PGSZ_16M:
-		fn = iopgd_alloc_super;
-		break;
-	case MMU_CAM_PGSZ_1M:
-		fn = iopgd_alloc_section;
-		break;
-	case MMU_CAM_PGSZ_64K:
-		fn = iopte_alloc_large;
-		break;
-	case MMU_CAM_PGSZ_4K:
-		fn = iopte_alloc_page;
-		break;
-	default:
-		fn = NULL;
-		BUG();
-		break;
-	}
-
-	prot = get_iopte_attr(e);
-
-	spin_lock(&obj->page_table_lock);
-	err = fn(obj, e->da, e->pa, prot);
-	spin_unlock(&obj->page_table_lock);
-
-	return err;
-}
-
-/**
- * iopgtable_store_entry - Make an iommu pte entry
- * @obj:	target iommu
- * @e:		an iommu tlb entry info
- **/
-int iopgtable_store_entry(struct iommu *obj, struct iotlb_entry *e)
-{
-	int err;
-
-	flush_iotlb_page(obj, e->da);
-	err = iopgtable_store_entry_core(obj, e);
-#ifdef PREFETCH_IOTLB
-	if (!err)
-		load_iotlb_entry(obj, e);
-#endif
-	return err;
-}
-EXPORT_SYMBOL_GPL(iopgtable_store_entry);
-
-/**
- * iopgtable_lookup_entry - Lookup an iommu pte entry
- * @obj:	target iommu
- * @da:		iommu device virtual address
- * @ppgd:	iommu pgd entry pointer to be returned
- * @ppte:	iommu pte entry pointer to be returned
- **/
-void iopgtable_lookup_entry(struct iommu *obj, u32 da, u32 **ppgd, u32 **ppte)
-{
-	u32 *iopgd, *iopte = NULL;
-
-	iopgd = iopgd_offset(obj, da);
-	if (!*iopgd)
-		goto out;
-
-	if (iopgd_is_table(*iopgd))
-		iopte = iopte_offset(iopgd, da);
-out:
-	*ppgd = iopgd;
-	*ppte = iopte;
-}
-EXPORT_SYMBOL_GPL(iopgtable_lookup_entry);
-
-static size_t iopgtable_clear_entry_core(struct iommu *obj, u32 da)
-{
-	size_t bytes;
-	u32 *iopgd = iopgd_offset(obj, da);
-	int nent = 1;
-
-	if (!*iopgd)
-		return 0;
-
-	if (iopgd_is_table(*iopgd)) {
-		int i;
-		u32 *iopte = iopte_offset(iopgd, da);
-
-		bytes = IOPTE_SIZE;
-		if (*iopte & IOPTE_LARGE) {
-			nent *= 16;
-			/* rewind to the 1st entry */
-			iopte = iopte_offset(iopgd, (da & IOLARGE_MASK));
-		}
-		bytes *= nent;
-		memset(iopte, 0, nent * sizeof(*iopte));
-		flush_iopte_range(iopte, iopte + (nent - 1) * sizeof(*iopte));
-
-		/*
-		 * do table walk to check if this table is necessary or not
-		 */
-		iopte = iopte_offset(iopgd, 0);
-		for (i = 0; i < PTRS_PER_IOPTE; i++)
-			if (iopte[i])
-				goto out;
-
-		iopte_free(iopte);
-		nent = 1; /* for the next L1 entry */
-	} else {
-		bytes = IOPGD_SIZE;
-		if ((*iopgd & IOPGD_SUPER) == IOPGD_SUPER) {
-			nent *= 16;
-			/* rewind to the 1st entry */
-			iopgd = iopgd_offset(obj, (da & IOSUPER_MASK));
-		}
-		bytes *= nent;
-	}
-	memset(iopgd, 0, nent * sizeof(*iopgd));
-	flush_iopgd_range(iopgd, iopgd + (nent - 1) * sizeof(*iopgd));
-out:
-	return bytes;
-}
-
-/**
- * iopgtable_clear_entry - Remove an iommu pte entry
- * @obj:	target iommu
- * @da:		iommu device virtual address
- **/
-size_t iopgtable_clear_entry(struct iommu *obj, u32 da)
-{
-	size_t bytes;
-
-	spin_lock(&obj->page_table_lock);
-
-	bytes = iopgtable_clear_entry_core(obj, da);
-	flush_iotlb_page(obj, da);
-
-	spin_unlock(&obj->page_table_lock);
-
-	return bytes;
-}
-EXPORT_SYMBOL_GPL(iopgtable_clear_entry);
-
-static void iopgtable_clear_entry_all(struct iommu *obj)
-{
-	int i;
-
-	spin_lock(&obj->page_table_lock);
-
-	for (i = 0; i < PTRS_PER_IOPGD; i++) {
-		u32 da;
-		u32 *iopgd;
-
-		da = i << IOPGD_SHIFT;
-		iopgd = iopgd_offset(obj, da);
-
-		if (!*iopgd)
-			continue;
-
-		if (iopgd_is_table(*iopgd))
-			iopte_free(iopte_offset(iopgd, 0));
-
-		*iopgd = 0;
-		flush_iopgd_range(iopgd, iopgd);
-	}
-
-	flush_iotlb_all(obj);
-
-	spin_unlock(&obj->page_table_lock);
-}
-
-/*
- *	Device IOMMU generic operations
- */
-static irqreturn_t iommu_fault_handler(int irq, void *data)
-{
-	u32 da, errs;
-	u32 *iopgd, *iopte;
-	struct iommu *obj = data;
-
-	if (!obj->refcount)
-		return IRQ_NONE;
-
-	clk_enable(obj->clk);
-	errs = iommu_report_fault(obj, &da);
-	clk_disable(obj->clk);
-	if (errs == 0)
-		return IRQ_HANDLED;
-
-	/* Fault callback or TLB/PTE Dynamic loading */
-	if (obj->isr && !obj->isr(obj, da, errs, obj->isr_priv))
-		return IRQ_HANDLED;
-
-	iommu_disable(obj);
-
-	iopgd = iopgd_offset(obj, da);
-
-	if (!iopgd_is_table(*iopgd)) {
-		dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p "
-			"*pgd:px%08x\n", obj->name, errs, da, iopgd, *iopgd);
-		return IRQ_NONE;
-	}
-
-	iopte = iopte_offset(iopgd, da);
-
-	dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x "
-		"pte:0x%p *pte:0x%08x\n", obj->name, errs, da, iopgd, *iopgd,
-		iopte, *iopte);
-
-	return IRQ_NONE;
-}
-
-static int device_match_by_alias(struct device *dev, void *data)
-{
-	struct iommu *obj = to_iommu(dev);
-	const char *name = data;
-
-	pr_debug("%s: %s %s\n", __func__, obj->name, name);
-
-	return strcmp(obj->name, name) == 0;
-}
-
-/**
- * iommu_set_da_range - Set a valid device address range
- * @obj:		target iommu
- * @start		Start of valid range
- * @end			End of valid range
- **/
-int iommu_set_da_range(struct iommu *obj, u32 start, u32 end)
-{
-
-	if (!obj)
-		return -EFAULT;
-
-	if (end < start || !PAGE_ALIGN(start | end))
-		return -EINVAL;
-
-	obj->da_start = start;
-	obj->da_end = end;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_set_da_range);
-
-/**
- * iommu_get - Get iommu handler
- * @name:	target iommu name
- **/
-struct iommu *iommu_get(const char *name)
-{
-	int err = -ENOMEM;
-	struct device *dev;
-	struct iommu *obj;
-
-	dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name,
-				 device_match_by_alias);
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
-	obj = to_iommu(dev);
-
-	mutex_lock(&obj->iommu_lock);
-
-	if (obj->refcount++ == 0) {
-		err = iommu_enable(obj);
-		if (err)
-			goto err_enable;
-		flush_iotlb_all(obj);
-	}
-
-	if (!try_module_get(obj->owner))
-		goto err_module;
-
-	mutex_unlock(&obj->iommu_lock);
-
-	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
-	return obj;
-
-err_module:
-	if (obj->refcount == 1)
-		iommu_disable(obj);
-err_enable:
-	obj->refcount--;
-	mutex_unlock(&obj->iommu_lock);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL_GPL(iommu_get);
-
-/**
- * iommu_put - Put back iommu handler
- * @obj:	target iommu
- **/
-void iommu_put(struct iommu *obj)
-{
-	if (!obj || IS_ERR(obj))
-		return;
-
-	mutex_lock(&obj->iommu_lock);
-
-	if (--obj->refcount == 0)
-		iommu_disable(obj);
-
-	module_put(obj->owner);
-
-	mutex_unlock(&obj->iommu_lock);
-
-	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
-}
-EXPORT_SYMBOL_GPL(iommu_put);
-
-int iommu_set_isr(const char *name,
-		  int (*isr)(struct iommu *obj, u32 da, u32 iommu_errs,
-			     void *priv),
-		  void *isr_priv)
-{
-	struct device *dev;
-	struct iommu *obj;
-
-	dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name,
-				 device_match_by_alias);
-	if (!dev)
-		return -ENODEV;
-
-	obj = to_iommu(dev);
-	mutex_lock(&obj->iommu_lock);
-	if (obj->refcount != 0) {
-		mutex_unlock(&obj->iommu_lock);
-		return -EBUSY;
-	}
-	obj->isr = isr;
-	obj->isr_priv = isr_priv;
-	mutex_unlock(&obj->iommu_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(iommu_set_isr);
-
-/*
- *	OMAP Device MMU(IOMMU) detection
- */
-static int __devinit omap_iommu_probe(struct platform_device *pdev)
-{
-	int err = -ENODEV;
-	void *p;
-	int irq;
-	struct iommu *obj;
-	struct resource *res;
-	struct iommu_platform_data *pdata = pdev->dev.platform_data;
-
-	if (pdev->num_resources != 2)
-		return -EINVAL;
-
-	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
-	if (!obj)
-		return -ENOMEM;
-
-	obj->clk = clk_get(&pdev->dev, pdata->clk_name);
-	if (IS_ERR(obj->clk))
-		goto err_clk;
-
-	obj->nr_tlb_entries = pdata->nr_tlb_entries;
-	obj->name = pdata->name;
-	obj->dev = &pdev->dev;
-	obj->ctx = (void *)obj + sizeof(*obj);
-	obj->da_start = pdata->da_start;
-	obj->da_end = pdata->da_end;
-
-	mutex_init(&obj->iommu_lock);
-	mutex_init(&obj->mmap_lock);
-	spin_lock_init(&obj->page_table_lock);
-	INIT_LIST_HEAD(&obj->mmap);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		err = -ENODEV;
-		goto err_mem;
-	}
-
-	res = request_mem_region(res->start, resource_size(res),
-				 dev_name(&pdev->dev));
-	if (!res) {
-		err = -EIO;
-		goto err_mem;
-	}
-
-	obj->regbase = ioremap(res->start, resource_size(res));
-	if (!obj->regbase) {
-		err = -ENOMEM;
-		goto err_ioremap;
-	}
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		err = -ENODEV;
-		goto err_irq;
-	}
-	err = request_irq(irq, iommu_fault_handler, IRQF_SHARED,
-			  dev_name(&pdev->dev), obj);
-	if (err < 0)
-		goto err_irq;
-	platform_set_drvdata(pdev, obj);
-
-	p = (void *)__get_free_pages(GFP_KERNEL, get_order(IOPGD_TABLE_SIZE));
-	if (!p) {
-		err = -ENOMEM;
-		goto err_pgd;
-	}
-	memset(p, 0, IOPGD_TABLE_SIZE);
-	clean_dcache_area(p, IOPGD_TABLE_SIZE);
-	obj->iopgd = p;
-
-	BUG_ON(!IS_ALIGNED((unsigned long)obj->iopgd, IOPGD_TABLE_SIZE));
-
-	dev_info(&pdev->dev, "%s registered\n", obj->name);
-	return 0;
-
-err_pgd:
-	free_irq(irq, obj);
-err_irq:
-	iounmap(obj->regbase);
-err_ioremap:
-	release_mem_region(res->start, resource_size(res));
-err_mem:
-	clk_put(obj->clk);
-err_clk:
-	kfree(obj);
-	return err;
-}
-
-static int __devexit omap_iommu_remove(struct platform_device *pdev)
-{
-	int irq;
-	struct resource *res;
-	struct iommu *obj = platform_get_drvdata(pdev);
-
-	platform_set_drvdata(pdev, NULL);
-
-	iopgtable_clear_entry_all(obj);
-	free_pages((unsigned long)obj->iopgd, get_order(IOPGD_TABLE_SIZE));
-
-	irq = platform_get_irq(pdev, 0);
-	free_irq(irq, obj);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
-	iounmap(obj->regbase);
-
-	clk_put(obj->clk);
-	dev_info(&pdev->dev, "%s removed\n", obj->name);
-	kfree(obj);
-	return 0;
-}
-
-static struct platform_driver omap_iommu_driver = {
-	.probe	= omap_iommu_probe,
-	.remove	= __devexit_p(omap_iommu_remove),
-	.driver	= {
-		.name	= "omap-iommu",
-	},
-};
-
-static void iopte_cachep_ctor(void *iopte)
-{
-	clean_dcache_area(iopte, IOPTE_TABLE_SIZE);
-}
-
-static int __init omap_iommu_init(void)
-{
-	struct kmem_cache *p;
-	const unsigned long flags = SLAB_HWCACHE_ALIGN;
-	size_t align = 1 << 10; /* L2 pagetable alignement */
-
-	p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags,
-			      iopte_cachep_ctor);
-	if (!p)
-		return -ENOMEM;
-	iopte_cachep = p;
-
-	return platform_driver_register(&omap_iommu_driver);
-}
-module_init(omap_iommu_init);
-
-static void __exit omap_iommu_exit(void)
-{
-	kmem_cache_destroy(iopte_cachep);
-
-	platform_driver_unregister(&omap_iommu_driver);
-}
-module_exit(omap_iommu_exit);
-
-MODULE_DESCRIPTION("omap iommu: tlb and pagetable primitives");
-MODULE_ALIAS("platform:omap-iommu");
-MODULE_AUTHOR("Hiroshi DOYU, Paul Mundt and Toshihiro Kobayashi");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-omap/iovmm.c b/arch/arm/plat-omap/iovmm.c
deleted file mode 100644
index 79e7fedb8602..000000000000
--- a/arch/arm/plat-omap/iovmm.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/*
- * omap iommu: simple virtual address space management
- *
- * Copyright (C) 2008-2009 Nokia Corporation
- *
- * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/device.h>
-#include <linux/scatterlist.h>
-
-#include <asm/cacheflush.h>
-#include <asm/mach/map.h>
-
-#include <plat/iommu.h>
-#include <plat/iovmm.h>
-
-#include "iopgtable.h"
-
-/*
- * A device driver needs to create address mappings between:
- *
- * - iommu/device address
- * - physical address
- * - mpu virtual address
- *
- * There are 4 possible patterns for them:
- *
- *    |iova/			  mapping		iommu_		page
- *    | da	pa	va	(d)-(p)-(v)		function	type
- *  ---------------------------------------------------------------------------
- *  1 | c	c	c	 1 - 1 - 1	  _kmap() / _kunmap()	s
- *  2 | c	c,a	c	 1 - 1 - 1	_kmalloc()/ _kfree()	s
- *  3 | c	d	c	 1 - n - 1	  _vmap() / _vunmap()	s
- *  4 | c	d,a	c	 1 - n - 1	_vmalloc()/ _vfree()	n*
- *
- *
- *	'iova':	device iommu virtual address
- *	'da':	alias of 'iova'
- *	'pa':	physical address
- *	'va':	mpu virtual address
- *
- *	'c':	contiguous memory area
- *	'd':	discontiguous memory area
- *	'a':	anonymous memory allocation
- *	'()':	optional feature
- *
- *	'n':	a normal page(4KB) size is used.
- *	's':	multiple iommu superpage(16MB, 1MB, 64KB, 4KB) size is used.
- *
- *	'*':	not yet, but feasible.
- */
-
-static struct kmem_cache *iovm_area_cachep;
-
-/* return total bytes of sg buffers */
-static size_t sgtable_len(const struct sg_table *sgt)
-{
-	unsigned int i, total = 0;
-	struct scatterlist *sg;
-
-	if (!sgt)
-		return 0;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		size_t bytes;
-
-		bytes = sg->length;
-
-		if (!iopgsz_ok(bytes)) {
-			pr_err("%s: sg[%d] not iommu pagesize(%x)\n",
-			       __func__, i, bytes);
-			return 0;
-		}
-
-		total += bytes;
-	}
-
-	return total;
-}
-#define sgtable_ok(x)	(!!sgtable_len(x))
-
-static unsigned max_alignment(u32 addr)
-{
-	int i;
-	unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, };
-	for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++)
-		;
-	return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0;
-}
-
-/*
- * calculate the optimal number sg elements from total bytes based on
- * iommu superpages
- */
-static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa)
-{
-	unsigned nr_entries = 0, ent_sz;
-
-	if (!IS_ALIGNED(bytes, PAGE_SIZE)) {
-		pr_err("%s: wrong size %08x\n", __func__, bytes);
-		return 0;
-	}
-
-	while (bytes) {
-		ent_sz = max_alignment(da | pa);
-		ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes));
-		nr_entries++;
-		da += ent_sz;
-		pa += ent_sz;
-		bytes -= ent_sz;
-	}
-
-	return nr_entries;
-}
-
-/* allocate and initialize sg_table header(a kind of 'superblock') */
-static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags,
-							u32 da, u32 pa)
-{
-	unsigned int nr_entries;
-	int err;
-	struct sg_table *sgt;
-
-	if (!bytes)
-		return ERR_PTR(-EINVAL);
-
-	if (!IS_ALIGNED(bytes, PAGE_SIZE))
-		return ERR_PTR(-EINVAL);
-
-	if (flags & IOVMF_LINEAR) {
-		nr_entries = sgtable_nents(bytes, da, pa);
-		if (!nr_entries)
-			return ERR_PTR(-EINVAL);
-	} else
-		nr_entries =  bytes / PAGE_SIZE;
-
-	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt)
-		return ERR_PTR(-ENOMEM);
-
-	err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL);
-	if (err) {
-		kfree(sgt);
-		return ERR_PTR(err);
-	}
-
-	pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries);
-
-	return sgt;
-}
-
-/* free sg_table header(a kind of superblock) */
-static void sgtable_free(struct sg_table *sgt)
-{
-	if (!sgt)
-		return;
-
-	sg_free_table(sgt);
-	kfree(sgt);
-
-	pr_debug("%s: sgt:%p\n", __func__, sgt);
-}
-
-/* map 'sglist' to a contiguous mpu virtual area and return 'va' */
-static void *vmap_sg(const struct sg_table *sgt)
-{
-	u32 va;
-	size_t total;
-	unsigned int i;
-	struct scatterlist *sg;
-	struct vm_struct *new;
-	const struct mem_type *mtype;
-
-	mtype = get_mem_type(MT_DEVICE);
-	if (!mtype)
-		return ERR_PTR(-EINVAL);
-
-	total = sgtable_len(sgt);
-	if (!total)
-		return ERR_PTR(-EINVAL);
-
-	new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END);
-	if (!new)
-		return ERR_PTR(-ENOMEM);
-	va = (u32)new->addr;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		size_t bytes;
-		u32 pa;
-		int err;
-
-		pa = sg_phys(sg);
-		bytes = sg->length;
-
-		BUG_ON(bytes != PAGE_SIZE);
-
-		err = ioremap_page(va,  pa, mtype);
-		if (err)
-			goto err_out;
-
-		va += bytes;
-	}
-
-	flush_cache_vmap((unsigned long)new->addr,
-				(unsigned long)(new->addr + total));
-	return new->addr;
-
-err_out:
-	WARN_ON(1); /* FIXME: cleanup some mpu mappings */
-	vunmap(new->addr);
-	return ERR_PTR(-EAGAIN);
-}
-
-static inline void vunmap_sg(const void *va)
-{
-	vunmap(va);
-}
-
-static struct iovm_struct *__find_iovm_area(struct iommu *obj, const u32 da)
-{
-	struct iovm_struct *tmp;
-
-	list_for_each_entry(tmp, &obj->mmap, list) {
-		if ((da >= tmp->da_start) && (da < tmp->da_end)) {
-			size_t len;
-
-			len = tmp->da_end - tmp->da_start;
-
-			dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n",
-				__func__, tmp->da_start, da, tmp->da_end, len,
-				tmp->flags);
-
-			return tmp;
-		}
-	}
-
-	return NULL;
-}
-
-/**
- * find_iovm_area  -  find iovma which includes @da
- * @da:		iommu device virtual address
- *
- * Find the existing iovma starting at @da
- */
-struct iovm_struct *find_iovm_area(struct iommu *obj, u32 da)
-{
-	struct iovm_struct *area;
-
-	mutex_lock(&obj->mmap_lock);
-	area = __find_iovm_area(obj, da);
-	mutex_unlock(&obj->mmap_lock);
-
-	return area;
-}
-EXPORT_SYMBOL_GPL(find_iovm_area);
-
-/*
- * This finds the hole(area) which fits the requested address and len
- * in iovmas mmap, and returns the new allocated iovma.
- */
-static struct iovm_struct *alloc_iovm_area(struct iommu *obj, u32 da,
-					   size_t bytes, u32 flags)
-{
-	struct iovm_struct *new, *tmp;
-	u32 start, prev_end, alignment;
-
-	if (!obj || !bytes)
-		return ERR_PTR(-EINVAL);
-
-	start = da;
-	alignment = PAGE_SIZE;
-
-	if (~flags & IOVMF_DA_FIXED) {
-		/* Don't map address 0 */
-		start = obj->da_start ? obj->da_start : alignment;
-
-		if (flags & IOVMF_LINEAR)
-			alignment = iopgsz_max(bytes);
-		start = roundup(start, alignment);
-	} else if (start < obj->da_start || start > obj->da_end ||
-					obj->da_end - start < bytes) {
-		return ERR_PTR(-EINVAL);
-	}
-
-	tmp = NULL;
-	if (list_empty(&obj->mmap))
-		goto found;
-
-	prev_end = 0;
-	list_for_each_entry(tmp, &obj->mmap, list) {
-
-		if (prev_end > start)
-			break;
-
-		if (tmp->da_start > start && (tmp->da_start - start) >= bytes)
-			goto found;
-
-		if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED)
-			start = roundup(tmp->da_end + 1, alignment);
-
-		prev_end = tmp->da_end;
-	}
-
-	if ((start >= prev_end) && (obj->da_end - start >= bytes))
-		goto found;
-
-	dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n",
-		__func__, da, bytes, flags);
-
-	return ERR_PTR(-EINVAL);
-
-found:
-	new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL);
-	if (!new)
-		return ERR_PTR(-ENOMEM);
-
-	new->iommu = obj;
-	new->da_start = start;
-	new->da_end = start + bytes;
-	new->flags = flags;
-
-	/*
-	 * keep ascending order of iovmas
-	 */
-	if (tmp)
-		list_add_tail(&new->list, &tmp->list);
-	else
-		list_add(&new->list, &obj->mmap);
-
-	dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n",
-		__func__, new->da_start, start, new->da_end, bytes, flags);
-
-	return new;
-}
-
-static void free_iovm_area(struct iommu *obj, struct iovm_struct *area)
-{
-	size_t bytes;
-
-	BUG_ON(!obj || !area);
-
-	bytes = area->da_end - area->da_start;
-
-	dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n",
-		__func__, area->da_start, area->da_end, bytes, area->flags);
-
-	list_del(&area->list);
-	kmem_cache_free(iovm_area_cachep, area);
-}
-
-/**
- * da_to_va - convert (d) to (v)
- * @obj:	objective iommu
- * @da:		iommu device virtual address
- * @va:		mpu virtual address
- *
- * Returns mpu virtual addr which corresponds to a given device virtual addr
- */
-void *da_to_va(struct iommu *obj, u32 da)
-{
-	void *va = NULL;
-	struct iovm_struct *area;
-
-	mutex_lock(&obj->mmap_lock);
-
-	area = __find_iovm_area(obj, da);
-	if (!area) {
-		dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
-		goto out;
-	}
-	va = area->va;
-out:
-	mutex_unlock(&obj->mmap_lock);
-
-	return va;
-}
-EXPORT_SYMBOL_GPL(da_to_va);
-
-static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va)
-{
-	unsigned int i;
-	struct scatterlist *sg;
-	void *va = _va;
-	void *va_end;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		struct page *pg;
-		const size_t bytes = PAGE_SIZE;
-
-		/*
-		 * iommu 'superpage' isn't supported with 'iommu_vmalloc()'
-		 */
-		pg = vmalloc_to_page(va);
-		BUG_ON(!pg);
-		sg_set_page(sg, pg, bytes, 0);
-
-		va += bytes;
-	}
-
-	va_end = _va + PAGE_SIZE * i;
-}
-
-static inline void sgtable_drain_vmalloc(struct sg_table *sgt)
-{
-	/*
-	 * Actually this is not necessary at all, just exists for
-	 * consistency of the code readability.
-	 */
-	BUG_ON(!sgt);
-}
-
-static void sgtable_fill_kmalloc(struct sg_table *sgt, u32 pa, u32 da,
-								size_t len)
-{
-	unsigned int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		unsigned bytes;
-
-		bytes = max_alignment(da | pa);
-		bytes = min_t(unsigned, bytes, iopgsz_max(len));
-
-		BUG_ON(!iopgsz_ok(bytes));
-
-		sg_set_buf(sg, phys_to_virt(pa), bytes);
-		/*
-		 * 'pa' is cotinuous(linear).
-		 */
-		pa += bytes;
-		da += bytes;
-		len -= bytes;
-	}
-	BUG_ON(len);
-}
-
-static inline void sgtable_drain_kmalloc(struct sg_table *sgt)
-{
-	/*
-	 * Actually this is not necessary at all, just exists for
-	 * consistency of the code readability
-	 */
-	BUG_ON(!sgt);
-}
-
-/* create 'da' <-> 'pa' mapping from 'sgt' */
-static int map_iovm_area(struct iommu *obj, struct iovm_struct *new,
-			 const struct sg_table *sgt, u32 flags)
-{
-	int err;
-	unsigned int i, j;
-	struct scatterlist *sg;
-	u32 da = new->da_start;
-
-	if (!obj || !sgt)
-		return -EINVAL;
-
-	BUG_ON(!sgtable_ok(sgt));
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		u32 pa;
-		int pgsz;
-		size_t bytes;
-		struct iotlb_entry e;
-
-		pa = sg_phys(sg);
-		bytes = sg->length;
-
-		flags &= ~IOVMF_PGSZ_MASK;
-		pgsz = bytes_to_iopgsz(bytes);
-		if (pgsz < 0)
-			goto err_out;
-		flags |= pgsz;
-
-		pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
-			 i, da, pa, bytes);
-
-		iotlb_init_entry(&e, da, pa, flags);
-		err = iopgtable_store_entry(obj, &e);
-		if (err)
-			goto err_out;
-
-		da += bytes;
-	}
-	return 0;
-
-err_out:
-	da = new->da_start;
-
-	for_each_sg(sgt->sgl, sg, i, j) {
-		size_t bytes;
-
-		bytes = iopgtable_clear_entry(obj, da);
-
-		BUG_ON(!iopgsz_ok(bytes));
-
-		da += bytes;
-	}
-	return err;
-}
-
-/* release 'da' <-> 'pa' mapping */
-static void unmap_iovm_area(struct iommu *obj, struct iovm_struct *area)
-{
-	u32 start;
-	size_t total = area->da_end - area->da_start;
-
-	BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
-
-	start = area->da_start;
-	while (total > 0) {
-		size_t bytes;
-
-		bytes = iopgtable_clear_entry(obj, start);
-		if (bytes == 0)
-			bytes = PAGE_SIZE;
-		else
-			dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
-				__func__, start, bytes, area->flags);
-
-		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
-
-		total -= bytes;
-		start += bytes;
-	}
-	BUG_ON(total);
-}
-
-/* template function for all unmapping */
-static struct sg_table *unmap_vm_area(struct iommu *obj, const u32 da,
-				      void (*fn)(const void *), u32 flags)
-{
-	struct sg_table *sgt = NULL;
-	struct iovm_struct *area;
-
-	if (!IS_ALIGNED(da, PAGE_SIZE)) {
-		dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da);
-		return NULL;
-	}
-
-	mutex_lock(&obj->mmap_lock);
-
-	area = __find_iovm_area(obj, da);
-	if (!area) {
-		dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da);
-		goto out;
-	}
-
-	if ((area->flags & flags) != flags) {
-		dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__,
-			area->flags);
-		goto out;
-	}
-	sgt = (struct sg_table *)area->sgt;
-
-	unmap_iovm_area(obj, area);
-
-	fn(area->va);
-
-	dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__,
-		area->da_start, da, area->da_end,
-		area->da_end - area->da_start, area->flags);
-
-	free_iovm_area(obj, area);
-out:
-	mutex_unlock(&obj->mmap_lock);
-
-	return sgt;
-}
-
-static u32 map_iommu_region(struct iommu *obj, u32 da,
-	      const struct sg_table *sgt, void *va, size_t bytes, u32 flags)
-{
-	int err = -ENOMEM;
-	struct iovm_struct *new;
-
-	mutex_lock(&obj->mmap_lock);
-
-	new = alloc_iovm_area(obj, da, bytes, flags);
-	if (IS_ERR(new)) {
-		err = PTR_ERR(new);
-		goto err_alloc_iovma;
-	}
-	new->va = va;
-	new->sgt = sgt;
-
-	if (map_iovm_area(obj, new, sgt, new->flags))
-		goto err_map;
-
-	mutex_unlock(&obj->mmap_lock);
-
-	dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n",
-		__func__, new->da_start, bytes, new->flags, va);
-
-	return new->da_start;
-
-err_map:
-	free_iovm_area(obj, new);
-err_alloc_iovma:
-	mutex_unlock(&obj->mmap_lock);
-	return err;
-}
-
-static inline u32 __iommu_vmap(struct iommu *obj, u32 da,
-		 const struct sg_table *sgt, void *va, size_t bytes, u32 flags)
-{
-	return map_iommu_region(obj, da, sgt, va, bytes, flags);
-}
-
-/**
- * iommu_vmap  -  (d)-(p)-(v) address mapper
- * @obj:	objective iommu
- * @sgt:	address of scatter gather table
- * @flags:	iovma and page property
- *
- * Creates 1-n-1 mapping with given @sgt and returns @da.
- * All @sgt element must be io page size aligned.
- */
-u32 iommu_vmap(struct iommu *obj, u32 da, const struct sg_table *sgt,
-		 u32 flags)
-{
-	size_t bytes;
-	void *va = NULL;
-
-	if (!obj || !obj->dev || !sgt)
-		return -EINVAL;
-
-	bytes = sgtable_len(sgt);
-	if (!bytes)
-		return -EINVAL;
-	bytes = PAGE_ALIGN(bytes);
-
-	if (flags & IOVMF_MMIO) {
-		va = vmap_sg(sgt);
-		if (IS_ERR(va))
-			return PTR_ERR(va);
-	}
-
-	flags |= IOVMF_DISCONT;
-	flags |= IOVMF_MMIO;
-
-	da = __iommu_vmap(obj, da, sgt, va, bytes, flags);
-	if (IS_ERR_VALUE(da))
-		vunmap_sg(va);
-
-	return da;
-}
-EXPORT_SYMBOL_GPL(iommu_vmap);
-
-/**
- * iommu_vunmap  -  release virtual mapping obtained by 'iommu_vmap()'
- * @obj:	objective iommu
- * @da:		iommu device virtual address
- *
- * Free the iommu virtually contiguous memory area starting at
- * @da, which was returned by 'iommu_vmap()'.
- */
-struct sg_table *iommu_vunmap(struct iommu *obj, u32 da)
-{
-	struct sg_table *sgt;
-	/*
-	 * 'sgt' is allocated before 'iommu_vmalloc()' is called.
-	 * Just returns 'sgt' to the caller to free
-	 */
-	sgt = unmap_vm_area(obj, da, vunmap_sg, IOVMF_DISCONT | IOVMF_MMIO);
-	if (!sgt)
-		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
-	return sgt;
-}
-EXPORT_SYMBOL_GPL(iommu_vunmap);
-
-/**
- * iommu_vmalloc  -  (d)-(p)-(v) address allocator and mapper
- * @obj:	objective iommu
- * @da:		contiguous iommu virtual memory
- * @bytes:	allocation size
- * @flags:	iovma and page property
- *
- * Allocate @bytes linearly and creates 1-n-1 mapping and returns
- * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
- */
-u32 iommu_vmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags)
-{
-	void *va;
-	struct sg_table *sgt;
-
-	if (!obj || !obj->dev || !bytes)
-		return -EINVAL;
-
-	bytes = PAGE_ALIGN(bytes);
-
-	va = vmalloc(bytes);
-	if (!va)
-		return -ENOMEM;
-
-	flags |= IOVMF_DISCONT;
-	flags |= IOVMF_ALLOC;
-
-	sgt = sgtable_alloc(bytes, flags, da, 0);
-	if (IS_ERR(sgt)) {
-		da = PTR_ERR(sgt);
-		goto err_sgt_alloc;
-	}
-	sgtable_fill_vmalloc(sgt, va);
-
-	da = __iommu_vmap(obj, da, sgt, va, bytes, flags);
-	if (IS_ERR_VALUE(da))
-		goto err_iommu_vmap;
-
-	return da;
-
-err_iommu_vmap:
-	sgtable_drain_vmalloc(sgt);
-	sgtable_free(sgt);
-err_sgt_alloc:
-	vfree(va);
-	return da;
-}
-EXPORT_SYMBOL_GPL(iommu_vmalloc);
-
-/**
- * iommu_vfree  -  release memory allocated by 'iommu_vmalloc()'
- * @obj:	objective iommu
- * @da:		iommu device virtual address
- *
- * Frees the iommu virtually continuous memory area starting at
- * @da, as obtained from 'iommu_vmalloc()'.
- */
-void iommu_vfree(struct iommu *obj, const u32 da)
-{
-	struct sg_table *sgt;
-
-	sgt = unmap_vm_area(obj, da, vfree, IOVMF_DISCONT | IOVMF_ALLOC);
-	if (!sgt)
-		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
-	sgtable_free(sgt);
-}
-EXPORT_SYMBOL_GPL(iommu_vfree);
-
-static u32 __iommu_kmap(struct iommu *obj, u32 da, u32 pa, void *va,
-			  size_t bytes, u32 flags)
-{
-	struct sg_table *sgt;
-
-	sgt = sgtable_alloc(bytes, flags, da, pa);
-	if (IS_ERR(sgt))
-		return PTR_ERR(sgt);
-
-	sgtable_fill_kmalloc(sgt, pa, da, bytes);
-
-	da = map_iommu_region(obj, da, sgt, va, bytes, flags);
-	if (IS_ERR_VALUE(da)) {
-		sgtable_drain_kmalloc(sgt);
-		sgtable_free(sgt);
-	}
-
-	return da;
-}
-
-/**
- * iommu_kmap  -  (d)-(p)-(v) address mapper
- * @obj:	objective iommu
- * @da:		contiguous iommu virtual memory
- * @pa:		contiguous physical memory
- * @flags:	iovma and page property
- *
- * Creates 1-1-1 mapping and returns @da again, which can be
- * adjusted if 'IOVMF_DA_FIXED' is not set.
- */
-u32 iommu_kmap(struct iommu *obj, u32 da, u32 pa, size_t bytes,
-		 u32 flags)
-{
-	void *va;
-
-	if (!obj || !obj->dev || !bytes)
-		return -EINVAL;
-
-	bytes = PAGE_ALIGN(bytes);
-
-	va = ioremap(pa, bytes);
-	if (!va)
-		return -ENOMEM;
-
-	flags |= IOVMF_LINEAR;
-	flags |= IOVMF_MMIO;
-
-	da = __iommu_kmap(obj, da, pa, va, bytes, flags);
-	if (IS_ERR_VALUE(da))
-		iounmap(va);
-
-	return da;
-}
-EXPORT_SYMBOL_GPL(iommu_kmap);
-
-/**
- * iommu_kunmap  -  release virtual mapping obtained by 'iommu_kmap()'
- * @obj:	objective iommu
- * @da:		iommu device virtual address
- *
- * Frees the iommu virtually contiguous memory area starting at
- * @da, which was passed to and was returned by'iommu_kmap()'.
- */
-void iommu_kunmap(struct iommu *obj, u32 da)
-{
-	struct sg_table *sgt;
-	typedef void (*func_t)(const void *);
-
-	sgt = unmap_vm_area(obj, da, (func_t)iounmap,
-			    IOVMF_LINEAR | IOVMF_MMIO);
-	if (!sgt)
-		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
-	sgtable_free(sgt);
-}
-EXPORT_SYMBOL_GPL(iommu_kunmap);
-
-/**
- * iommu_kmalloc  -  (d)-(p)-(v) address allocator and mapper
- * @obj:	objective iommu
- * @da:		contiguous iommu virtual memory
- * @bytes:	bytes for allocation
- * @flags:	iovma and page property
- *
- * Allocate @bytes linearly and creates 1-1-1 mapping and returns
- * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
- */
-u32 iommu_kmalloc(struct iommu *obj, u32 da, size_t bytes, u32 flags)
-{
-	void *va;
-	u32 pa;
-
-	if (!obj || !obj->dev || !bytes)
-		return -EINVAL;
-
-	bytes = PAGE_ALIGN(bytes);
-
-	va = kmalloc(bytes, GFP_KERNEL | GFP_DMA);
-	if (!va)
-		return -ENOMEM;
-	pa = virt_to_phys(va);
-
-	flags |= IOVMF_LINEAR;
-	flags |= IOVMF_ALLOC;
-
-	da = __iommu_kmap(obj, da, pa, va, bytes, flags);
-	if (IS_ERR_VALUE(da))
-		kfree(va);
-
-	return da;
-}
-EXPORT_SYMBOL_GPL(iommu_kmalloc);
-
-/**
- * iommu_kfree  -  release virtual mapping obtained by 'iommu_kmalloc()'
- * @obj:	objective iommu
- * @da:		iommu device virtual address
- *
- * Frees the iommu virtually contiguous memory area starting at
- * @da, which was passed to and was returned by'iommu_kmalloc()'.
- */
-void iommu_kfree(struct iommu *obj, u32 da)
-{
-	struct sg_table *sgt;
-
-	sgt = unmap_vm_area(obj, da, kfree, IOVMF_LINEAR | IOVMF_ALLOC);
-	if (!sgt)
-		dev_dbg(obj->dev, "%s: No sgt\n", __func__);
-	sgtable_free(sgt);
-}
-EXPORT_SYMBOL_GPL(iommu_kfree);
-
-
-static int __init iovmm_init(void)
-{
-	const unsigned long flags = SLAB_HWCACHE_ALIGN;
-	struct kmem_cache *p;
-
-	p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0,
-			      flags, NULL);
-	if (!p)
-		return -ENOMEM;
-	iovm_area_cachep = p;
-
-	return 0;
-}
-module_init(iovmm_init);
-
-static void __exit iovmm_exit(void)
-{
-	kmem_cache_destroy(iovm_area_cachep);
-}
-module_exit(iovmm_exit);
-
-MODULE_DESCRIPTION("omap iommu: simple virtual address space management");
-MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/plat-s5p/Kconfig b/arch/arm/plat-s5p/Kconfig
index 9843c954c042..9a197e55f669 100644
--- a/arch/arm/plat-s5p/Kconfig
+++ b/arch/arm/plat-s5p/Kconfig
@@ -22,7 +22,6 @@ config PLAT_S5P
 	select PLAT_SAMSUNG
 	select SAMSUNG_CLKSRC
 	select SAMSUNG_IRQ_VIC_TIMER
-	select SAMSUNG_IRQ_UART
 	help
 	  Base platform code for Samsung's S5P series SoC.
 
diff --git a/arch/arm/plat-s5p/dev-uart.c b/arch/arm/plat-s5p/dev-uart.c
index afaf87fdb93e..c9308db36183 100644
--- a/arch/arm/plat-s5p/dev-uart.c
+++ b/arch/arm/plat-s5p/dev-uart.c
@@ -32,20 +32,10 @@ static struct resource s5p_uart0_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX0,
-		.end	= IRQ_S5P_UART_RX0,
+		.start	= IRQ_UART0,
+		.end	= IRQ_UART0,
 		.flags	= IORESOURCE_IRQ,
 	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX0,
-		.end	= IRQ_S5P_UART_TX0,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR0,
-		.end	= IRQ_S5P_UART_ERR0,
-		.flags	= IORESOURCE_IRQ,
-	}
 };
 
 static struct resource s5p_uart1_resource[] = {
@@ -55,18 +45,8 @@ static struct resource s5p_uart1_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX1,
-		.end	= IRQ_S5P_UART_RX1,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX1,
-		.end	= IRQ_S5P_UART_TX1,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR1,
-		.end	= IRQ_S5P_UART_ERR1,
+		.start	= IRQ_UART1,
+		.end	= IRQ_UART1,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
@@ -78,18 +58,8 @@ static struct resource s5p_uart2_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX2,
-		.end	= IRQ_S5P_UART_RX2,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX2,
-		.end	= IRQ_S5P_UART_TX2,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR2,
-		.end	= IRQ_S5P_UART_ERR2,
+		.start	= IRQ_UART2,
+		.end	= IRQ_UART2,
 		.flags	= IORESOURCE_IRQ,
 	},
 };
@@ -102,18 +72,8 @@ static struct resource s5p_uart3_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX3,
-		.end	= IRQ_S5P_UART_RX3,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX3,
-		.end	= IRQ_S5P_UART_TX3,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR3,
-		.end	= IRQ_S5P_UART_ERR3,
+		.start	= IRQ_UART3,
+		.end	= IRQ_UART3,
 		.flags	= IORESOURCE_IRQ,
 	},
 #endif
@@ -127,18 +87,8 @@ static struct resource s5p_uart4_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX4,
-		.end	= IRQ_S5P_UART_RX4,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX4,
-		.end	= IRQ_S5P_UART_TX4,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR4,
-		.end	= IRQ_S5P_UART_ERR4,
+		.start	= IRQ_UART4,
+		.end	= IRQ_UART4,
 		.flags	= IORESOURCE_IRQ,
 	},
 #endif
@@ -152,18 +102,8 @@ static struct resource s5p_uart5_resource[] = {
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
-		.start	= IRQ_S5P_UART_RX5,
-		.end	= IRQ_S5P_UART_RX5,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= IRQ_S5P_UART_TX5,
-		.end	= IRQ_S5P_UART_TX5,
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= IRQ_S5P_UART_ERR5,
-		.end	= IRQ_S5P_UART_ERR5,
+		.start	= IRQ_UART5,
+		.end	= IRQ_UART5,
 		.flags	= IORESOURCE_IRQ,
 	},
 #endif
diff --git a/arch/arm/plat-s5p/include/plat/irqs.h b/arch/arm/plat-s5p/include/plat/irqs.h
index ba9121c60a2a..144dbfc6506d 100644
--- a/arch/arm/plat-s5p/include/plat/irqs.h
+++ b/arch/arm/plat-s5p/include/plat/irqs.h
@@ -37,41 +37,6 @@
 #define IRQ_VIC1_BASE		S5P_VIC1_BASE
 #define IRQ_VIC2_BASE		S5P_VIC2_BASE
 
-/* UART interrupts, each UART has 4 intterupts per channel so
- * use the space between the ISA and S3C main interrupts. Note, these
- * are not in the same order as the S3C24XX series! */
-
-#define IRQ_S5P_UART_BASE0	(16)
-#define IRQ_S5P_UART_BASE1	(20)
-#define IRQ_S5P_UART_BASE2	(24)
-#define IRQ_S5P_UART_BASE3	(28)
-
-#define UART_IRQ_RXD		(0)
-#define UART_IRQ_ERR		(1)
-#define UART_IRQ_TXD		(2)
-
-#define IRQ_S5P_UART_RX0	(IRQ_S5P_UART_BASE0 + UART_IRQ_RXD)
-#define IRQ_S5P_UART_TX0	(IRQ_S5P_UART_BASE0 + UART_IRQ_TXD)
-#define IRQ_S5P_UART_ERR0	(IRQ_S5P_UART_BASE0 + UART_IRQ_ERR)
-
-#define IRQ_S5P_UART_RX1	(IRQ_S5P_UART_BASE1 + UART_IRQ_RXD)
-#define IRQ_S5P_UART_TX1	(IRQ_S5P_UART_BASE1 + UART_IRQ_TXD)
-#define IRQ_S5P_UART_ERR1	(IRQ_S5P_UART_BASE1 + UART_IRQ_ERR)
-
-#define IRQ_S5P_UART_RX2	(IRQ_S5P_UART_BASE2 + UART_IRQ_RXD)
-#define IRQ_S5P_UART_TX2	(IRQ_S5P_UART_BASE2 + UART_IRQ_TXD)
-#define IRQ_S5P_UART_ERR2	(IRQ_S5P_UART_BASE2 + UART_IRQ_ERR)
-
-#define IRQ_S5P_UART_RX3	(IRQ_S5P_UART_BASE3 + UART_IRQ_RXD)
-#define IRQ_S5P_UART_TX3	(IRQ_S5P_UART_BASE3 + UART_IRQ_TXD)
-#define IRQ_S5P_UART_ERR3	(IRQ_S5P_UART_BASE3 + UART_IRQ_ERR)
-
-/* S3C compatibilty defines */
-#define IRQ_S3CUART_RX0		IRQ_S5P_UART_RX0
-#define IRQ_S3CUART_RX1		IRQ_S5P_UART_RX1
-#define IRQ_S3CUART_RX2		IRQ_S5P_UART_RX2
-#define IRQ_S3CUART_RX3		IRQ_S5P_UART_RX3
-
 /* VIC based IRQs */
 
 #define S5P_IRQ_VIC0(x)		(S5P_VIC0_BASE + (x))
diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c
index f88216d23991..c65eb791d1bb 100644
--- a/arch/arm/plat-s5p/irq-gpioint.c
+++ b/arch/arm/plat-s5p/irq-gpioint.c
@@ -163,9 +163,9 @@ static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip)
 	ct->chip.irq_mask = irq_gc_mask_set_bit;
 	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
 	ct->chip.irq_set_type = s5p_gpioint_set_type,
-	ct->regs.ack = PEND_OFFSET + REG_OFFSET(chip->group);
-	ct->regs.mask = MASK_OFFSET + REG_OFFSET(chip->group);
-	ct->regs.type = CON_OFFSET + REG_OFFSET(chip->group);
+	ct->regs.ack = PEND_OFFSET + REG_OFFSET(group - bank->start);
+	ct->regs.mask = MASK_OFFSET + REG_OFFSET(group - bank->start);
+	ct->regs.type = CON_OFFSET + REG_OFFSET(group - bank->start);
 	irq_setup_generic_chip(gc, IRQ_MSK(chip->chip.ngpio),
 			       IRQ_GC_INIT_MASK_CACHE,
 			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);
diff --git a/arch/arm/plat-s5p/irq.c b/arch/arm/plat-s5p/irq.c
index a97c08957f49..afdaa1082b9f 100644
--- a/arch/arm/plat-s5p/irq.c
+++ b/arch/arm/plat-s5p/irq.c
@@ -17,42 +17,10 @@
 
 #include <asm/hardware/vic.h>
 
-#include <linux/serial_core.h>
 #include <mach/map.h>
 #include <plat/regs-timer.h>
-#include <plat/regs-serial.h>
 #include <plat/cpu.h>
 #include <plat/irq-vic-timer.h>
-#include <plat/irq-uart.h>
-
-/*
- * Note, we make use of the fact that the parent IRQs, IRQ_UART[0..3]
- * are consecutive when looking up the interrupt in the demux routines.
- */
-static struct s3c_uart_irq uart_irqs[] = {
-	[0] = {
-		.regs		= S5P_VA_UART0,
-		.base_irq	= IRQ_S5P_UART_BASE0,
-		.parent_irq	= IRQ_UART0,
-	},
-	[1] = {
-		.regs		= S5P_VA_UART1,
-		.base_irq	= IRQ_S5P_UART_BASE1,
-		.parent_irq	= IRQ_UART1,
-	},
-	[2] = {
-		.regs		= S5P_VA_UART2,
-		.base_irq	= IRQ_S5P_UART_BASE2,
-		.parent_irq	= IRQ_UART2,
-	},
-#if CONFIG_SERIAL_SAMSUNG_UARTS > 3
-	[3] = {
-		.regs		= S5P_VA_UART3,
-		.base_irq	= IRQ_S5P_UART_BASE3,
-		.parent_irq	= IRQ_UART3,
-	},
-#endif
-};
 
 void __init s5p_init_irq(u32 *vic, u32 num_vic)
 {
@@ -65,6 +33,4 @@ void __init s5p_init_irq(u32 *vic, u32 num_vic)
 #endif
 
 	s3c_init_vic_timer_irq(5, IRQ_TIMER0);
-
-	s3c_init_uart_irqs(uart_irqs, ARRAY_SIZE(uart_irqs));
 }
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index b3e10659e4b8..dffa37bc4a0b 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -65,11 +65,6 @@ config SAMSUNG_IRQ_VIC_TIMER
        help
          Internal configuration to build the VIC timer interrupt code.
 
-config SAMSUNG_IRQ_UART
-       bool
-       help
-         Internal configuration to build the IRQ UART demux code.
-
 # options for gpio configuration support
 
 config SAMSUNG_GPIOLIB_4BIT
diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile
index 853764ba8cc5..1105922342fe 100644
--- a/arch/arm/plat-samsung/Makefile
+++ b/arch/arm/plat-samsung/Makefile
@@ -21,7 +21,6 @@ obj-y				+= dev-asocdma.o
 
 obj-$(CONFIG_SAMSUNG_CLKSRC)	+= clock-clksrc.o
 
-obj-$(CONFIG_SAMSUNG_IRQ_UART)	+= irq-uart.o
 obj-$(CONFIG_SAMSUNG_IRQ_VIC_TIMER) += irq-vic-timer.o
 
 # ADC
diff --git a/arch/arm/plat-samsung/include/plat/regs-serial.h b/arch/arm/plat-samsung/include/plat/regs-serial.h
index bac36fa3becb..720734847027 100644
--- a/arch/arm/plat-samsung/include/plat/regs-serial.h
+++ b/arch/arm/plat-samsung/include/plat/regs-serial.h
@@ -186,6 +186,11 @@
 #define S3C64XX_UINTSP		0x34
 #define S3C64XX_UINTM		0x38
 
+#define S3C64XX_UINTM_RXD	(0)
+#define S3C64XX_UINTM_TXD	(2)
+#define S3C64XX_UINTM_RXD_MSK	(1 << S3C64XX_UINTM_RXD)
+#define S3C64XX_UINTM_TXD_MSK	(1 << S3C64XX_UINTM_TXD)
+
 /* Following are specific to S5PV210 */
 #define S5PV210_UCON_CLKMASK	(1<<10)
 #define S5PV210_UCON_PCLK	(0<<10)
diff --git a/arch/arm/plat-samsung/irq-uart.c b/arch/arm/plat-samsung/irq-uart.c
deleted file mode 100644
index 3014c7226bd1..000000000000
--- a/arch/arm/plat-samsung/irq-uart.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* arch/arm/plat-samsung/irq-uart.c
- *	originally part of arch/arm/plat-s3c64xx/irq.c
- *
- * Copyright 2008 Openmoko, Inc.
- * Copyright 2008 Simtec Electronics
- *      Ben Dooks <ben@simtec.co.uk>
- *      http://armlinux.simtec.co.uk/
- *
- * Samsung- UART Interrupt handling
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/serial_core.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-
-#include <asm/mach/irq.h>
-
-#include <mach/map.h>
-#include <plat/irq-uart.h>
-#include <plat/regs-serial.h>
-#include <plat/cpu.h>
-
-/* Note, we make use of the fact that the parent IRQs, IRQ_UART[0..3]
- * are consecutive when looking up the interrupt in the demux routines.
- */
-static void s3c_irq_demux_uart(unsigned int irq, struct irq_desc *desc)
-{
-	struct s3c_uart_irq *uirq = desc->irq_data.handler_data;
-	struct irq_chip *chip = irq_get_chip(irq);
-	u32 pend = __raw_readl(uirq->regs + S3C64XX_UINTP);
-	int base = uirq->base_irq;
-
-	chained_irq_enter(chip, desc);
-
-	if (pend & (1 << 0))
-		generic_handle_irq(base);
-	if (pend & (1 << 1))
-		generic_handle_irq(base + 1);
-	if (pend & (1 << 2))
-		generic_handle_irq(base + 2);
-	if (pend & (1 << 3))
-		generic_handle_irq(base + 3);
-
-	chained_irq_exit(chip, desc);
-}
-
-static void __init s3c_init_uart_irq(struct s3c_uart_irq *uirq)
-{
-	void __iomem *reg_base = uirq->regs;
-	struct irq_chip_generic *gc;
-	struct irq_chip_type *ct;
-
-	/* mask all interrupts at the start. */
-	__raw_writel(0xf, reg_base + S3C64XX_UINTM);
-
-	gc = irq_alloc_generic_chip("s3c-uart", 1, uirq->base_irq, reg_base,
-				    handle_level_irq);
-
-	if (!gc) {
-		pr_err("%s: irq_alloc_generic_chip for IRQ %u failed\n",
-		       __func__, uirq->base_irq);
-		return;
-	}
-
-	ct = gc->chip_types;
-	ct->chip.irq_ack = irq_gc_ack_set_bit;
-	ct->chip.irq_mask = irq_gc_mask_set_bit;
-	ct->chip.irq_unmask = irq_gc_mask_clr_bit;
-	ct->regs.ack = S3C64XX_UINTP;
-	ct->regs.mask = S3C64XX_UINTM;
-	irq_setup_generic_chip(gc, IRQ_MSK(4), IRQ_GC_INIT_MASK_CACHE,
-			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);
-
-	irq_set_handler_data(uirq->parent_irq, uirq);
-	irq_set_chained_handler(uirq->parent_irq, s3c_irq_demux_uart);
-}
-
-/**
- * s3c_init_uart_irqs() - initialise UART IRQs and the necessary demuxing
- * @irq: The interrupt data for registering
- * @nr_irqs: The number of interrupt descriptions in @irq.
- *
- * Register the UART interrupts specified by @irq including the demuxing
- * routines. This supports the S3C6400 and newer style of devices.
- */
-void __init s3c_init_uart_irqs(struct s3c_uart_irq *irq, unsigned int nr_irqs)
-{
-	for (; nr_irqs > 0; nr_irqs--, irq++)
-		s3c_init_uart_irq(irq);
-}
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 62cc8f981171..5bdeef969847 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,10 +12,9 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# XXX: This is a cut-down version of the file; it contains only machines that
-# XXX: are in mainline or have been submitted to the machine database within
-# XXX: the last 12 months.  If your entry is missing please email rmk at
-# XXX: <linux@arm.linux.org.uk>
+# This is a cut-down version of the file; it contains only machines that
+# are merged into mainline or have been edited in the machine database
+# within the last 12 months.  References to machine_is_NAME() do not count!
 #
 # Last update: Sat May 7 08:48:24 2011
 #
@@ -65,6 +64,7 @@ h7201			ARCH_H7201		H7201			161
 h7202			ARCH_H7202		H7202			162
 iq80321			ARCH_IQ80321		IQ80321			169
 ks8695			ARCH_KS8695		KS8695			180
+karo			ARCH_KARO		KARO			190
 smdk2410		ARCH_SMDK2410		SMDK2410		193
 ceiva			ARCH_CEIVA		CEIVA			200
 voiceblue		MACH_VOICEBLUE		VOICEBLUE		218
@@ -188,6 +188,7 @@ omap_2430sdp		MACH_OMAP_2430SDP	OMAP_2430SDP		900
 davinci_evm		MACH_DAVINCI_EVM	DAVINCI_EVM		901
 palmz72			MACH_PALMZ72		PALMZ72			904
 nxdb500			MACH_NXDB500		NXDB500			905
+apf9328			MACH_APF9328		APF9328			906
 palmt5			MACH_PALMT5		PALMT5			917
 palmtc			MACH_PALMTC		PALMTC			918
 omap_apollon		MACH_OMAP_APOLLON	OMAP_APOLLON		919
@@ -271,10 +272,12 @@ pcm038			MACH_PCM038		PCM038			1551
 ts_x09			MACH_TS209		TS209			1565
 at91cap9adk		MACH_AT91CAP9ADK	AT91CAP9ADK		1566
 mx31moboard		MACH_MX31MOBOARD	MX31MOBOARD		1574
+vision_ep9307		MACH_VISION_EP9307	VISION_EP9307		1578
 terastation_pro2	MACH_TERASTATION_PRO2	TERASTATION_PRO2	1584
 linkstation_pro		MACH_LINKSTATION_PRO	LINKSTATION_PRO		1585
 e350			MACH_E350		E350			1596
 ts409			MACH_TS409		TS409			1601
+rsi_ews			MACH_RSI_EWS		RSI_EWS			1609
 cm_x300			MACH_CM_X300		CM_X300			1616
 at91sam9g20ek		MACH_AT91SAM9G20EK	AT91SAM9G20EK		1624
 smdk6410		MACH_SMDK6410		SMDK6410		1626
@@ -331,6 +334,7 @@ smdkc100		MACH_SMDKC100		SMDKC100		1826
 tavorevb		MACH_TAVOREVB		TAVOREVB		1827
 saar			MACH_SAAR		SAAR			1828
 at91sam9m10g45ek	MACH_AT91SAM9M10G45EK	AT91SAM9M10G45EK	1830
+usb_a9g20		MACH_USB_A9G20		USB_A9G20		1841
 mxlads			MACH_MXLADS		MXLADS			1851
 linkstation_mini	MACH_LINKSTATION_MINI	LINKSTATION_MINI	1858
 afeb9260		MACH_AFEB9260		AFEB9260		1859
@@ -369,6 +373,7 @@ pcm043			MACH_PCM043		PCM043			2072
 sheevaplug		MACH_SHEEVAPLUG		SHEEVAPLUG		2097
 avengers_lite		MACH_AVENGERS_LITE	AVENGERS_LITE		2104
 mx51_babbage		MACH_MX51_BABBAGE	MX51_BABBAGE		2125
+tx37			MACH_TX37		TX37			2127
 rd78x00_masa		MACH_RD78X00_MASA	RD78X00_MASA		2135
 dm355_leopard		MACH_DM355_LEOPARD	DM355_LEOPARD		2138
 ts219			MACH_TS219		TS219			2139
@@ -379,6 +384,7 @@ omap_4430sdp		MACH_OMAP_4430SDP	OMAP_4430SDP		2160
 magx_zn5		MACH_MAGX_ZN5		MAGX_ZN5		2162
 btmavb101		MACH_BTMAVB101		BTMAVB101		2172
 btmawb101		MACH_BTMAWB101		BTMAWB101		2173
+tx25			MACH_TX25		TX25			2177
 omap3_torpedo		MACH_OMAP3_TORPEDO	OMAP3_TORPEDO		2178
 anw6410			MACH_ANW6410		ANW6410			2183
 imx27_visstrim_m10	MACH_IMX27_VISSTRIM_M10	IMX27_VISSTRIM_M10	2187
@@ -423,6 +429,7 @@ raumfeld_rc		MACH_RAUMFELD_RC	RAUMFELD_RC		2413
 raumfeld_connector	MACH_RAUMFELD_CONNECTOR	RAUMFELD_CONNECTOR	2414
 raumfeld_speaker	MACH_RAUMFELD_SPEAKER	RAUMFELD_SPEAKER	2415
 tnetv107x		MACH_TNETV107X		TNETV107X		2418
+mx51_m2id		MACH_MX51_M2ID		MX51_M2ID		2428
 smdkv210		MACH_SMDKV210		SMDKV210		2456
 omap_zoom3		MACH_OMAP_ZOOM3		OMAP_ZOOM3		2464
 omap_3630sdp		MACH_OMAP_3630SDP	OMAP_3630SDP		2465
@@ -433,14 +440,17 @@ omapl138_hawkboard	MACH_OMAPL138_HAWKBOARD	OMAPL138_HAWKBOARD	2495
 ts41x			MACH_TS41X		TS41X			2502
 phy3250			MACH_PHY3250		PHY3250			2511
 mini6410		MACH_MINI6410		MINI6410		2520
+tx51			MACH_TX51		TX51			2529
 mx28evk			MACH_MX28EVK		MX28EVK			2531
 smartq5			MACH_SMARTQ5		SMARTQ5			2534
 davinci_dm6467tevm	MACH_DAVINCI_DM6467TEVM	DAVINCI_DM6467TEVM	2548
 mxt_td60		MACH_MXT_TD60		MXT_TD60		2550
 riot_bei2		MACH_RIOT_BEI2		RIOT_BEI2		2576
 riot_x37		MACH_RIOT_X37		RIOT_X37		2578
+pca101			MACH_PCA101		PCA101			2595
 capc7117		MACH_CAPC7117		CAPC7117		2612
 icontrol		MACH_ICONTROL		ICONTROL		2624
+gplugd			MACH_GPLUGD		GPLUGD			2625
 qsd8x50a_st1_5		MACH_QSD8X50A_ST1_5	QSD8X50A_ST1_5		2627
 mx23evk			MACH_MX23EVK		MX23EVK			2629
 ap4evb			MACH_AP4EVB		AP4EVB			2630
@@ -1113,3 +1123,5 @@ blissc			MACH_BLISSC		BLISSC			3491
 thales_adc		MACH_THALES_ADC		THALES_ADC		3492
 ubisys_p9d_evp		MACH_UBISYS_P9D_EVP	UBISYS_P9D_EVP		3493
 atdgp318		MACH_ATDGP318		ATDGP318		3494
+smdk4212		MACH_SMDK4212		SMDK4212		3638
+smdk4412		MACH_SMDK4412		SMDK4412		3765
diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile
index 6de73aab0195..a81404c09d5d 100644
--- a/arch/arm/vfp/Makefile
+++ b/arch/arm/vfp/Makefile
@@ -7,7 +7,7 @@
 # ccflags-y := -DDEBUG
 # asflags-y := -DDEBUG
 
-KBUILD_AFLAGS	:=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp)
+KBUILD_AFLAGS	:=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft)
 LDFLAGS		+=--no-warn-mismatch
 
 obj-y			+= vfp.o
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 79bcb4316930..0cbd5a0a9332 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <linux/signal.h>
@@ -68,7 +69,7 @@ static bool vfp_state_in_hw(unsigned int cpu, struct thread_info *thread)
 /*
  * Force a reload of the VFP context from the thread structure.  We do
  * this by ensuring that access to the VFP hardware is disabled, and
- * clear last_VFP_context.  Must be called from non-preemptible context.
+ * clear vfp_current_hw_state.  Must be called from non-preemptible context.
  */
 static void vfp_force_reload(unsigned int cpu, struct thread_info *thread)
 {
@@ -436,9 +437,7 @@ static void vfp_enable(void *unused)
 	set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
 }
 
-#ifdef CONFIG_PM
-#include <linux/syscore_ops.h>
-
+#ifdef CONFIG_CPU_PM
 static int vfp_pm_suspend(void)
 {
 	struct thread_info *ti = current_thread_info();
@@ -468,19 +467,33 @@ static void vfp_pm_resume(void)
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
 }
 
-static struct syscore_ops vfp_pm_syscore_ops = {
-	.suspend	= vfp_pm_suspend,
-	.resume		= vfp_pm_resume,
+static int vfp_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd,
+	void *v)
+{
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		vfp_pm_suspend();
+		break;
+	case CPU_PM_ENTER_FAILED:
+	case CPU_PM_EXIT:
+		vfp_pm_resume();
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vfp_cpu_pm_notifier_block = {
+	.notifier_call = vfp_cpu_pm_notifier,
 };
 
 static void vfp_pm_init(void)
 {
-	register_syscore_ops(&vfp_pm_syscore_ops);
+	cpu_pm_register_notifier(&vfp_cpu_pm_notifier_block);
 }
 
 #else
 static inline void vfp_pm_init(void) { }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_CPU_PM */
 
 /*
  * Ensure that the VFP state stored in 'thread->vfpstate' is up to date
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c7476295de80..abe5a9e85148 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -248,10 +248,6 @@ config HOTPLUG_CPU
 	depends on SMP && HOTPLUG
 	default y
 
-config HAVE_LEGACY_PER_CPU_AREA
-	def_bool y
-	depends on SMP
-
 config BF_REV_MIN
 	int
 	default 0 if (BF51x || BF52x || (BF54x && !BF54xM))
diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig
index 56151b5dbc44..0e6d841b5d01 100644
--- a/arch/blackfin/configs/BF548-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF548-EZKIT_defconfig
@@ -4,7 +4,6 @@ CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_EXPERT=y
 # CONFIG_SYSCTL_SYSCALL is not set
 # CONFIG_ELF_CORE is not set
@@ -40,7 +39,6 @@ CONFIG_EBIU_MODEVAL=0x1
 CONFIG_EBIU_FCTLVAL=0x6
 CONFIG_BINFMT_FLAT=y
 CONFIG_BINFMT_ZFLAT=y
-CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -55,7 +53,6 @@ CONFIG_IP_PNP=y
 CONFIG_CAN=m
 CONFIG_CAN_RAW=m
 CONFIG_CAN_BCM=m
-CONFIG_CAN_DEV=m
 CONFIG_CAN_BFIN=m
 CONFIG_IRDA=m
 CONFIG_IRLAN=m
@@ -67,7 +64,6 @@ CONFIG_BFIN_SIR3=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
@@ -105,12 +101,12 @@ CONFIG_INPUT_TOUCHSCREEN=y
 CONFIG_TOUCHSCREEN_AD7877=m
 CONFIG_INPUT_MISC=y
 # CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
+# CONFIG_LEGACY_PTYS is not set
 CONFIG_BFIN_JTAG_COMM=m
+# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_BFIN=y
 CONFIG_SERIAL_BFIN_CONSOLE=y
 CONFIG_SERIAL_BFIN_UART1=y
-# CONFIG_LEGACY_PTYS is not set
 # CONFIG_HW_RANDOM is not set
 CONFIG_I2C=y
 CONFIG_I2C_CHARDEV=y
@@ -163,6 +159,7 @@ CONFIG_USB_DEVICEFS=y
 CONFIG_USB_OTG_BLACKLIST_HUB=y
 CONFIG_USB_MON=y
 CONFIG_USB_MUSB_HDRC=y
+CONFIG_USB_MUSB_BLACKFIN=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_BLOCK=m
@@ -185,8 +182,6 @@ CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
 CONFIG_NFSD=m
 CONFIG_NFSD_V3=y
-CONFIG_SMB_FS=m
-CONFIG_SMB_NLS_DEFAULT=y
 CONFIG_CIFS=y
 CONFIG_NLS_CODEPAGE_437=m
 CONFIG_NLS_CODEPAGE_936=m
@@ -196,7 +191,6 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_FTRACE is not set
 CONFIG_DEBUG_MMRS=y
 CONFIG_DEBUG_HWERR=y
@@ -206,5 +200,4 @@ CONFIG_DEBUG_BFIN_HWTRACE_COMPRESSION_ONE=y
 CONFIG_EARLY_PRINTK=y
 CONFIG_CPLB_INFO=y
 CONFIG_BFIN_PSEUDODBG_INSNS=y
-CONFIG_CRYPTO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 7a075eaf6041..5a0625aad6a0 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -21,6 +21,7 @@ generic-y += local64.h
 generic-y += local.h
 generic-y += mman.h
 generic-y += msgbuf.h
+generic-y += mutex.h
 generic-y += param.h
 generic-y += percpu.h
 generic-y += pgalloc.h
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 135225696fd2..54c6e2887e9f 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2004-2009 Analog Devices Inc.
+ * Copyright 2004-2011 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -7,111 +7,27 @@
 #ifndef __ARCH_BLACKFIN_ATOMIC__
 #define __ARCH_BLACKFIN_ATOMIC__
 
-#ifndef CONFIG_SMP
-# include <asm-generic/atomic.h>
-#else
+#ifdef CONFIG_SMP
 
-#include <linux/types.h>
-#include <asm/system.h>	/* local_irq_XXX() */
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i)	{ (i) }
-#define atomic_set(v, i)	(((v)->counter) = i)
-
-#define atomic_read(v)	__raw_uncached_fetch_asm(&(v)->counter)
+#include <linux/linkage.h>
 
 asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
-
 asmlinkage int __raw_atomic_update_asm(volatile int *ptr, int value);
-
 asmlinkage int __raw_atomic_clear_asm(volatile int *ptr, int value);
-
 asmlinkage int __raw_atomic_set_asm(volatile int *ptr, int value);
-
 asmlinkage int __raw_atomic_xor_asm(volatile int *ptr, int value);
-
 asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-	__raw_atomic_update_asm(&v->counter, i);
-}
-
-static inline void atomic_sub(int i, atomic_t *v)
-{
-	__raw_atomic_update_asm(&v->counter, -i);
-}
-
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-	return __raw_atomic_update_asm(&v->counter, i);
-}
-
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-	return __raw_atomic_update_asm(&v->counter, -i);
-}
+#define atomic_read(v) __raw_uncached_fetch_asm(&(v)->counter)
 
-static inline void atomic_inc(volatile atomic_t *v)
-{
-	__raw_atomic_update_asm(&v->counter, 1);
-}
-
-static inline void atomic_dec(volatile atomic_t *v)
-{
-	__raw_atomic_update_asm(&v->counter, -1);
-}
-
-static inline void atomic_clear_mask(int mask, atomic_t *v)
-{
-	__raw_atomic_clear_asm(&v->counter, mask);
-}
-
-static inline void atomic_set_mask(int mask, atomic_t *v)
-{
-	__raw_atomic_set_asm(&v->counter, mask);
-}
-
-/* Atomic operations are already serializing */
-#define smp_mb__before_atomic_dec()    barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc()    barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-#define atomic_dec_return(v) atomic_sub_return(1,(v))
-#define atomic_inc_return(v) atomic_add_return(1,(v))
-
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-#define __atomic_add_unless(v, a, u)				\
-({								\
-	int c, old;						\
-	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
-		c = old;					\
-	c;							\
-})
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+#define atomic_add_return(i, v) __raw_atomic_update_asm(&(v)->counter, i)
+#define atomic_sub_return(i, v) __raw_atomic_update_asm(&(v)->counter, -(i))
 
+#define atomic_clear_mask(m, v) __raw_atomic_clear_asm(&(v)->counter, m)
+#define atomic_set_mask(m, v)   __raw_atomic_set_asm(&(v)->counter, m)
 
 #endif
 
+#include <asm-generic/atomic.h>
+
 #endif
diff --git a/arch/blackfin/include/asm/mutex.h b/arch/blackfin/include/asm/mutex.h
deleted file mode 100644
index ff6101aa2c71..000000000000
--- a/arch/blackfin/include/asm/mutex.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/mutex-dec.h>
diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 1c0d190adaef..5cc111502822 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -195,17 +195,17 @@ static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
-		memcpy(to, from, n);
+		memcpy(to, (const void __force *)from, n);
 	else
 		return n;
 	return 0;
 }
 
 static inline unsigned long __must_check
-copy_to_user(void *to, const void __user *from, unsigned long n)
+copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	if (access_ok(VERIFY_WRITE, to, n))
-		memcpy(to, from, n);
+		memcpy((void __force *)to, from, n);
 	else
 		return n;
 	return 0;
diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile
index b7bdc42fe1a3..1f88edd4572a 100644
--- a/arch/blackfin/kernel/Makefile
+++ b/arch/blackfin/kernel/Makefile
@@ -38,6 +38,6 @@ obj-$(CONFIG_PERF_EVENTS)            += perf_event.o
 
 # the kgdb test puts code into L2 and without linker
 # relaxation, we need to force long calls to/from it
-CFLAGS_kgdb_test.o := -mlong-calls -O0
+CFLAGS_kgdb_test.o := -mlong-calls
 
 obj-$(CONFIG_DEBUG_MMRS)             += debug-mmrs.o
diff --git a/arch/blackfin/kernel/kgdb_test.c b/arch/blackfin/kernel/kgdb_test.c
index 2a6e9dbb62a5..4a7dcfea98af 100644
--- a/arch/blackfin/kernel/kgdb_test.c
+++ b/arch/blackfin/kernel/kgdb_test.c
@@ -50,8 +50,7 @@ void kgdb_l2_test(void)
 
 #endif
 
-
-int kgdb_test(char *name, int len, int count, int z)
+noinline int kgdb_test(char *name, int len, int count, int z)
 {
 	pr_alert("kgdb name(%d): %s, %d, %d\n", len, name, count, z);
 	count = z;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index 9e9b60d969dc..1bcf3a3c57d8 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -188,8 +188,7 @@ irqreturn_t bfin_gptmr0_interrupt(int irq, void *dev_id)
 
 static struct irqaction gptmr0_irq = {
 	.name		= "Blackfin GPTimer0",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | \
-			  IRQF_IRQPOLL | IRQF_PERCPU,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
 	.handler	= bfin_gptmr0_interrupt,
 };
 
@@ -297,8 +296,7 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)
 
 static struct irqaction coretmr_irq = {
 	.name		= "Blackfin CoreTimer",
-	.flags		= IRQF_DISABLED | IRQF_TIMER | \
-			  IRQF_IRQPOLL | IRQF_PERCPU,
+	.flags		= IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
 	.handler	= bfin_coretmr_interrupt,
 };
 
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index ceb2bf63dfe2..2310b249675f 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -25,7 +25,6 @@
 
 static struct irqaction bfin_timer_irq = {
 	.name = "Blackfin Timer Tick",
-	.flags = IRQF_DISABLED
 };
 
 #if defined(CONFIG_IPIPE)
diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c
index eb325ed6607e..5da5787fc4ef 100644
--- a/arch/blackfin/mach-bf533/boards/H8606.c
+++ b/arch/blackfin/mach-bf533/boards/H8606.c
@@ -54,7 +54,8 @@ static struct resource dm9000_resources[] = {
 	[2] = {
 		.start	= IRQ_PF10,
 		.end	= IRQ_PF10,
-		.flags	= (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE | IRQF_SHARED | IRQF_TRIGGER_HIGH),
+		.flags	= (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE |
+		           IORESOURCE_IRQ_SHAREABLE),
 	},
 };
 
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
index 44fd8409db10..9fb20d6d8f91 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c
@@ -605,7 +605,7 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH | IRQF_DISABLED,
+	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
index 1b4ac5c64aae..5ba389fc61ae 100644
--- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c
+++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c
@@ -570,7 +570,7 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH | IRQF_DISABLED,
+	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c
index b52e6728f64f..6c916a67ef68 100644
--- a/arch/blackfin/mach-bf537/boards/stamp.c
+++ b/arch/blackfin/mach-bf537/boards/stamp.c
@@ -962,10 +962,10 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_AD183X) \
-	|| defined(CONFIG_SND_BF5XX_SOC_AD183X_MODULE)
+#if defined(CONFIG_SND_BF5XX_SOC_AD1836) \
+	|| defined(CONFIG_SND_BF5XX_SOC_AD1836_MODULE)
 	{
-		.modalias = "ad183x",
+		.modalias = "ad1836",
 		.max_speed_hz = 3125000,     /* max spi clock (SCK) speed in HZ */
 		.bus_num = 0,
 		.chip_select = 4,
@@ -984,9 +984,9 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_ADAV80X) || defined(CONFIG_SND_BF5XX_SOC_ADAV80X_MODULE)
+#if defined(CONFIG_SND_SOC_ADAV80X) || defined(CONFIG_SND_SOC_ADV80X_MODULE)
 	{
-		.modalias = "adav80x",
+		.modalias = "adav801",
 		.max_speed_hz = 3125000,     /* max spi clock (SCK) speed in HZ */
 		.bus_num = 0,
 		.chip_select = 1,
@@ -2101,7 +2101,7 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 	},
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_ADAV80X) || defined(CONFIG_SND_BF5XX_SOC_ADAV80X_MODULE)
+#if defined(CONFIG_SND_SOC_ADAV80X) || defined(CONFIG_SND_SOC_ADAV80X_MODULE)
 	{
 		I2C_BOARD_INFO("adav803", 0x10),
 	},
@@ -2134,23 +2134,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 	},
 #endif
 
-#if defined(CONFIG_AD7414) || defined(CONFIG_AD7414_MODULE)
-	{
-		I2C_BOARD_INFO("ad7414", 0x9),
-		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
-	},
-#endif
-
-#if defined(CONFIG_AD7416) || defined(CONFIG_AD7416_MODULE)
-	{
-		I2C_BOARD_INFO("ad7417", 0xb),
-		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
-		.platform_data = (void *)GPIO_PF4,
-	},
-#endif
-
 #if defined(CONFIG_ADE7854_I2C) || defined(CONFIG_ADE7854_I2C_MODULE)
 	{
 		I2C_BOARD_INFO("ade7854", 0x38),
@@ -2161,15 +2144,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 	{
 		I2C_BOARD_INFO("adt75", 0x9),
 		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
-	},
-#endif
-
-#if defined(CONFIG_ADT7408) || defined(CONFIG_ADT7408_MODULE)
-	{
-		I2C_BOARD_INFO("adt7408", 0x18),
-		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
 	},
 #endif
 
@@ -2178,7 +2152,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 		I2C_BOARD_INFO("adt7410", 0x48),
 		/* CT critical temperature event. line 0 */
 		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
 		.platform_data = (void *)&adt7410_platform_data,
 	},
 #endif
@@ -2187,7 +2160,6 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 	{
 		I2C_BOARD_INFO("ad7291", 0x20),
 		.irq = IRQ_PG5,
-		.irq_flags = IRQF_TRIGGER_LOW,
 	},
 #endif
 
@@ -2275,6 +2247,11 @@ static struct i2c_board_info __initdata bfin_i2c_board_info[] = {
 		I2C_BOARD_INFO("adau1361", 0x38),
 	},
 #endif
+#if defined(CONFIG_SND_SOC_ADAU1701) || defined(CONFIG_SND_SOC_ADAU1701_MODULE)
+	{
+		I2C_BOARD_INFO("adau1701", 0x34),
+	},
+#endif
 #if defined(CONFIG_AD525X_DPOT) || defined(CONFIG_AD525X_DPOT_MODULE)
 	{
 		I2C_BOARD_INFO("ad5258", 0x18),
@@ -2388,7 +2365,7 @@ static struct platform_device bfin_sport1_uart_device = {
 #define PATA_INT	IRQ_PF5
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 1,
-	.irq_flags = IRQF_TRIGGER_HIGH | IRQF_DISABLED,
+	.irq_flags = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
@@ -2540,13 +2517,21 @@ static struct platform_device bfin_ac97_pcm = {
 };
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_AD73311) || defined(CONFIG_SND_BF5XX_SOC_AD73311_MODULE)
+#if defined(CONFIG_SND_SOC_AD73311) || defined(CONFIG_SND_SOC_AD73311_MODULE)
 static struct platform_device bfin_ad73311_codec_device = {
 	.name = "ad73311",
 	.id = -1,
 };
 #endif
 
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAV80X) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAV80X_MODULE)
+static struct platform_device bfin_eval_adav801_device = {
+	.name = "bfin-eval-adav801",
+	.id = -1,
+};
+#endif
+
 #if defined(CONFIG_SND_BF5XX_SOC_I2S) || defined(CONFIG_SND_BF5XX_SOC_I2S_MODULE)
 static struct platform_device bfin_i2s = {
 	.name = "bfin-i2s",
@@ -2661,6 +2646,20 @@ static struct platform_device iio_gpio_trigger = {
 };
 #endif
 
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1373) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1373_MODULE)
+static struct platform_device bf5xx_adau1373_device = {
+	.name = "bfin-eval-adau1373",
+};
+#endif
+
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1701) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1701_MODULE)
+static struct platform_device bf5xx_adau1701_device = {
+	.name = "bfin-eval-adau1701",
+};
+#endif
+
 static struct platform_device *stamp_devices[] __initdata = {
 
 	&bfin_dpmc,
@@ -2782,7 +2781,7 @@ static struct platform_device *stamp_devices[] __initdata = {
 	&bfin_ac97_pcm,
 #endif
 
-#if defined(CONFIG_SND_BF5XX_SOC_AD73311) || defined(CONFIG_SND_BF5XX_SOC_AD73311_MODULE)
+#if defined(CONFIG_SND_SOC_AD73311) || defined(CONFIG_SND_SOC_AD73311_MODULE)
 	&bfin_ad73311_codec_device,
 #endif
 
@@ -2821,6 +2820,21 @@ static struct platform_device *stamp_devices[] __initdata = {
 	defined(CONFIG_IIO_GPIO_TRIGGER_MODULE)
 	&iio_gpio_trigger,
 #endif
+
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1373) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1373_MODULE)
+	&bf5xx_adau1373_device,
+#endif
+
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1701) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAU1701_MODULE)
+	&bf5xx_adau1701_device,
+#endif
+
+#if defined(CONFIG_SND_SOC_BFIN_EVAL_ADAV80X) || \
+	defined(CONFIG_SND_SOC_BFIN_EVAL_ADAV80X_MODULE)
+	&bfin_eval_adav801_device,
+#endif
 };
 
 static int __init net2272_init(void)
diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
index 9b7287abdfa1..2da0316d890e 100644
--- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c
+++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c
@@ -572,7 +572,7 @@ static struct platform_device bfin_mac_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH | IRQF_DISABLED,
+	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index e4f397d1d65b..c1b72f2d6354 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -348,7 +348,7 @@ static struct platform_device bfin_sir0_device = {
 
 static struct pata_platform_info bfin_pata_platform_data = {
 	.ioport_shift = 2,
-	.irq_type = IRQF_TRIGGER_HIGH | IRQF_DISABLED,
+	.irq_type = IRQF_TRIGGER_HIGH,
 };
 
 static struct resource bfin_pata_resources[] = {
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
index 85abd8be1343..db22401e7605 100644
--- a/arch/blackfin/mach-bf561/smp.c
+++ b/arch/blackfin/mach-bf561/smp.c
@@ -114,7 +114,7 @@ void __init platform_request_ipi(int irq, void *handler)
 	int ret;
 	const char *name = (irq == IRQ_SUPPLE_0) ? supple0 : supple1;
 
-	ret = request_irq(irq, handler, IRQF_DISABLED | IRQF_PERCPU, name, handler);
+	ret = request_irq(irq, handler, IRQF_PERCPU, name, handler);
 	if (ret)
 		panic("Cannot request %s for IPI service", name);
 }
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 107622aacf6b..0784a52389c8 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -295,10 +295,15 @@ EXPORT_SYMBOL_GPL(smp_call_function_single);
 
 void smp_send_reschedule(int cpu)
 {
+	cpumask_t callmap;
 	/* simply trigger an ipi */
 	if (cpu_is_offline(cpu))
 		return;
-	platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0);
+
+	cpumask_clear(&callmap);
+	cpumask_set_cpu(cpu, &callmap);
+
+	smp_send_message(callmap, BFIN_IPI_RESCHEDULE, NULL, NULL, 0);
 
 	return;
 }
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 17addacb169e..408b055c585f 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -282,8 +282,8 @@ config ETRAX_RTC
 	  Enables drivers for the Real-Time Clock battery-backed chips on
 	  some products. The kernel reads the time when booting, and
 	  the date can be set using ioctl(fd, RTC_SET_TIME, &rt) with rt a
-	  rtc_time struct (see <file:include/asm-cris/rtc.h>) on the /dev/rtc
-	  device.  You can check the time with cat /proc/rtc, but
+	  rtc_time struct (see <file:arch/cris/include/asm/rtc.h>) on the
+	  /dev/rtc device.  You can check the time with cat /proc/rtc, but
 	  normal time reading should be done using libc function time and
 	  friends.
 
diff --git a/arch/cris/arch-v10/Kconfig b/arch/cris/arch-v10/Kconfig
index adc164e99339..df9a38b4f18f 100644
--- a/arch/cris/arch-v10/Kconfig
+++ b/arch/cris/arch-v10/Kconfig
@@ -24,8 +24,8 @@ config ETRAX_PA_LEDS
 	help
 	  The ETRAX network driver is responsible for flashing LED's when
 	  packets arrive and are sent.  It uses macros defined in
-	  <file:include/asm-cris/io.h>, and those macros are defined after what
-	  YOU choose in this option.  The actual bits used are configured
+	  <file:arch/cris/include/asm/io.h>, and those macros are defined after
+	  what YOU choose in this option.  The actual bits used are configured
 	  separately.  Select this if the LEDs are on port PA.  Some products
 	  put the leds on PB or a memory-mapped latch (CSP0) instead.
 
@@ -34,8 +34,8 @@ config ETRAX_PB_LEDS
 	help
 	  The ETRAX network driver is responsible for flashing LED's when
 	  packets arrive and are sent.  It uses macros defined in
-	  <file:include/asm-cris/io.h>, and those macros are defined after what
-	  YOU choose in this option.  The actual bits used are configured
+	  <file:arch/cris/include/asm/io.h>, and those macros are defined after
+	  what YOU choose in this option.  The actual bits used are configured
 	  separately.  Select this if the LEDs are on port PB.  Some products
 	  put the leds on PA or a memory-mapped latch (CSP0) instead.
 
@@ -44,8 +44,8 @@ config ETRAX_CSP0_LEDS
 	help
 	  The ETRAX network driver is responsible for flashing LED's when
 	  packets arrive and are sent. It uses macros defined in
-	  <file:include/asm-cris/io.h>, and those macros are defined after what
-	  YOU choose in this option.  The actual bits used are configured
+	  <file:arch/cris/include/asm/io.h>, and those macros are defined after
+	  what YOU choose in this option.  The actual bits used are configured
 	  separately.  Select this if the LEDs are on a memory-mapped latch
 	  using chip select CSP0, this is mapped at 0x90000000.
 	  Some products put the leds on PA or PB instead.
diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig
index 0d7221779923..32d90867a984 100644
--- a/arch/cris/arch-v10/drivers/Kconfig
+++ b/arch/cris/arch-v10/drivers/Kconfig
@@ -4,6 +4,7 @@ config ETRAX_ETHERNET
 	bool "Ethernet support"
 	depends on ETRAX_ARCH_V10
 	select NET_ETHERNET
+	select NET_CORE
 	select MII
 	help
 	  This option enables the ETRAX 100LX built-in 10/100Mbit Ethernet
diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c
index b9f9c8ce2169..b579dd02e098 100644
--- a/arch/cris/arch-v10/kernel/kgdb.c
+++ b/arch/cris/arch-v10/kernel/kgdb.c
@@ -694,7 +694,7 @@ mem2hex(char *buf, unsigned char *mem, int count)
                 /* Valid mem address. */
                 for (i = 0; i < count; i++) {
                         ch = *mem++;
-			buf = pack_hex_byte(buf, ch);
+			buf = hex_byte_pack(buf, ch);
                 }
         }
         
@@ -868,7 +868,7 @@ stub_is_stopped(int sigval)
 	/* Send trap type (converted to signal) */
 
 	*ptr++ = 'T';
-	ptr = pack_hex_byte(ptr, sigval);
+	ptr = hex_byte_pack(ptr, sigval);
 
 	/* Send register contents. We probably only need to send the
 	 * PC, frame pointer and stack pointer here. Other registers will be
@@ -881,7 +881,7 @@ stub_is_stopped(int sigval)
                 status = read_register (regno, &reg_cont);
                 
 		if (status == SUCCESS) {
-			ptr = pack_hex_byte(ptr, regno);
+			ptr = hex_byte_pack(ptr, regno);
                         *ptr++ = ':';
 
                         ptr = mem2hex(ptr, (unsigned char *)&reg_cont,
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 41a2732e8b9c..e47e9c3401b0 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -4,6 +4,7 @@ config ETRAX_ETHERNET
 	bool "Ethernet support"
 	depends on ETRAX_ARCH_V32
 	select NET_ETHERNET
+	select NET_CORE
 	select MII
 	help
 	  This option enables the ETRAX FS built-in 10/100Mbit Ethernet
diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c
index c0343c3ea7f8..8c1d35cdf00a 100644
--- a/arch/cris/arch-v32/kernel/kgdb.c
+++ b/arch/cris/arch-v32/kernel/kgdb.c
@@ -677,7 +677,7 @@ mem2hex(char *buf, unsigned char *mem, int count)
                 /* Valid mem address. */
 		for (i = 0; i < count; i++) {
 			ch = *mem++;
-			buf = pack_hex_byte(buf, ch);
+			buf = hex_byte_pack(buf, ch);
 		}
         }
         /* Terminate properly. */
@@ -695,7 +695,7 @@ mem2hex_nbo(char *buf, unsigned char *mem, int count)
 	mem += count - 1;
 	for (i = 0; i < count; i++) {
 		ch = *mem--;
-		buf = pack_hex_byte(buf, ch);
+		buf = hex_byte_pack(buf, ch);
         }
 
         /* Terminate properly. */
@@ -880,7 +880,7 @@ stub_is_stopped(int sigval)
 	/* Send trap type (converted to signal) */
 
 	*ptr++ = 'T';
-	ptr = pack_hex_byte(ptr, sigval);
+	ptr = hex_byte_pack(ptr, sigval);
 
 	if (((reg.exs & 0xff00) >> 8) == 0xc) {
 
@@ -988,26 +988,26 @@ stub_is_stopped(int sigval)
 	}
 	/* Only send PC, frame and stack pointer. */
 	read_register(PC, &reg_cont);
-	ptr = pack_hex_byte(ptr, PC);
+	ptr = hex_byte_pack(ptr, PC);
 	*ptr++ = ':';
 	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[PC]);
 	*ptr++ = ';';
 
 	read_register(R8, &reg_cont);
-	ptr = pack_hex_byte(ptr, R8);
+	ptr = hex_byte_pack(ptr, R8);
 	*ptr++ = ':';
 	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[R8]);
 	*ptr++ = ';';
 
 	read_register(SP, &reg_cont);
-	ptr = pack_hex_byte(ptr, SP);
+	ptr = hex_byte_pack(ptr, SP);
 	*ptr++ = ':';
 	ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[SP]);
 	*ptr++ = ';';
 
 	/* Send ERP as well; this will save us an entire register fetch in some cases. */
         read_register(ERP, &reg_cont);
-	ptr = pack_hex_byte(ptr, ERP);
+	ptr = hex_byte_pack(ptr, ERP);
         *ptr++ = ':';
         ptr = mem2hex(ptr, (unsigned char *)&reg_cont, register_size[ERP]);
         *ptr++ = ';';
diff --git a/arch/cris/arch-v32/lib/nand_init.S b/arch/cris/arch-v32/lib/nand_init.S
deleted file mode 100644
index d671fed451c9..000000000000
--- a/arch/cris/arch-v32/lib/nand_init.S
+++ /dev/null
@@ -1,178 +0,0 @@
-##=============================================================================
-##
-##      nand_init.S
-##
-##      The bootrom copies data from the NAND flash to the internal RAM but
-##      due to a bug/feature we can only trust the 256 first bytes. So this
-##      code copies more data from NAND flash to internal RAM. Obvioulsy this
-##      code must fit in the first 256 bytes so alter with care.
-##
-##	Some notes about the bug/feature for future reference:
-##        The bootrom copies the first 127 KB from NAND flash to internal
-##        memory. The problem is that it does a bytewise copy. NAND flashes
-##        does autoincrement on the address so for a 16-bite device each
-##        read/write increases the address by two. So the copy loop in the
-##        bootrom will discard every second byte. This is solved by inserting
-##        zeroes in every second byte in the first erase block.
-##
-##        The bootrom also incorrectly assumes that it can read the flash
-##        linear with only one read command but the flash will actually
-##        switch between normal area and spare area if you do that so we
-##        can't trust more than the first 256 bytes.
-##
-##=============================================================================
-
-#include <arch/hwregs/asm/reg_map_asm.h>
-#include <arch/hwregs/asm/gio_defs_asm.h>
-#include <arch/hwregs/asm/pinmux_defs_asm.h>
-#include <arch/hwregs/asm/bif_core_defs_asm.h>
-#include <arch/hwregs/asm/config_defs_asm.h>
-
-;; There are 8-bit NAND flashes and 16-bit NAND flashes.
-;; We need to treat them slightly different.
-#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
-#define PAGE_SIZE 256
-#else
-#error 2
-#define PAGE_SIZE 512
-#endif
-#define ERASE_BLOCK 16384
-
-;; GPIO pins connected to NAND flash
-#define CE 4
-#define CLE 5
-#define ALE 6
-#define BY 7
-
-;; Address space for NAND flash
-#define NAND_RD_ADDR 0x90000000
-#define NAND_WR_ADDR 0x94000000
-
-#define READ_CMD 0x00
-
-;; Readability macros
-#define CSP_MASK \
-	REG_MASK(bif_core, rw_grp3_cfg, gated_csp0) | \
-	REG_MASK(bif_core, rw_grp3_cfg, gated_csp1)
-#define CSP_VAL \
-	REG_STATE(bif_core, rw_grp3_cfg, gated_csp0, rd) | \
-	REG_STATE(bif_core, rw_grp3_cfg, gated_csp1, wr)
-
-;;----------------------------------------------------------------------------
-;; Macros to set/clear GPIO bits
-
-.macro SET x
-	or.b   (1<<\x),$r9
-	move.d $r9, [$r2]
-.endm
-
-.macro CLR x
-	and.b  ~(1<<\x),$r9
-	move.d $r9, [$r2]
-.endm
-
-;;----------------------------------------------------------------------------
-
-nand_boot:
-	;; Check if nand boot was selected
-	move.d REG_ADDR(config, regi_config, r_bootsel), $r0
-	move.d [$r0], $r0
-	and.d  REG_MASK(config, r_bootsel, boot_mode), $r0
-	cmp.d  REG_STATE(config, r_bootsel, boot_mode, nand), $r0
-	bne normal_boot ; No NAND boot
-	nop
-
-copy_nand_to_ram:
-	;; copy_nand_to_ram
-	;; Arguments
-	;;   r10 - destination
-	;;   r11 - source offset
-	;;   r12 - size
-	;;   r13 - Address to jump to after completion
-	;; Note : r10-r12 are clobbered on return
-	;; Registers used:
-	;;   r0 - NAND_RD_ADDR
-	;;   r1 - NAND_WR_ADDR
-	;;   r2 - reg_gio_rw_pa_dout
-	;;   r3 - reg_gio_r_pa_din
-	;;   r4 - tmp
-	;;   r5 - byte counter within a page
-	;;   r6 - reg_pinmux_rw_pa
-	;;   r7 - reg_gio_rw_pa_oe
-	;;   r8 - reg_bif_core_rw_grp3_cfg
-	;;   r9 - reg_gio_rw_pa_dout shadow
-	move.d 0x90000000, $r0
-	move.d 0x94000000, $r1
-	move.d REG_ADDR(gio, regi_gio, rw_pa_dout), $r2
-	move.d REG_ADDR(gio, regi_gio, r_pa_din), $r3
-	move.d REG_ADDR(pinmux, regi_pinmux, rw_pa), $r6
-	move.d REG_ADDR(gio, regi_gio, rw_pa_oe), $r7
-	move.d REG_ADDR(bif_core, regi_bif_core, rw_grp3_cfg), $r8
-
-#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
-	lsrq	1, $r11
-#endif
-	;; Set up GPIO
-	move.d [$r2], $r9
-	move.d [$r7], $r4
-	or.b (1<<ALE) | (1 << CLE) | (1<<CE), $r4
-	move.d $r4, [$r7]
-
-	;; Set up bif
-	move.d [$r8], $r4
-	and.d CSP_MASK, $r4
-	or.d CSP_VAL, $r4
-	move.d $r4, [$r8]
-
-1:	;; Copy one page
-	CLR CE
-	SET CLE
-	moveq	READ_CMD, $r4
-	move.b	$r4, [$r1]
-	moveq	20, $r4
-2:	bne	2b
-	subq	1, $r4
-	CLR CLE
-	SET ALE
-	clear.w [$r1] 		; Column address = 0
-	move.d	$r11, $r4
-	lsrq	8, $r4
-	move.b	$r4, [$r1]	; Row address
-	lsrq	8, $r4
-	move.b	$r4, [$r1]	; Row address
-	moveq	20, $r4
-2:	bne	2b
-	subq	1, $r4
-	CLR ALE
-2:	move.d	[$r3], $r4
-	and.d	1 << BY, $r4
-	beq 2b
-	movu.w  PAGE_SIZE, $r5
-2:	; Copy one byte/word
-#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
-	move.w  [$r0], $r4
-#else
-	move.b  [$r0], $r4
-#endif
-	subq	1, $r5
-	bne	2b
-#if CONFIG_ETRAX_FLASH_BUSWIDTH==2
-	move.w  $r4, [$r10+]
-	subu.w	PAGE_SIZE*2, $r12
-#else
-	move.b  $r4, [$r10+]
-	subu.w	PAGE_SIZE, $r12
-#endif
-	bpl	1b
-	addu.w	PAGE_SIZE, $r11
-
-	;; End of copy
-	jump	$r13
-	nop
-
-	;; This will warn if the code above is too large. If you consider
-	;; to remove this you don't understand the bug/feature.
-	.org 256
-	.org ERASE_BLOCK
-
-normal_boot:
diff --git a/arch/frv/kernel/gdb-stub.c b/arch/frv/kernel/gdb-stub.c
index a4dba6b20bd0..a6d5381c94fe 100644
--- a/arch/frv/kernel/gdb-stub.c
+++ b/arch/frv/kernel/gdb-stub.c
@@ -672,7 +672,7 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa
 	if ((uint32_t)mem&1 && count>=1) {
 		if (!gdbstub_read_byte(mem,ch))
 			return NULL;
-		buf = pack_hex_byte(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[0]);
 		mem++;
 		count--;
 	}
@@ -680,8 +680,8 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa
 	if ((uint32_t)mem&3 && count>=2) {
 		if (!gdbstub_read_word(mem,(uint16_t *)ch))
 			return NULL;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
 		mem += 2;
 		count -= 2;
 	}
@@ -689,10 +689,10 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa
 	while (count>=4) {
 		if (!gdbstub_read_dword(mem,(uint32_t *)ch))
 			return NULL;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
-		buf = pack_hex_byte(buf, ch[2]);
-		buf = pack_hex_byte(buf, ch[3]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[2]);
+		buf = hex_byte_pack(buf, ch[3]);
 		mem += 4;
 		count -= 4;
 	}
@@ -700,8 +700,8 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa
 	if (count>=2) {
 		if (!gdbstub_read_word(mem,(uint16_t *)ch))
 			return NULL;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
 		mem += 2;
 		count -= 2;
 	}
@@ -709,7 +709,7 @@ static unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fa
 	if (count>=1) {
 		if (!gdbstub_read_byte(mem,ch))
 			return NULL;
-		buf = pack_hex_byte(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[0]);
 	}
 
 	*buf = 0;
@@ -1498,21 +1498,21 @@ void gdbstub(int sigval)
 		ptr = mem2hex(title, ptr, sizeof(title) - 1,0);
 
 		hx = hex_asc_hi(brr >> 24);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(brr >> 24);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(brr >> 16);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(brr >> 16);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(brr >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(brr >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(brr);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(brr);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 
 		ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0);
 		*ptr = 0;
@@ -1526,10 +1526,10 @@ void gdbstub(int sigval)
 
 	/* Send trap type (converted to signal) */
 	*ptr++ = 'T';
-	ptr = pack_hex_byte(ptr, sigval);
+	ptr = hex_byte_pack(ptr, sigval);
 
 	/* Send Error PC */
-	ptr = pack_hex_byte(ptr, GDB_REG_PC);
+	ptr = hex_byte_pack(ptr, GDB_REG_PC);
 	*ptr++ = ':';
 	ptr = mem2hex(&__debug_frame->pc, ptr, 4, 0);
 	*ptr++ = ';';
@@ -1537,7 +1537,7 @@ void gdbstub(int sigval)
 	/*
 	 * Send frame pointer
 	 */
-	ptr = pack_hex_byte(ptr, GDB_REG_FP);
+	ptr = hex_byte_pack(ptr, GDB_REG_FP);
 	*ptr++ = ':';
 	ptr = mem2hex(&__debug_frame->fp, ptr, 4, 0);
 	*ptr++ = ';';
@@ -1545,7 +1545,7 @@ void gdbstub(int sigval)
 	/*
 	 * Send stack pointer
 	 */
-	ptr = pack_hex_byte(ptr, GDB_REG_SP);
+	ptr = hex_byte_pack(ptr, GDB_REG_SP);
 	*ptr++ = ':';
 	ptr = mem2hex(&__debug_frame->sp, ptr, 4, 0);
 	*ptr++ = ';';
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 091ed6192ae8..d1f377f5d3b6 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -160,7 +160,7 @@ config VT_CONSOLE
 
 config HW_CONSOLE
 	bool
-	depends on VT && !S390 && !UM
+	depends on VT
 	default y
 
 comment "Unix98 PTY support"
@@ -195,7 +195,7 @@ config UNIX98_PTYS
 
 source "drivers/char/pcmcia/Kconfig"
 
-source "drivers/serial/Kconfig"
+source "drivers/tty/serial/Kconfig"
 
 source "drivers/i2c/Kconfig"
 
diff --git a/arch/h8300/include/asm/gpio.h b/arch/h8300/include/asm/gpio-internal.h
index a714f0c0efbc..a714f0c0efbc 100644
--- a/arch/h8300/include/asm/gpio.h
+++ b/arch/h8300/include/asm/gpio-internal.h
diff --git a/arch/h8300/platform/h8300h/irq.c b/arch/h8300/platform/h8300h/irq.c
index e977345105d7..bc4f51bceef5 100644
--- a/arch/h8300/platform/h8300h/irq.c
+++ b/arch/h8300/platform/h8300h/irq.c
@@ -11,7 +11,7 @@
 #include <asm/traps.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/gpio.h>
+#include <asm/gpio-internal.h>
 #include <asm/regs306x.h>
 
 const int __initdata h8300_saved_vectors[] = {
diff --git a/arch/h8300/platform/h8s/irq.c b/arch/h8300/platform/h8s/irq.c
index 8182f041f829..7b5f29febc07 100644
--- a/arch/h8300/platform/h8s/irq.c
+++ b/arch/h8300/platform/h8s/irq.c
@@ -14,7 +14,7 @@
 #include <asm/traps.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/gpio.h>
+#include <asm/gpio-internal.h>
 #include <asm/regs267x.h>
 
 /* saved vector list */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
new file mode 100644
index 000000000000..02513c2dd5ec
--- /dev/null
+++ b/arch/hexagon/Kconfig
@@ -0,0 +1,220 @@
+# Hexagon configuration
+comment "Linux Kernel Configuration for Hexagon"
+
+config HEXAGON
+	def_bool y
+	select HAVE_OPROFILE
+	select USE_GENERIC_SMP_HELPERS if SMP
+	# Other pending projects/to-do items.
+	# select HAVE_REGS_AND_STACK_ACCESS_API
+	# select HAVE_HW_BREAKPOINT if PERF_EVENTS
+	# select ARCH_HAS_CPU_IDLE_WAIT
+	# select ARCH_WANT_OPTIONAL_GPIOLIB
+	# select ARCH_REQUIRE_GPIOLIB
+	# select HAVE_CLK
+	# select IRQ_PER_CPU
+	select HAVE_IRQ_WORK
+	# select GENERIC_PENDING_IRQ if SMP
+	select GENERIC_ATOMIC64
+	select HAVE_PERF_EVENTS
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_HARDIRQS_NO__DO_IRQ
+	select GENERIC_HARDIRQS_NO_DEPRECATED
+	# GENERIC_ALLOCATOR is used by dma_alloc_coherent()
+	select GENERIC_ALLOCATOR
+	select GENERIC_IRQ_SHOW
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_TRACEHOOK
+	select NO_IOPORT
+	# mostly generic routines, with some accelerated ones
+	---help---
+	  Qualcomm Hexagon is a processor architecture designed for high
+	  performance and low power across a wide variety of applications.
+
+config HEXAGON_ARCH_V1
+	bool
+
+config HEXAGON_ARCH_V2
+	bool
+
+config HEXAGON_ARCH_V3
+	bool
+
+config HEXAGON_ARCH_V4
+	bool
+
+config FRAME_POINTER
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config PCI
+	def_bool n
+
+config EARLY_PRINTK
+	def_bool y
+
+config KTIME_SCALAR
+	def_bool y
+
+config MMU
+	def_bool y
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+config GENERIC_CSUM
+	def_bool y
+
+#
+# Use the generic interrupt handling code in kernel/irq/:
+#
+config GENERIC_IRQ_PROBE
+	def_bool y
+
+config GENERIC_IOMAP
+	def_bool y
+
+#config ZONE_DMA
+#	bool
+#	default y
+
+config HAS_DMA
+	bool
+	select HAVE_DMA_ATTRS
+	default y
+
+config NEED_SG_DMA_LENGTH
+	def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool n
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool y
+
+config GENERIC_FIND_NEXT_BIT
+	def_bool y
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_TIME
+	def_bool y
+
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
+config GENERIC_CLOCKEVENTS_BROADCAST
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+	select STACKTRACE
+
+config GENERIC_BUG
+	def_bool y
+	depends on BUG
+
+config BUG
+	def_bool y
+
+menu "Machine selection"
+
+choice
+	prompt "System type"
+	default HEXAGON_ARCH_V2
+
+config HEXAGON_COMET
+	bool "Comet Board"
+	select HEXAGON_ARCH_V2
+	---help---
+	  Support for the Comet platform.
+
+endchoice
+
+config HEXAGON_VM
+	def_bool y
+
+config CMDLINE
+	string "Default kernel command string"
+	default ""
+	help
+	  On some platforms, there is currently no way for the boot loader
+	  to pass arguments to the kernel. For these, you should supply some
+	  command-line options at build time by entering them here.  At a
+	  minimum, you should specify the memory size and the root device
+	  (e.g., mem=64M root=/dev/nfs).
+
+config HEXAGON_ANGEL_TRAPS
+	bool "Use Angel Traps"
+	default n
+	---help---
+	  Enable angel debug traps (for printk's).
+
+config SMP
+	bool "Multi-Processing support"
+	---help---
+	  Enables SMP support in the kernel.  If unsure, say "Y"
+
+config NR_CPUS
+	int "Maximum number of CPUs" if SMP
+	range 2 6 if SMP
+	default "1" if !SMP
+	default "6" if SMP
+	---help---
+	  This allows you to specify the maximum number of CPUs which this
+	  kernel will support.  The maximum supported value is 6 and the
+	  minimum value which makes sense is 2.
+
+	  This is purely to save memory - each supported CPU adds
+	  approximately eight kilobytes to the kernel image.
+
+choice
+	prompt "Kernel page size"
+	default PAGE_SIZE_4KB
+	---help---
+	  Changes the default page size; use with caution.
+
+config PAGE_SIZE_4KB
+	bool "4KB"
+
+config PAGE_SIZE_16KB
+	bool "16KB"
+
+config PAGE_SIZE_64KB
+	bool "64KB"
+
+config PAGE_SIZE_256KB
+	bool "256KB"
+
+endchoice
+
+source "mm/Kconfig"
+
+source "kernel/Kconfig.hz"
+source "kernel/time/Kconfig"
+
+config GENERIC_GPIO
+	bool "Generic GPIO support"
+	default n
+
+endmenu
+
+source "init/Kconfig"
+source "drivers/Kconfig"
+source "fs/Kconfig"
+
+menu "Executable File Formats"
+source "fs/Kconfig.binfmt"
+endmenu
+
+source "net/Kconfig"
+source "security/Kconfig"
+source "crypto/Kconfig"
+source "lib/Kconfig"
+
+menu "Kernel hacking"
+source "lib/Kconfig.debug"
+endmenu
diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile
new file mode 100644
index 000000000000..0c4de8790fd5
--- /dev/null
+++ b/arch/hexagon/Makefile
@@ -0,0 +1,58 @@
+#  Makefile for the Hexagon arch
+
+KBUILD_DEFCONFIG = comet_defconfig
+
+# Do not use GP-relative jumps
+KBUILD_CFLAGS += -G0
+LDFLAGS_vmlinux += -G0
+
+# Do not use single-byte enums; these will overflow.
+KBUILD_CFLAGS += -fno-short-enums
+
+# Modules must use either long-calls, or use pic/plt.
+# Use long-calls for now, it's easier.  And faster.
+# CFLAGS_MODULE += -fPIC
+# LDFLAGS_MODULE += -shared
+CFLAGS_MODULE += -mlong-calls
+
+cflags-$(CONFIG_HEXAGON_ARCH_V1) += $(call cc-option,-mv1)
+cflags-$(CONFIG_HEXAGON_ARCH_V2) += $(call cc-option,-mv2)
+cflags-$(CONFIG_HEXAGON_ARCH_V3) += $(call cc-option,-mv3)
+cflags-$(CONFIG_HEXAGON_ARCH_V4) += $(call cc-option,-mv4)
+
+aflags-$(CONFIG_HEXAGON_ARCH_V1) += $(call cc-option,-mv1)
+aflags-$(CONFIG_HEXAGON_ARCH_V2) += $(call cc-option,-mv2)
+aflags-$(CONFIG_HEXAGON_ARCH_V3) += $(call cc-option,-mv3)
+aflags-$(CONFIG_HEXAGON_ARCH_V4) += $(call cc-option,-mv4)
+
+ldflags-$(CONFIG_HEXAGON_ARCH_V1) += $(call cc-option,-mv1)
+ldflags-$(CONFIG_HEXAGON_ARCH_V2) += $(call cc-option,-mv2)
+ldflags-$(CONFIG_HEXAGON_ARCH_V3) += $(call cc-option,-mv3)
+ldflags-$(CONFIG_HEXAGON_ARCH_V4) += $(call cc-option,-mv4)
+
+KBUILD_CFLAGS += $(cflags-y)
+KBUILD_AFLAGS += $(aflags-y)
+
+#  no KBUILD_LDFLAGS?
+LDFLAGS += $(ldflags-y)
+
+# Thread-info register will be r19.  This value is not configureable;
+# it is hard-coded in several files.
+TIR_NAME := r19
+KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
+KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
+
+LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+libs-y += $(LIBGCC)
+
+head-y := arch/hexagon/kernel/head.o \
+	arch/hexagon/kernel/init_task.o
+
+core-y += arch/hexagon/kernel/ \
+	arch/hexagon/mm/ \
+	arch/hexagon/lib/
+
+#	arch/hexagon/platform/common/
+#
+#core-$(CONFIG_HEXAGON_COMET)		+= arch/hexagon/platform/comet/
+#machine-$(CONFIG_HEXAGON_COMET)		:= comet
diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig
new file mode 100644
index 000000000000..e324f65f41e7
--- /dev/null
+++ b/arch/hexagon/configs/comet_defconfig
@@ -0,0 +1,85 @@
+CONFIG_SMP=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=0
+CONFIG_HZ_100=y
+CONFIG_EXPERIMENTAL=y
+CONFIG_CROSS_COMPILE="hexagon-"
+CONFIG_LOCALVERSION="-smp"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_STANDALONE is not set
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_NETDEVICES=y
+CONFIG_MII=y
+CONFIG_PHYLIB=y
+CONFIG_NET_ETHERNET=y
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_CONSOLE_TRANSLATIONS is not set
+CONFIG_LEGACY_PTY_COUNT=64
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_QUOTA=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+CONFIG_CRC_CCITT=y
+CONFIG_CRC16=y
+CONFIG_CRC_T10DIF=y
+CONFIG_LIBCRC32C=y
+CONFIG_FRAME_WARN=0
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
new file mode 100644
index 000000000000..9aa17f1917ea
--- /dev/null
+++ b/arch/hexagon/include/asm/Kbuild
@@ -0,0 +1,58 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += registers.h
+header-y += ucontext.h
+header-y += user.h
+
+generic-y += auxvec.h
+generic-y += bug.h
+generic-y += bugs.h
+generic-y += cpumask.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += ftrace.h
+generic-y += hardirq.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += iomap.h
+generic-y += ipcbuf.h
+generic-y += ipc.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += local64.h
+generic-y += local.h
+generic-y += local.h
+generic-y += mman.h
+generic-y += msgbuf.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += resource.h
+generic-y += rwsem.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += segment.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += siginfo.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += statfs.h
+generic-y += stat.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += topology.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += unaligned.h
+generic-y += xor.h
diff --git a/arch/hexagon/include/asm/asm-offsets.h b/arch/hexagon/include/asm/asm-offsets.h
new file mode 100644
index 000000000000..d370ee36a182
--- /dev/null
+++ b/arch/hexagon/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
new file mode 100644
index 000000000000..e220f9053035
--- /dev/null
+++ b/arch/hexagon/include/asm/atomic.h
@@ -0,0 +1,164 @@
+/*
+ * Atomic operations for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_ATOMIC_H
+#define _ASM_ATOMIC_H
+
+#include <linux/types.h>
+
+#define ATOMIC_INIT(i)		{ (i) }
+#define atomic_set(v, i)	((v)->counter = (i))
+
+/**
+ * atomic_read - reads a word, atomically
+ * @v: pointer to atomic value
+ *
+ * Assumes all word reads on our architecture are atomic.
+ */
+#define atomic_read(v)		((v)->counter)
+
+/**
+ * atomic_xchg - atomic
+ * @v: pointer to memory to change
+ * @new: new value (technically passed in a register -- see xchg)
+ */
+#define atomic_xchg(v, new)	(xchg(&((v)->counter), (new)))
+
+
+/**
+ * atomic_cmpxchg - atomic compare-and-exchange values
+ * @v: pointer to value to change
+ * @old:  desired old value to match
+ * @new:  new value to put in
+ *
+ * Parameters are then pointer, value-in-register, value-in-register,
+ * and the output is the old value.
+ *
+ * Apparently this is complicated for archs that don't support
+ * the memw_locked like we do (or it's broken or whatever).
+ *
+ * Kind of the lynchpin of the rest of the generically defined routines.
+ * Remember V2 had that bug with dotnew predicate set by memw_locked.
+ *
+ * "old" is "expected" old val, __oldval is actual old value
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int __oldval;
+
+	asm volatile(
+		"1:	%0 = memw_locked(%1);\n"
+		"	{ P0 = cmp.eq(%0,%2);\n"
+		"	  if (!P0.new) jump:nt 2f; }\n"
+		"	memw_locked(%1,P0) = %3;\n"
+		"	if (!P0) jump 1b;\n"
+		"2:\n"
+		: "=&r" (__oldval)
+		: "r" (&v->counter), "r" (old), "r" (new)
+		: "memory", "p0"
+	);
+
+	return __oldval;
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	int output;
+
+	__asm__ __volatile__ (
+		"1:	%0 = memw_locked(%1);\n"
+		"	%0 = add(%0,%2);\n"
+		"	memw_locked(%1,P3)=%0;\n"
+		"	if !P3 jump 1b;\n"
+		: "=&r" (output)
+		: "r" (&v->counter), "r" (i)
+		: "memory", "p3"
+	);
+	return output;
+
+}
+
+#define atomic_add(i, v) atomic_add_return(i, (v))
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+	int output;
+	__asm__ __volatile__ (
+		"1:	%0 = memw_locked(%1);\n"
+		"	%0 = sub(%0,%2);\n"
+		"	memw_locked(%1,P3)=%0\n"
+		"	if !P3 jump 1b;\n"
+		: "=&r" (output)
+		: "r" (&v->counter), "r" (i)
+		: "memory", "p3"
+	);
+	return output;
+}
+
+#define atomic_sub(i, v) atomic_sub_return(i, (v))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer to value
+ * @a: amount to add
+ * @u: unless value is equal to u
+ *
+ * Returns 1 if the add happened, 0 if it didn't.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int output, __oldval;
+	asm volatile(
+		"1:	%0 = memw_locked(%2);"
+		"	{"
+		"		p3 = cmp.eq(%0, %4);"
+		"		if (p3.new) jump:nt 2f;"
+		"		%0 = add(%0, %3);"
+		"		%1 = #0;"
+		"	}"
+		"	memw_locked(%2, p3) = %0;"
+		"	{"
+		"		if !p3 jump 1b;"
+		"		%1 = #1;"
+		"	}"
+		"2:"
+		: "=&r" (__oldval), "=&r" (output)
+		: "r" (v), "r" (a), "r" (u)
+		: "memory", "p3"
+	);
+	return output;
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_dec(v) atomic_sub(1, (v))
+
+#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
+#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
+
+
+#define atomic_inc_return(v) (atomic_add_return(1, v))
+#define atomic_dec_return(v) (atomic_sub_return(1, v))
+
+#endif
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
new file mode 100644
index 000000000000..d23461e080ff
--- /dev/null
+++ b/arch/hexagon/include/asm/bitops.h
@@ -0,0 +1,301 @@
+/*
+ * Bit operations for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_BITOPS_H
+#define _ASM_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+
+#ifdef __KERNEL__
+
+#define smp_mb__before_clear_bit()	barrier()
+#define smp_mb__after_clear_bit()	barrier()
+
+/*
+ * The offset calculations for these are based on BITS_PER_LONG == 32
+ * (i.e. I get to shift by #5-2 (32 bits per long, 4 bytes per access),
+ * mask by 0x0000001F)
+ *
+ * Typically, R10 is clobbered for address, R11 bit nr, and R12 is temp
+ */
+
+/**
+ * test_and_clear_bit - clear a bit and return its old value
+ * @nr:  bit number to clear
+ * @addr:  pointer to memory
+ */
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+	int oldval;
+
+	__asm__ __volatile__ (
+	"	{R10 = %1; R11 = asr(%2,#5); }\n"
+	"	{R10 += asl(R11,#2); R11 = and(%2,#0x1f)}\n"
+	"1:	R12 = memw_locked(R10);\n"
+	"	{ P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n"
+	"	memw_locked(R10,P1) = R12;\n"
+	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	: "=&r" (oldval)
+	: "r" (addr), "r" (nr)
+	: "r10", "r11", "r12", "p0", "p1", "memory"
+	);
+
+	return oldval;
+}
+
+/**
+ * test_and_set_bit - set a bit and return its old value
+ * @nr:  bit number to set
+ * @addr:  pointer to memory
+ */
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+	int oldval;
+
+	__asm__ __volatile__ (
+	"	{R10 = %1; R11 = asr(%2,#5); }\n"
+	"	{R10 += asl(R11,#2); R11 = and(%2,#0x1f)}\n"
+	"1:	R12 = memw_locked(R10);\n"
+	"	{ P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n"
+	"	memw_locked(R10,P1) = R12;\n"
+	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	: "=&r" (oldval)
+	: "r" (addr), "r" (nr)
+	: "r10", "r11", "r12", "p0", "p1", "memory"
+	);
+
+
+	return oldval;
+
+}
+
+/**
+ * test_and_change_bit - toggle a bit and return its old value
+ * @nr:  bit number to set
+ * @addr:  pointer to memory
+ */
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+	int oldval;
+
+	__asm__ __volatile__ (
+	"	{R10 = %1; R11 = asr(%2,#5); }\n"
+	"	{R10 += asl(R11,#2); R11 = and(%2,#0x1f)}\n"
+	"1:	R12 = memw_locked(R10);\n"
+	"	{ P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n"
+	"	memw_locked(R10,P1) = R12;\n"
+	"	{if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+	: "=&r" (oldval)
+	: "r" (addr), "r" (nr)
+	: "r10", "r11", "r12", "p0", "p1", "memory"
+	);
+
+	return oldval;
+
+}
+
+/*
+ * Atomic, but doesn't care about the return value.
+ * Rewrite later to save a cycle or two.
+ */
+
+static inline void clear_bit(int nr, volatile void *addr)
+{
+	test_and_clear_bit(nr, addr);
+}
+
+static inline void set_bit(int nr, volatile void *addr)
+{
+	test_and_set_bit(nr, addr);
+}
+
+static inline void change_bit(int nr, volatile void *addr)
+{
+	test_and_change_bit(nr, addr);
+}
+
+
+/*
+ * These are allowed to be non-atomic.  In fact the generic flavors are
+ * in non-atomic.h.  Would it be better to use intrinsics for this?
+ *
+ * OK, writes in our architecture do not invalidate LL/SC, so this has to
+ * be atomic, particularly for things like slab_lock and slab_unlock.
+ *
+ */
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	test_and_clear_bit(nr, addr);
+}
+
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	test_and_set_bit(nr, addr);
+}
+
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	test_and_change_bit(nr, addr);
+}
+
+/*  Apparently, at least some of these are allowed to be non-atomic  */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_clear_bit(nr, addr);
+}
+
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_set_bit(nr, addr);
+}
+
+static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_change_bit(nr, addr);
+}
+
+static inline int __test_bit(int nr, const volatile unsigned long *addr)
+{
+	int retval;
+
+	asm volatile(
+	"{P0 = tstbit(%1,%2); if (P0.new) %0 = #1; if (!P0.new) %0 = #0;}\n"
+	: "=&r" (retval)
+	: "r" (addr[BIT_WORD(nr)]), "r" (nr % BITS_PER_LONG)
+	: "p0"
+	);
+
+	return retval;
+}
+
+#define test_bit(nr, addr) __test_bit(nr, addr)
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline long ffz(int x)
+{
+	int r;
+
+	asm("%0 = ct1(%1);\n"
+		: "=&r" (r)
+		: "r" (x));
+	return r;
+}
+
+/*
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline long fls(int x)
+{
+	int r;
+
+	asm("{ %0 = cl0(%1);}\n"
+		"%0 = sub(#32,%0);\n"
+		: "=&r" (r)
+		: "r" (x)
+		: "p0");
+
+	return r;
+}
+
+/*
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline long ffs(int x)
+{
+	int r;
+
+	asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
+		"{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n"
+		: "=&r" (r)
+		: "r" (x)
+		: "p0");
+
+	return r;
+}
+
+/*
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ *
+ * bits_per_long assumed to be 32
+ * numbering starts at 0 I think (instead of 1 like ffs)
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	int num;
+
+	asm("%0 = ct0(%1);\n"
+		: "=&r" (num)
+		: "r" (word));
+
+	return num;
+}
+
+/*
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ * bits_per_long assumed to be 32
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	int num;
+
+	asm("%0 = cl0(%1);\n"
+		"%0 = sub(#31,%0);\n"
+		: "=&r" (num)
+		: "r" (word));
+
+	return num;
+}
+
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/find.h>
+
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/hweight.h>
+
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/hexagon/include/asm/bitsperlong.h b/arch/hexagon/include/asm/bitsperlong.h
new file mode 100644
index 000000000000..2701cae3426e
--- /dev/null
+++ b/arch/hexagon/include/asm/bitsperlong.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_HEXAGON_BITSPERLONG_H
+#define __ASM_HEXAGON_BITSPERLONG_H
+
+#define __BITS_PER_LONG 32
+
+#include <asm-generic/bitsperlong.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/byteorder.h b/arch/hexagon/include/asm/byteorder.h
new file mode 100644
index 000000000000..0e19b9fe4ca6
--- /dev/null
+++ b/arch/hexagon/include/asm/byteorder.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_BYTEORDER_H
+#define _ASM_BYTEORDER_H
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__)
+#  define __BYTEORDER_HAS_U64__
+#endif
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _ASM_BYTEORDER_H */
diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
new file mode 100644
index 000000000000..0f01de2eb4ab
--- /dev/null
+++ b/arch/hexagon/include/asm/cache.h
@@ -0,0 +1,34 @@
+/*
+ * Cache definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_CACHE_H
+#define __ASM_CACHE_H
+
+/* Bytes per L1 cache line */
+#define L1_CACHE_SHIFT		(5)
+#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
+
+#define __cacheline_aligned	__aligned(L1_CACHE_BYTES)
+#define ____cacheline_aligned	__aligned(L1_CACHE_BYTES)
+
+/* See http://kerneltrap.org/node/15100  */
+#define __read_mostly
+
+#endif
diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
new file mode 100644
index 000000000000..6865c1be927a
--- /dev/null
+++ b/arch/hexagon/include/asm/cacheflush.h
@@ -0,0 +1,99 @@
+/*
+ * Cache flush operations for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_CACHEFLUSH_H
+#define _ASM_CACHEFLUSH_H
+
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <asm/string.h>
+#include <asm-generic/cacheflush.h>
+
+/* Cache flushing:
+ *
+ *  - flush_cache_all() flushes entire cache
+ *  - flush_cache_mm(mm) flushes the specified mm context's cache lines
+ *  - flush_cache_page(mm, vmaddr, pfn) flushes a single page
+ *  - flush_cache_range(vma, start, end) flushes a range of pages
+ *  - flush_icache_range(start, end) flush a range of instructions
+ *  - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache
+ *  - flush_icache_page(vma, pg) flushes(invalidates) a page for icache
+ *
+ *  Need to doublecheck which one is really needed for ptrace stuff to work.
+ */
+#define LINESIZE	32
+#define LINEBITS	5
+
+/*
+ * Flush Dcache range through current map.
+ */
+extern void flush_dcache_range(unsigned long start, unsigned long end);
+
+/*
+ * Flush Icache range through current map.
+ */
+#undef flush_icache_range
+extern void flush_icache_range(unsigned long start, unsigned long end);
+
+/*
+ * Memory-management related flushes are there to ensure in non-physically
+ * indexed cache schemes that stale lines belonging to a given ASID aren't
+ * in the cache to confuse things.  The prototype Hexagon Virtual Machine
+ * only uses a single ASID for all user-mode maps, which should
+ * mean that they aren't necessary.  A brute-force, flush-everything
+ * implementation, with the name xxxxx_hexagon() is present in
+ * arch/hexagon/mm/cache.c, but let's not wire it up until we know
+ * it is needed.
+ */
+extern void flush_cache_all_hexagon(void);
+
+/*
+ * This may or may not ever have to be non-null, depending on the
+ * virtual machine MMU.  For a native kernel, it's definitiely  a no-op
+ *
+ * This is also the place where deferred cache coherency stuff seems
+ * to happen, classically...  but instead we do it like ia64 and
+ * clean the cache when the PTE is set.
+ *
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+					unsigned long address, pte_t *ptep)
+{
+	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
+}
+
+#undef copy_to_user_page
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+					     struct page *page,
+					     unsigned long vaddr,
+					     void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC) {
+		flush_icache_range((unsigned long) dst,
+		(unsigned long) dst + len);
+	}
+}
+
+
+extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
+extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);
+
+#endif
diff --git a/arch/hexagon/include/asm/checksum.h b/arch/hexagon/include/asm/checksum.h
new file mode 100644
index 000000000000..3ce4ecd44f82
--- /dev/null
+++ b/arch/hexagon/include/asm/checksum.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_CHECKSUM_H
+#define _ASM_CHECKSUM_H
+
+#define do_csum	do_csum
+unsigned int do_csum(const void *voidptr, int len);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+#define csum_partial_copy_nocheck csum_partial_copy_nocheck
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+					int len, __wsum sum);
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+#define csum_tcpudp_nofold csum_tcpudp_nofold
+__wsum csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
+	unsigned short len, unsigned short proto, __wsum sum);
+
+#define csum_tcpudp_magic csum_tcpudp_magic
+__sum16 csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
+	unsigned short len, unsigned short proto, __wsum sum);
+
+#include <asm-generic/checksum.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/delay.h b/arch/hexagon/include/asm/delay.h
new file mode 100644
index 000000000000..9ab12e9a872b
--- /dev/null
+++ b/arch/hexagon/include/asm/delay.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_DELAY_H
+#define _ASM_DELAY_H
+
+#include <asm/param.h>
+
+extern void __udelay(unsigned long usecs);
+
+#define udelay(usecs) __udelay((usecs))
+
+#endif /* _ASM_DELAY_H */
diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h
new file mode 100644
index 000000000000..448b224ba4ef
--- /dev/null
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -0,0 +1,101 @@
+/*
+ * DMA operations for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_DMA_MAPPING_H
+#define _ASM_DMA_MAPPING_H
+
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/dma-attrs.h>
+#include <asm/io.h>
+
+struct device;
+extern int bad_dma_address;
+
+extern struct dma_map_ops *dma_ops;
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	if (unlikely(dev == NULL))
+		return NULL;
+
+	return dma_ops;
+}
+
+extern int dma_supported(struct device *dev, u64 mask);
+extern int dma_set_mask(struct device *dev, u64 mask);
+extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle);
+extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+			   enum dma_data_direction direction);
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return 0;
+	return addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops->mapping_error)
+		return dma_ops->mapping_error(dev, dma_addr);
+
+	return (dma_addr == bad_dma_address);
+}
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *dma_handle, gfp_t flag)
+{
+	void *ret;
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!dma_ops);
+
+	ret = ops->alloc_coherent(dev, size, dma_handle, flag);
+
+	debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
+
+	return ret;
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+				     void *cpu_addr, dma_addr_t dma_handle)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	BUG_ON(!dma_ops);
+
+	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+
+	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+}
+
+#endif
diff --git a/arch/hexagon/include/asm/dma.h b/arch/hexagon/include/asm/dma.h
new file mode 100644
index 000000000000..da6d2f61a93a
--- /dev/null
+++ b/arch/hexagon/include/asm/dma.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <asm/io.h>
+
+#define MAX_DMA_CHANNELS 1
+#define MAX_DMA_ADDRESS  (PAGE_OFFSET)
+
+extern size_t hexagon_coherent_pool_size;
+
+#endif
diff --git a/arch/hexagon/include/asm/elf.h b/arch/hexagon/include/asm/elf.h
new file mode 100644
index 000000000000..37976a0d3650
--- /dev/null
+++ b/arch/hexagon/include/asm/elf.h
@@ -0,0 +1,229 @@
+/*
+ * ELF definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_ELF_H
+#define __ASM_ELF_H
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+/*
+ * This should really be in linux/elf-em.h.
+ */
+#define EM_HEXAGON	164   /* QUALCOMM Hexagon */
+
+struct elf32_hdr;
+
+/*
+ * ELF header e_flags defines.
+ */
+
+/*  should have stuff like "CPU type" and maybe "ABI version", etc  */
+
+/* Hexagon relocations */
+  /* V2 */
+#define R_HEXAGON_NONE           0
+#define R_HEXAGON_B22_PCREL      1
+#define R_HEXAGON_B15_PCREL      2
+#define R_HEXAGON_B7_PCREL       3
+#define R_HEXAGON_LO16           4
+#define R_HEXAGON_HI16           5
+#define R_HEXAGON_32             6
+#define R_HEXAGON_16             7
+#define R_HEXAGON_8              8
+#define R_HEXAGON_GPREL16_0      9
+#define R_HEXAGON_GPREL16_1     10
+#define R_HEXAGON_GPREL16_2     11
+#define R_HEXAGON_GPREL16_3     12
+#define R_HEXAGON_HL16          13
+  /* V3 */
+#define R_HEXAGON_B13_PCREL     14
+  /* V4 */
+#define R_HEXAGON_B9_PCREL      15
+  /* V4 (extenders) */
+#define R_HEXAGON_B32_PCREL_X   16
+#define R_HEXAGON_32_6_X        17
+  /* V4 (extended) */
+#define R_HEXAGON_B22_PCREL_X   18
+#define R_HEXAGON_B15_PCREL_X   19
+#define R_HEXAGON_B13_PCREL_X   20
+#define R_HEXAGON_B9_PCREL_X    21
+#define R_HEXAGON_B7_PCREL_X    22
+#define R_HEXAGON_16_X          23
+#define R_HEXAGON_12_X          24
+#define R_HEXAGON_11_X          25
+#define R_HEXAGON_10_X          26
+#define R_HEXAGON_9_X           27
+#define R_HEXAGON_8_X           28
+#define R_HEXAGON_7_X           29
+#define R_HEXAGON_6_X           30
+  /* V2 PIC */
+#define R_HEXAGON_32_PCREL      31
+#define R_HEXAGON_COPY          32
+#define R_HEXAGON_GLOB_DAT      33
+#define R_HEXAGON_JMP_SLOT      34
+#define R_HEXAGON_RELATIVE      35
+#define R_HEXAGON_PLT_B22_PCREL 36
+#define R_HEXAGON_GOTOFF_LO16   37
+#define R_HEXAGON_GOTOFF_HI16   38
+#define R_HEXAGON_GOTOFF_32     39
+#define R_HEXAGON_GOT_LO16      40
+#define R_HEXAGON_GOT_HI16      41
+#define R_HEXAGON_GOT_32        42
+#define R_HEXAGON_GOT_16        43
+
+/*
+ * ELF register definitions..
+ */
+typedef unsigned long elf_greg_t;
+
+typedef struct user_regs_struct elf_gregset_t;
+#define ELF_NGREG (sizeof(elf_gregset_t)/sizeof(unsigned long))
+
+/*  Placeholder  */
+typedef unsigned long elf_fpregset_t;
+
+/*
+ * Bypass the whole "regsets" thing for now and use the define.
+ */
+
+#define ELF_CORE_COPY_REGS(DEST, REGS)	\
+do {					\
+	DEST.r0 = REGS->r00;		\
+	DEST.r1 = REGS->r01;		\
+	DEST.r2 = REGS->r02;		\
+	DEST.r3 = REGS->r03;		\
+	DEST.r4 = REGS->r04;		\
+	DEST.r5 = REGS->r05;		\
+	DEST.r6 = REGS->r06;		\
+	DEST.r7 = REGS->r07;		\
+	DEST.r8 = REGS->r08;		\
+	DEST.r9 = REGS->r09;		\
+	DEST.r10 = REGS->r10;		\
+	DEST.r11 = REGS->r11;		\
+	DEST.r12 = REGS->r12;		\
+	DEST.r13 = REGS->r13;		\
+	DEST.r14 = REGS->r14;		\
+	DEST.r15 = REGS->r15;		\
+	DEST.r16 = REGS->r16;		\
+	DEST.r17 = REGS->r17;		\
+	DEST.r18 = REGS->r18;		\
+	DEST.r19 = REGS->r19;		\
+	DEST.r20 = REGS->r20;		\
+	DEST.r21 = REGS->r21;		\
+	DEST.r22 = REGS->r22;		\
+	DEST.r23 = REGS->r23;		\
+	DEST.r24 = REGS->r24;		\
+	DEST.r25 = REGS->r25;		\
+	DEST.r26 = REGS->r26;		\
+	DEST.r27 = REGS->r27;		\
+	DEST.r28 = REGS->r28;		\
+	DEST.r29 = pt_psp(REGS);	\
+	DEST.r30 = REGS->r30;		\
+	DEST.r31 = REGS->r31;		\
+	DEST.sa0 = REGS->sa0;		\
+	DEST.lc0 = REGS->lc0;		\
+	DEST.sa1 = REGS->sa1;		\
+	DEST.lc1 = REGS->lc1;		\
+	DEST.m0 = REGS->m0;		\
+	DEST.m1 = REGS->m1;		\
+	DEST.usr = REGS->usr;		\
+	DEST.p3_0 = REGS->preds;	\
+	DEST.gp = REGS->gp;		\
+	DEST.ugp = REGS->ugp;		\
+	DEST.pc = pt_elr(REGS);	\
+	DEST.cause = pt_cause(REGS);	\
+	DEST.badva = pt_badva(REGS);	\
+} while (0);
+
+
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ * Checks the machine and ABI type.
+ */
+#define elf_check_arch(hdr)	((hdr)->e_machine == EM_HEXAGON)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2LSB
+#define ELF_ARCH	EM_HEXAGON
+
+#ifdef CONFIG_HEXAGON_ARCH_V2
+#define ELF_CORE_EFLAGS 0x1
+#endif
+
+#ifdef CONFIG_HEXAGON_ARCH_V3
+#define ELF_CORE_EFLAGS 0x2
+#endif
+
+#ifdef CONFIG_HEXAGON_ARCH_V4
+#define ELF_CORE_EFLAGS 0x3
+#endif
+
+/*
+ * Some architectures have ld.so set up a pointer to a function
+ * to be registered using atexit, to facilitate cleanup.  So that
+ * static executables will be well-behaved, we would null the register
+ * in question here, in the pt_regs structure passed.  For now,
+ * leave it a null macro.
+ */
+#define ELF_PLAT_INIT(regs, load_addr) do { } while (0)
+
+#define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
+
+/* Hrm is this going to cause problems for changing PAGE_SIZE?  */
+#define ELF_EXEC_PAGESIZE	4096
+
+/*
+ * This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+ * use of this is to invoke "./ld.so someprog" to test out a new version of
+ * the loader.  We need to make sure that it is out of the way of the program
+ * that it will "exec", and that there is sufficient room for the brk.
+ */
+#define ELF_ET_DYN_BASE         0x08000000UL
+
+/*
+ * This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+#define ELF_HWCAP	(0)
+
+/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+#define ELF_PLATFORM  (NULL)
+
+#ifdef __KERNEL__
+#define SET_PERSONALITY(ex) set_personality(PER_LINUX)
+#endif
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+				       int uses_interp);
+
+
+#endif
diff --git a/arch/hexagon/include/asm/fixmap.h b/arch/hexagon/include/asm/fixmap.h
new file mode 100644
index 000000000000..b27f4941645b
--- /dev/null
+++ b/arch/hexagon/include/asm/fixmap.h
@@ -0,0 +1,73 @@
+/*
+ * Fixmap support for Hexagon - enough to support highmem features
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+/*
+ * A lot of the fixmap info is already in mem-layout.h
+ */
+#include <asm/mem-layout.h>
+
+/*
+ * Full fixmap support involves set_fixmap() functions, but
+ * these may not be needed if all we're after is an area for
+ * highmem kernel mappings.
+ */
+#define	__fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define	__virt_to_fix(x)	((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/**
+ * fix_to_virt -- "index to address" translation.
+ *
+ * If anyone tries to use the idx directly without translation,
+ * we catch the bug with a NULL-deference kernel oops. Illegal
+ * ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+	/*
+	 * This branch gets completely eliminated after inlining,
+	 * except when someone tries to use fixaddr indices in an
+	 * illegal way. (such as mixing up address types or using
+	 * out-of-range indices).
+	 *
+	 * If it doesn't get removed, the linker will complain
+	 * loudly with a reasonably clear error message..
+	 */
+	if (idx >= __end_of_fixed_addresses)
+		__this_fixmap_does_not_exist();
+
+	return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#define kmap_get_fixmap_pte(vaddr) \
+	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), \
+				(vaddr)), (vaddr)), (vaddr))
+
+#endif
diff --git a/arch/hexagon/include/asm/fpu.h b/arch/hexagon/include/asm/fpu.h
new file mode 100644
index 000000000000..0e135ea8c45b
--- /dev/null
+++ b/arch/hexagon/include/asm/fpu.h
@@ -0,0 +1,4 @@
+/*
+ * If the FPU is used inside the kernel,
+ * kernel_fpu_end() will be defined here.
+ */
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
new file mode 100644
index 000000000000..7e597f8434da
--- /dev/null
+++ b/arch/hexagon/include/asm/futex.h
@@ -0,0 +1,137 @@
+#ifndef _ASM_HEXAGON_FUTEX_H
+#define _ASM_HEXAGON_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+/* XXX TODO-- need to add sync barriers! */
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+	__asm__ __volatile( \
+	"1: %0 = memw_locked(%3);\n" \
+	    /* For example: %1 = %4 */ \
+	    insn \
+	"2: memw_locked(%3,p2) = %1;\n" \
+	"   if !p2 jump 1b;\n" \
+	"   %1 = #0;\n" \
+	"3:\n" \
+	".section .fixup,\"ax\"\n" \
+	"4: %1 = #%5;\n" \
+	"   jump 3b\n" \
+	".previous\n" \
+	".section __ex_table,\"a\"\n" \
+	".long 1b,4b,2b,4b\n" \
+	".previous\n" \
+	: "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \
+	: "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
+	: "p2", "memory")
+
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("%1 = %4\n", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("%1 = add(%0,%4)\n", ret, oldval, uaddr,
+				  oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("%1 = or(%0,%4)\n", ret, oldval, uaddr,
+				  oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("%1 = not(%4); %1 = and(%0,%1)\n", ret,
+				  oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("%1 = xor(%0,%4)\n", ret, oldval, uaddr,
+				  oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ:
+			ret = (oldval == cmparg);
+			break;
+		case FUTEX_OP_CMP_NE:
+			ret = (oldval != cmparg);
+			break;
+		case FUTEX_OP_CMP_LT:
+			ret = (oldval < cmparg);
+			break;
+		case FUTEX_OP_CMP_GE:
+			ret = (oldval >= cmparg);
+			break;
+		case FUTEX_OP_CMP_LE:
+			ret = (oldval <= cmparg);
+			break;
+		case FUTEX_OP_CMP_GT:
+			ret = (oldval > cmparg);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
+			      u32 newval)
+{
+	int prev;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	__asm__ __volatile__ (
+	"1: %1 = memw_locked(%3)\n"
+	"   {\n"
+	"      p2 = cmp.eq(%1,%4)\n"
+	"      if !p2.new jump:NT 3f\n"
+	"   }\n"
+	"2: memw_locked(%3,p2) = %5\n"
+	"   if !p2 jump 1b\n"
+	"3:\n"
+	".section .fixup,\"ax\"\n"
+	"4: %0 = #%6\n"
+	"   jump 3b\n"
+	".previous\n"
+	".section __ex_table,\"a\"\n"
+	".long 1b,4b,2b,4b\n"
+	".previous\n"
+	: "+r" (ret), "=&r" (prev), "+m" (*uaddr)
+	: "r" (uaddr), "r" (oldval), "r" (newval), "i"(-EFAULT)
+	: "p2", "memory");
+
+	*uval = prev;
+	return ret;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_HEXAGON_FUTEX_H */
diff --git a/arch/hexagon/include/asm/hexagon_vm.h b/arch/hexagon/include/asm/hexagon_vm.h
new file mode 100644
index 000000000000..182cb9d54769
--- /dev/null
+++ b/arch/hexagon/include/asm/hexagon_vm.h
@@ -0,0 +1,281 @@
+/*
+ * Declarations for to Hexagon Virtal Machine.
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef ASM_HEXAGON_VM_H
+#define ASM_HEXAGON_VM_H
+
+/*
+ * In principle, a Linux kernel for the VM could
+ * selectively define the virtual instructions
+ * as inline assembler macros, but for a first pass,
+ * we'll use subroutines for both the VM and the native
+ * kernels.  It's costing a subroutine call/return,
+ * but it makes for a single set of entry points
+ * for tracing/debugging.
+ */
+
+/*
+ * Lets make this stuff visible only if configured,
+ * so we can unconditionally include the file.
+ */
+
+#ifndef __ASSEMBLY__
+
+enum VM_CACHE_OPS {
+	ickill,
+	dckill,
+	l2kill,
+	dccleaninva,
+	icinva,
+	idsync,
+	fetch_cfg
+};
+
+enum VM_INT_OPS {
+	nop,
+	globen,
+	globdis,
+	locen,
+	locdis,
+	affinity,
+	get,
+	peek,
+	status,
+	post,
+	clear
+};
+
+extern void _K_VM_event_vector(void);
+
+void __vmrte(void);
+long __vmsetvec(void *);
+long __vmsetie(long);
+long __vmgetie(void);
+long __vmintop(enum VM_INT_OPS, long, long, long, long);
+long __vmclrmap(void *, unsigned long);
+long __vmnewmap(void *);
+long __vmcache(enum VM_CACHE_OPS op, unsigned long addr, unsigned long len);
+unsigned long long __vmgettime(void);
+long __vmsettime(unsigned long long);
+long __vmstart(void *, void *);
+void __vmstop(void);
+long __vmwait(void);
+void __vmyield(void);
+long __vmvpid(void);
+
+static inline long __vmcache_ickill(void)
+{
+	return __vmcache(ickill, 0, 0);
+}
+
+static inline long __vmcache_dckill(void)
+{
+	return __vmcache(dckill, 0, 0);
+}
+
+static inline long __vmcache_l2kill(void)
+{
+	return __vmcache(l2kill, 0, 0);
+}
+
+static inline long __vmcache_dccleaninva(unsigned long addr, unsigned long len)
+{
+	return __vmcache(dccleaninva, addr, len);
+}
+
+static inline long __vmcache_icinva(unsigned long addr, unsigned long len)
+{
+	return __vmcache(icinva, addr, len);
+}
+
+static inline long __vmcache_idsync(unsigned long addr,
+					   unsigned long len)
+{
+	return __vmcache(idsync, addr, len);
+}
+
+static inline long __vmcache_fetch_cfg(unsigned long val)
+{
+	return __vmcache(fetch_cfg, val, 0);
+}
+
+/* interrupt operations  */
+
+static inline long __vmintop_nop(void)
+{
+	return __vmintop(nop, 0, 0, 0, 0);
+}
+
+static inline long __vmintop_globen(long i)
+{
+	return __vmintop(globen, i, 0, 0, 0);
+}
+
+static inline long __vmintop_globdis(long i)
+{
+	return __vmintop(globdis, i, 0, 0, 0);
+}
+
+static inline long __vmintop_locen(long i)
+{
+	return __vmintop(locen, i, 0, 0, 0);
+}
+
+static inline long __vmintop_locdis(long i)
+{
+	return __vmintop(locdis, i, 0, 0, 0);
+}
+
+static inline long __vmintop_affinity(long i, long cpu)
+{
+	return __vmintop(locdis, i, cpu, 0, 0);
+}
+
+static inline long __vmintop_get(void)
+{
+	return __vmintop(get, 0, 0, 0, 0);
+}
+
+static inline long __vmintop_peek(void)
+{
+	return __vmintop(peek, 0, 0, 0, 0);
+}
+
+static inline long __vmintop_status(long i)
+{
+	return __vmintop(status, i, 0, 0, 0);
+}
+
+static inline long __vmintop_post(long i)
+{
+	return __vmintop(post, i, 0, 0, 0);
+}
+
+static inline long __vmintop_clear(long i)
+{
+	return __vmintop(clear, i, 0, 0, 0);
+}
+
+#else /* Only assembly code should reference these */
+
+#define HVM_TRAP1_VMRTE			1
+#define HVM_TRAP1_VMSETVEC		2
+#define HVM_TRAP1_VMSETIE		3
+#define HVM_TRAP1_VMGETIE		4
+#define HVM_TRAP1_VMINTOP		5
+#define HVM_TRAP1_VMCLRMAP		10
+#define HVM_TRAP1_VMNEWMAP		11
+#define HVM_TRAP1_FORMERLY_VMWIRE	12
+#define HVM_TRAP1_VMCACHE		13
+#define HVM_TRAP1_VMGETTIME		14
+#define HVM_TRAP1_VMSETTIME		15
+#define HVM_TRAP1_VMWAIT		16
+#define HVM_TRAP1_VMYIELD		17
+#define HVM_TRAP1_VMSTART		18
+#define HVM_TRAP1_VMSTOP		19
+#define HVM_TRAP1_VMVPID		20
+#define HVM_TRAP1_VMSETREGS		21
+#define HVM_TRAP1_VMGETREGS		22
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Constants for virtual instruction parameters and return values
+ */
+
+/* vmsetie arguments */
+
+#define VM_INT_DISABLE	0
+#define VM_INT_ENABLE	1
+
+/* vmsetimask arguments */
+
+#define VM_INT_UNMASK	0
+#define VM_INT_MASK	1
+
+#define VM_NEWMAP_TYPE_LINEAR	0
+#define VM_NEWMAP_TYPE_PGTABLES	1
+
+
+/*
+ * Event Record definitions useful to both C and Assembler
+ */
+
+/* VMEST Layout */
+
+#define HVM_VMEST_UM_SFT	31
+#define HVM_VMEST_UM_MSK	1
+#define HVM_VMEST_IE_SFT	30
+#define HVM_VMEST_IE_MSK	1
+#define HVM_VMEST_EVENTNUM_SFT	16
+#define HVM_VMEST_EVENTNUM_MSK	0xff
+#define HVM_VMEST_CAUSE_SFT	0
+#define HVM_VMEST_CAUSE_MSK	0xffff
+
+/*
+ * The initial program gets to find a system environment descriptor
+ * on its stack when it begins exection. The first word is a version
+ * code to indicate what is there.  Zero means nothing more.
+ */
+
+#define HEXAGON_VM_SED_NULL	0
+
+/*
+ * Event numbers for vector binding
+ */
+
+#define HVM_EV_RESET		0
+#define HVM_EV_MACHCHECK	1
+#define HVM_EV_GENEX		2
+#define HVM_EV_TRAP		8
+#define HVM_EV_INTR		15
+/* These shoud be nuked as soon as we know the VM is up to spec v0.1.1 */
+#define HVM_EV_INTR_0		16
+#define HVM_MAX_INTR		240
+
+/*
+ * Cause values for General Exception
+ */
+
+#define HVM_GE_C_BUS	0x01
+#define HVM_GE_C_XPROT	0x11
+#define HVM_GE_C_XUSER	0x14
+#define HVM_GE_C_INVI	0x15
+#define HVM_GE_C_PRIVI	0x1B
+#define HVM_GE_C_XMAL	0x1C
+#define HVM_GE_C_RMAL	0x20
+#define HVM_GE_C_WMAL	0x21
+#define HVM_GE_C_RPROT	0x22
+#define HVM_GE_C_WPROT	0x23
+#define HVM_GE_C_RUSER	0x24
+#define HVM_GE_C_WUSER	0x25
+#define HVM_GE_C_CACHE	0x28
+
+/*
+ * Cause codes for Machine Check
+ */
+
+#define	HVM_MCHK_C_DOWN		0x00
+#define	HVM_MCHK_C_BADSP	0x01
+#define	HVM_MCHK_C_BADEX	0x02
+#define	HVM_MCHK_C_BADPT	0x03
+#define	HVM_MCHK_C_REGWR	0x29
+
+#endif
diff --git a/arch/hexagon/include/asm/intrinsics.h b/arch/hexagon/include/asm/intrinsics.h
new file mode 100644
index 000000000000..1c02186d2e9a
--- /dev/null
+++ b/arch/hexagon/include/asm/intrinsics.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_HEXAGON_INTRINSICS_H
+#define _ASM_HEXAGON_INTRINSICS_H
+
+#define HEXAGON_P_vrmpyhacc_PP	__builtin_HEXAGON_M2_vrmac_s0
+#define HEXAGON_P_vrmpyh_PP	__builtin_HEXAGON_M2_vrmpy_s0
+#define HEXAGON_R_cl0_R		__builtin_HEXAGON_S2_cl0
+
+#endif
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
new file mode 100644
index 000000000000..b3acc2cc71bf
--- /dev/null
+++ b/arch/hexagon/include/asm/io.h
@@ -0,0 +1,326 @@
+/*
+ * IO definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <asm/string.h>
+#include <asm/mem-layout.h>
+#include <asm/iomap.h>
+#include <asm/page.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/*
+ * We don't have PCI yet.
+ * _IO_BASE is pointing at what should be unused virtual space.
+ */
+#define IO_SPACE_LIMIT 0xffff
+#define _IO_BASE ((void __iomem *)0xfe000000)
+
+extern int remap_area_pages(unsigned long start, unsigned long phys_addr,
+				unsigned long end, unsigned long flags);
+
+extern void __iounmap(const volatile void __iomem *addr);
+
+/* Defined in lib/io.c, needed for smc91x driver. */
+extern void __raw_readsw(const void __iomem *addr, void *data, int wordlen);
+extern void __raw_writesw(void __iomem *addr, const void *data, int wordlen);
+
+extern void __raw_readsl(const void __iomem *addr, void *data, int wordlen);
+extern void __raw_writesl(void __iomem *addr, const void *data, int wordlen);
+
+#define readsw(p, d, l)	__raw_readsw(p, d, l)
+#define writesw(p, d, l) __raw_writesw(p, d, l)
+
+#define readsl(p, d, l)   __raw_readsl(p, d, l)
+#define writesl(p, d, l)  __raw_writesl(p, d, l)
+
+/*
+ * virt_to_phys - map virtual address to physical
+ * @address:  address to map
+ */
+static inline unsigned long virt_to_phys(volatile void *address)
+{
+	return __pa(address);
+}
+
+/*
+ * phys_to_virt - map physical address to virtual
+ * @address: address to map
+ */
+static inline void *phys_to_virt(unsigned long address)
+{
+	return __va(address);
+}
+
+/*
+ * convert a physical pointer to a virtual kernel pointer for
+ * /dev/mem access.
+ */
+#define xlate_dev_kmem_ptr(p)    __va(p)
+#define xlate_dev_mem_ptr(p)    __va(p)
+
+/*
+ * IO port access primitives.  Hexagon doesn't have special IO access
+ * instructions; all I/O is memory mapped.
+ *
+ * in/out are used for "ports", but we don't have "port instructions",
+ * so these are really just memory mapped too.
+ */
+
+/*
+ * readb - read byte from memory mapped device
+ * @addr:  pointer to memory
+ *
+ * Operates on "I/O bus memory space"
+ */
+static inline u8 readb(const volatile void __iomem *addr)
+{
+	u8 val;
+	asm volatile(
+		"%0 = memb(%1);"
+		: "=&r" (val)
+		: "r" (addr)
+	);
+	return val;
+}
+
+static inline u16 readw(const volatile void __iomem *addr)
+{
+	u16 val;
+	asm volatile(
+		"%0 = memh(%1);"
+		: "=&r" (val)
+		: "r" (addr)
+	);
+	return val;
+}
+
+static inline u32 readl(const volatile void __iomem *addr)
+{
+	u32 val;
+	asm volatile(
+		"%0 = memw(%1);"
+		: "=&r" (val)
+		: "r" (addr)
+	);
+	return val;
+}
+
+/*
+ * writeb - write a byte to a memory location
+ * @data: data to write to
+ * @addr:  pointer to memory
+ *
+ */
+static inline void writeb(u8 data, volatile void __iomem *addr)
+{
+	asm volatile(
+		"memb(%0) = %1;"
+		:
+		: "r" (addr), "r" (data)
+		: "memory"
+	);
+}
+
+static inline void writew(u16 data, volatile void __iomem *addr)
+{
+	asm volatile(
+		"memh(%0) = %1;"
+		:
+		: "r" (addr), "r" (data)
+		: "memory"
+	);
+
+}
+
+static inline void writel(u32 data, volatile void __iomem *addr)
+{
+	asm volatile(
+		"memw(%0) = %1;"
+		:
+		: "r" (addr), "r" (data)
+		: "memory"
+	);
+}
+
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+/*
+ * Need an mtype somewhere in here, for cache type deals?
+ * This is probably too long for an inline.
+ */
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size);
+
+static inline void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
+{
+	return ioremap_nocache(phys_addr, size);
+}
+
+static inline void iounmap(volatile void __iomem *addr)
+{
+	__iounmap(addr);
+}
+
+#define __raw_writel writel
+
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+	int count)
+{
+	memcpy(dst, (void *) src, count);
+}
+
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+	int count)
+{
+	memcpy((void *) dst, src, count);
+}
+
+#define PCI_IO_ADDR	(volatile void __iomem *)
+
+/*
+ * inb - read byte from I/O port or something
+ * @port:  address in I/O space
+ *
+ * Operates on "I/O bus I/O space"
+ */
+static inline u8 inb(unsigned long port)
+{
+	return readb(_IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+static inline u16 inw(unsigned long port)
+{
+	return readw(_IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+static inline u32 inl(unsigned long port)
+{
+	return readl(_IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+/*
+ * outb - write a byte to a memory location
+ * @data: data to write to
+ * @addr:  address in I/O space
+ */
+static inline void outb(u8 data, unsigned long port)
+{
+	writeb(data, _IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+static inline void outw(u16 data, unsigned long port)
+{
+	writew(data, _IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+static inline void outl(u32 data, unsigned long port)
+{
+	writel(data, _IO_BASE + (port & IO_SPACE_LIMIT));
+}
+
+#define outb_p outb
+#define outw_p outw
+#define outl_p outl
+
+#define inb_p inb
+#define inw_p inw
+#define inl_p inl
+
+static inline void insb(unsigned long port, void *buffer, int count)
+{
+	if (count) {
+		u8 *buf = buffer;
+		do {
+			u8 x = inb(port);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insw(unsigned long port, void *buffer, int count)
+{
+	if (count) {
+		u16 *buf = buffer;
+		do {
+			u16 x = inw(port);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void insl(unsigned long port, void *buffer, int count)
+{
+	if (count) {
+		u32 *buf = buffer;
+		do {
+			u32 x = inw(port);
+			*buf++ = x;
+		} while (--count);
+	}
+}
+
+static inline void outsb(unsigned long port, const void *buffer, int count)
+{
+	if (count) {
+		const u8 *buf = buffer;
+		do {
+			outb(*buf++, port);
+		} while (--count);
+	}
+}
+
+static inline void outsw(unsigned long port, const void *buffer, int count)
+{
+	if (count) {
+		const u16 *buf = buffer;
+		do {
+			outw(*buf++, port);
+		} while (--count);
+	}
+}
+
+static inline void outsl(unsigned long port, const void *buffer, int count)
+{
+	if (count) {
+		const u32 *buf = buffer;
+		do {
+			outl(*buf++, port);
+		} while (--count);
+	}
+}
+
+#define flush_write_buffers() do { } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/arch/hexagon/include/asm/irq.h b/arch/hexagon/include/asm/irq.h
new file mode 100644
index 000000000000..ded8c15cf3e5
--- /dev/null
+++ b/arch/hexagon/include/asm/irq.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_IRQ_H_
+#define _ASM_IRQ_H_
+
+/* Number of first-level interrupts associated with the CPU core. */
+#define HEXAGON_CPUINTS 32
+
+/*
+ * Must define NR_IRQS before including <asm-generic/irq.h>
+ * 64 == the two SIRC's, 176 == the two gpio's
+ *
+ * IRQ configuration is still in flux; defining this to a comfortably
+ * large number.
+ */
+#define NR_IRQS 512
+
+#include <asm-generic/irq.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/irqflags.h b/arch/hexagon/include/asm/irqflags.h
new file mode 100644
index 000000000000..ec1523655416
--- /dev/null
+++ b/arch/hexagon/include/asm/irqflags.h
@@ -0,0 +1,62 @@
+/*
+ * IRQ support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#include <asm/hexagon_vm.h>
+#include <linux/types.h>
+
+static inline unsigned long arch_local_save_flags(void)
+{
+	return __vmgetie();
+}
+
+static inline unsigned long arch_local_irq_save(void)
+{
+	return __vmsetie(VM_INT_DISABLE);
+}
+
+static inline bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return !flags;
+}
+
+static inline bool arch_irqs_disabled(void)
+{
+	return !__vmgetie();
+}
+
+static inline void arch_local_irq_enable(void)
+{
+	__vmsetie(VM_INT_ENABLE);
+}
+
+static inline void arch_local_irq_disable(void)
+{
+	__vmsetie(VM_INT_DISABLE);
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+	__vmsetie(flags);
+}
+
+#endif
diff --git a/arch/hexagon/include/asm/kgdb.h b/arch/hexagon/include/asm/kgdb.h
new file mode 100644
index 000000000000..9e8779702f10
--- /dev/null
+++ b/arch/hexagon/include/asm/kgdb.h
@@ -0,0 +1,43 @@
+/*
+ * arch/hexagon/include/asm/kgdb.h - Hexagon KGDB Support
+ *
+ * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __HEXAGON_KGDB_H__
+#define __HEXAGON_KGDB_H__
+
+#define BREAK_INSTR_SIZE 4
+#define CACHE_FLUSH_IS_SAFE   1
+#define BUFMAX       ((NUMREGBYTES * 2) + 512)
+
+static inline void arch_kgdb_breakpoint(void)
+{
+	asm("trap0(#0xDB)");
+}
+
+/* Registers:
+ * 32 gpr + sa0/1 + lc0/1 + m0/1 + gp + ugp + pred + pc = 42 total.
+ * vm regs = psp+elr+est+badva = 4
+ * syscall+restart = 2 more
+ * so 48 = 42 +4 + 2
+ */
+#define DBG_USER_REGS 42
+#define DBG_MAX_REG_NUM (DBG_USER_REGS + 6)
+#define NUMREGBYTES  (DBG_MAX_REG_NUM*4)
+
+#endif /* __HEXAGON_KGDB_H__ */
diff --git a/arch/hexagon/include/asm/linkage.h b/arch/hexagon/include/asm/linkage.h
new file mode 100644
index 000000000000..a00b85f680b8
--- /dev/null
+++ b/arch/hexagon/include/asm/linkage.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN		.align 4
+#define __ALIGN_STR	".align 4"
+
+#endif
diff --git a/arch/hexagon/include/asm/mem-layout.h b/arch/hexagon/include/asm/mem-layout.h
new file mode 100644
index 000000000000..72e5dcda79f5
--- /dev/null
+++ b/arch/hexagon/include/asm/mem-layout.h
@@ -0,0 +1,112 @@
+/*
+ * Memory layout definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_HEXAGON_MEM_LAYOUT_H
+#define _ASM_HEXAGON_MEM_LAYOUT_H
+
+#include <linux/const.h>
+
+/*
+ * Have to do this for ginormous numbers, else they get printed as
+ * negative numbers, which the linker no likey when you try to
+ * assign it to the location counter.
+ */
+
+#define PAGE_OFFSET			_AC(0xc0000000, UL)
+
+/*
+ * LOAD_ADDRESS is the physical/linear address of where in memory
+ * the kernel gets loaded. The 12 least significant bits must be zero (0)
+ * due to limitations on setting the EVB
+ *
+ */
+
+#ifndef LOAD_ADDRESS
+#define LOAD_ADDRESS			0x00000000
+#endif
+
+#define TASK_SIZE			(PAGE_OFFSET)
+
+/*  not sure how these are used yet  */
+#define STACK_TOP			TASK_SIZE
+#define STACK_TOP_MAX			TASK_SIZE
+
+#ifndef __ASSEMBLY__
+enum fixed_addresses {
+	FIX_KMAP_BEGIN,
+	FIX_KMAP_END,  /*  check for per-cpuism  */
+	__end_of_fixed_addresses
+};
+
+#define MIN_KERNEL_SEG 0x300   /* From 0xc0000000 */
+extern int max_kernel_seg;
+
+/*
+ * Start of vmalloc virtual address space for kernel;
+ * supposed to be based on the amount of physical memory available
+ */
+
+#define VMALLOC_START (PAGE_OFFSET + VMALLOC_OFFSET + \
+	(unsigned long)high_memory)
+
+/* Gap between physical ram and vmalloc space for guard purposes. */
+#define VMALLOC_OFFSET PAGE_SIZE
+
+/*
+ * Create the space between VMALLOC_START and FIXADDR_TOP backwards
+ * from the ... "top".
+ *
+ * Permanent IO mappings will live at 0xfexx_xxxx
+ * Hypervisor occupies the last 16MB page at 0xffxxxxxx
+ */
+
+#define FIXADDR_TOP     0xfe000000
+#define FIXADDR_SIZE    (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START   (FIXADDR_TOP - FIXADDR_SIZE)
+
+/*
+ * "permanent kernel mappings", defined as long-lasting mappings of
+ * high-memory page frames into the kernel address space.
+ */
+
+#define LAST_PKMAP	PTRS_PER_PTE
+#define LAST_PKMAP_MASK	(LAST_PKMAP - 1)
+#define PKMAP_NR(virt)	((virt - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)	(PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+/*
+ * To the "left" of the fixed map space is the kmap space
+ *
+ * "Permanent Kernel Mappings"; fancy (or less fancy) PTE table
+ * that looks like it's actually walked.
+ * Need to check the alignment/shift usage; some archs use
+ * PMD_MASK on this value
+ */
+#define PKMAP_BASE (FIXADDR_START-PAGE_SIZE*LAST_PKMAP)
+
+/*
+ * 2 pages of guard gap between where vmalloc area ends
+ * and pkmap_base begins.
+ */
+#define VMALLOC_END (PKMAP_BASE-PAGE_SIZE*2)
+#endif /*  !__ASSEMBLY__  */
+
+
+#endif /* _ASM_HEXAGON_MEM_LAYOUT_H */
diff --git a/arch/hexagon/include/asm/mmu.h b/arch/hexagon/include/asm/mmu.h
new file mode 100644
index 000000000000..30a5d8d2659d
--- /dev/null
+++ b/arch/hexagon/include/asm/mmu.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_MMU_H
+#define _ASM_MMU_H
+
+#include <asm/vdso.h>
+
+/*
+ * Architecture-specific state for a mm_struct.
+ * For the Hexagon Virtual Machine, it can be a copy
+ * of the pointer to the page table base.
+ */
+struct mm_context {
+	unsigned long long generation;
+	unsigned long ptbase;
+	struct hexagon_vdso *vdso;
+};
+
+typedef struct mm_context mm_context_t;
+
+#endif
diff --git a/arch/hexagon/include/asm/mmu_context.h b/arch/hexagon/include/asm/mmu_context.h
new file mode 100644
index 000000000000..b4fe5a5411b6
--- /dev/null
+++ b/arch/hexagon/include/asm/mmu_context.h
@@ -0,0 +1,100 @@
+/*
+ * MM context support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_MMU_CONTEXT_H
+#define _ASM_MMU_CONTEXT_H
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/mem-layout.h>
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+}
+
+/*
+ * VM port hides all TLB management, so "lazy TLB" isn't very
+ * meaningful.  Even for ports to architectures with visble TLBs,
+ * this is almost invariably a null function.
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+	struct task_struct *tsk)
+{
+}
+
+/*
+ * Architecture-specific actions, if any, for memory map deactivation.
+ */
+static inline void deactivate_mm(struct task_struct *tsk,
+	struct mm_struct *mm)
+{
+}
+
+/**
+ * init_new_context - initialize context related info for new mm_struct instance
+ * @tsk: pointer to a task struct
+ * @mm: pointer to a new mm struct
+ */
+static inline int init_new_context(struct task_struct *tsk,
+					struct mm_struct *mm)
+{
+	/* mm->context is set up by pgd_alloc */
+	return 0;
+}
+
+/*
+ *  Switch active mm context
+ */
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+				struct task_struct *tsk)
+{
+	int l1;
+
+	/*
+	 * For virtual machine, we have to update system map if it's been
+	 * touched.
+	 */
+	if (next->context.generation < prev->context.generation) {
+		for (l1 = MIN_KERNEL_SEG; l1 <= max_kernel_seg; l1++)
+			next->pgd[l1] = init_mm.pgd[l1];
+
+		next->context.generation = prev->context.generation;
+	}
+
+	__vmnewmap((void *)next->context.ptbase);
+}
+
+/*
+ *  Activate new memory map for task
+ */
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm(prev, next, current_thread_info()->task);
+	local_irq_restore(flags);
+}
+
+/*  Generic hooks for arch_dup_mmap and arch_exit_mmap  */
+#include <asm-generic/mm_hooks.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/module.h b/arch/hexagon/include/asm/module.h
new file mode 100644
index 000000000000..72ba494e6d7d
--- /dev/null
+++ b/arch/hexagon/include/asm/module.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_MODULE_H
+#define _ASM_MODULE_H
+
+#include <asm-generic/module.h>
+
+#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " "
+
+#endif
diff --git a/arch/hexagon/include/asm/mutex.h b/arch/hexagon/include/asm/mutex.h
new file mode 100644
index 000000000000..58b52de1bc22
--- /dev/null
+++ b/arch/hexagon/include/asm/mutex.h
@@ -0,0 +1,8 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+#include <asm-generic/mutex-xchg.h>
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
new file mode 100644
index 000000000000..edd97626c482
--- /dev/null
+++ b/arch/hexagon/include/asm/page.h
@@ -0,0 +1,157 @@
+/*
+ * Page management definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PAGE_H
+#define _ASM_PAGE_H
+
+#include <linux/const.h>
+
+/*  This is probably not the most graceful way to handle this.  */
+
+#ifdef CONFIG_PAGE_SIZE_4KB
+#define PAGE_SHIFT 12
+#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_4KB
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_16KB
+#define PAGE_SHIFT 14
+#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_16KB
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_64KB
+#define PAGE_SHIFT 16
+#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_64KB
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_256KB
+#define PAGE_SHIFT 18
+#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_256KB
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_1MB
+#define PAGE_SHIFT 20
+#define HEXAGON_L1_PTE_SIZE __HVM_PDE_S_1MB
+#endif
+
+/*
+ *  These should be defined in hugetlb.h, but apparently not.
+ *  "Huge" for us should be 4MB or 16MB, which are both represented
+ *  in L1 PTE's.  Right now, it's set up for 4MB.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+#define HPAGE_SHIFT 22
+#define HPAGE_SIZE (1UL << HPAGE_SHIFT)
+#define HPAGE_MASK (~(HPAGE_SIZE-1))
+#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT)
+#define HVM_HUGEPAGE_SIZE 0x5
+#endif
+
+#define PAGE_SIZE  (1UL << PAGE_SHIFT)
+#define PAGE_MASK  (~((1 << PAGE_SHIFT) - 1))
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+/*
+ * This is for PFN_DOWN, which mm.h needs.  Seems the right place to pull it in.
+ */
+#include <linux/pfn.h>
+
+/*
+ * We implement a two-level architecture-specific page table structure.
+ * Null intermediate page table level (pmd, pud) definitions will come from
+ * asm-generic/pagetable-nopmd.h and asm-generic/pagetable-nopud.h
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)     ((x).pte)
+#define pgd_val(x)     ((x).pgd)
+#define pgprot_val(x)  ((x).pgprot)
+#define __pte(x)       ((pte_t) { (x) })
+#define __pgd(x)       ((pgd_t) { (x) })
+#define __pgprot(x)    ((pgprot_t) { (x) })
+
+/*
+ * We need a __pa and a __va routine for kernel space.
+ * MIPS says they're only used during mem_init.
+ * also, check if we need a PHYS_OFFSET.
+ */
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
+
+/* The "page frame" descriptor is defined in linux/mm.h */
+struct page;
+
+/* Returns page frame descriptor for virtual address. */
+#define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr)))
+
+/* Default vm area behavior is non-executable.  */
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
+				VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define pfn_valid(pfn) ((pfn) < max_mapnr)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+/*  Need to not use a define for linesize; may move this to another file.  */
+static inline void clear_page(void *page)
+{
+	/*  This can only be done on pages with L1 WB cache */
+	asm volatile(
+		"	loop0(1f,%1);\n"
+		"1:	{ dczeroa(%0);\n"
+		"	  %0 = add(%0,#32); }:endloop0\n"
+		: "+r" (page)
+		: "r" (PAGE_SIZE/32)
+		: "lc0", "sa0", "memory"
+	);
+}
+
+#define copy_page(to, from)	memcpy((to), (from), PAGE_SIZE)
+
+/*
+ * Under assumption that kernel always "sees" user map...
+ */
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+/*
+ * page_to_phys - convert page to physical address
+ * @page - pointer to page entry in mem_map
+ */
+#define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
+
+/*
+ * For port to Hexagon Virtual Machine, MAYBE we check for attempts
+ * to reference reserved HVM space, but in any case, the VM will be
+ * protected.
+ */
+#define kern_addr_valid(addr)   (1)
+
+#include <asm-generic/memory_model.h>
+/* XXX Todo: implement assembly-optimized version of getorder. */
+#include <asm-generic/getorder.h>
+
+#endif /* ifdef __ASSEMBLY__ */
+#endif /* ifdef __KERNEL__ */
+
+#endif
diff --git a/arch/arm/include/asm/sizes.h b/arch/hexagon/include/asm/param.h
index 154b89b81d3e..285344bbd036 100644
--- a/arch/arm/include/asm/sizes.h
+++ b/arch/hexagon/include/asm/param.h
@@ -1,8 +1,9 @@
 /*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
  * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -11,11 +12,15 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-/*  Size definitions
- *  Copyright (C) ARM Limited 1998. All rights reserved.
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
  */
-#include <asm-generic/sizes.h>
 
-#define SZ_48M	(SZ_32M + SZ_16M)
+#ifndef _ASM_PARAM_H
+#define _ASM_PARAM_H
+
+#define EXEC_PAGESIZE	16384
+
+#include <asm-generic/param.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
new file mode 100644
index 000000000000..6c2910f91180
--- /dev/null
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PERF_EVENT_H
+#define _ASM_PERF_EVENT_H
+
+#define PERF_EVENT_INDEX_OFFSET	0
+
+#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
new file mode 100644
index 000000000000..13443c775131
--- /dev/null
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -0,0 +1,146 @@
+/*
+ * Page table support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PGALLOC_H
+#define _ASM_PGALLOC_H
+
+#include <asm/mem-layout.h>
+#include <asm/atomic.h>
+
+#define check_pgt_cache() do {} while (0)
+
+extern unsigned long long kmap_generation;
+
+/*
+ * Page table creation interface
+ */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd;
+
+	pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	/*
+	 * There may be better ways to do this, but to ensure
+	 * that new address spaces always contain the kernel
+	 * base mapping, and to ensure that the user area is
+	 * initially marked invalid, initialize the new map
+	 * map with a copy of the kernel's persistent map.
+	 */
+
+	memcpy(pgd, swapper_pg_dir, PTRS_PER_PGD*sizeof(pgd_t *));
+	mm->context.generation = kmap_generation;
+
+	/* Physical version is what is passed to virtual machine on switch */
+	mm->context.ptbase = __pa(pgd);
+
+	return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	free_page((unsigned long) pgd);
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+					 unsigned long address)
+{
+	struct page *pte;
+
+	pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+
+	if (pte)
+		pgtable_page_ctor(pte);
+
+	return pte;
+}
+
+/* _kernel variant gets to use a different allocator */
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	gfp_t flags =  GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+	return (pte_t *) __get_free_page(flags);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	__free_page(pte);
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte)
+{
+	/*
+	 * Conveniently, zero in 3 LSB means indirect 4K page table.
+	 * Not so convenient when you're trying to vary the page size.
+	 */
+	set_pmd(pmd, __pmd(((unsigned long)page_to_pfn(pte) << PAGE_SHIFT) |
+		HEXAGON_L1_PTE_SIZE));
+}
+
+/*
+ * Other architectures seem to have ways of making all processes
+ * share the same pmd's for their kernel mappings, but the v0.3
+ * Hexagon VM spec has a "monolithic" L1 table for user and kernel
+ * segments.  We track "generations" of the kernel map to minimize
+ * overhead, and update the "slave" copies of the kernel mappings
+ * as part of switch_mm.  However, we still need to update the
+ * kernel map of the active thread who's calling pmd_populate_kernel...
+ */
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte)
+{
+	extern spinlock_t kmap_gen_lock;
+	pmd_t *ppmd;
+	int pmdindex;
+
+	spin_lock(&kmap_gen_lock);
+	kmap_generation++;
+	mm->context.generation = kmap_generation;
+	current->active_mm->context.generation = kmap_generation;
+	spin_unlock(&kmap_gen_lock);
+
+	set_pmd(pmd, __pmd(((unsigned long)__pa(pte)) | HEXAGON_L1_PTE_SIZE));
+
+	/*
+	 * Now the "slave" copy of the current thread.
+	 * This is pointer arithmetic, not byte addresses!
+	 */
+	pmdindex = (pgd_t *)pmd - mm->pgd;
+	ppmd = (pmd_t *)current->active_mm->pgd + pmdindex;
+	set_pmd(ppmd, __pmd(((unsigned long)__pa(pte)) | HEXAGON_L1_PTE_SIZE));
+	if (pmdindex > max_kernel_seg)
+		max_kernel_seg = pmdindex;
+}
+
+#define __pte_free_tlb(tlb, pte, addr)		\
+do {						\
+	pgtable_page_dtor((pte));		\
+	tlb_remove_page((tlb), (pte));		\
+} while (0)
+
+#endif
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
new file mode 100644
index 000000000000..ca619bf225ef
--- /dev/null
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -0,0 +1,518 @@
+/*
+ * Page table support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PGTABLE_H
+#define _ASM_PGTABLE_H
+
+/*
+ * Page table definitions for Qualcomm Hexagon processor.
+ */
+#include <linux/swap.h>
+#include <asm/page.h>
+#include <asm-generic/pgtable-nopmd.h>
+
+/* A handy thing to have if one has the RAM. Declared in head.S */
+extern unsigned long empty_zero_page;
+extern unsigned long zero_page_mask;
+
+/*
+ * The PTE model described here is that of the Hexagon Virtual Machine,
+ * which autonomously walks 2-level page tables.  At a lower level, we
+ * also describe the RISCish software-loaded TLB entry structure of
+ * the underlying Hexagon processor. A kernel built to run on the
+ * virtual machine has no need to know about the underlying hardware.
+ */
+#include <asm/vm_mmu.h>
+
+/*
+ * To maximize the comfort level for the PTE manipulation macros,
+ * define the "well known" architecture-specific bits.
+ */
+#define _PAGE_READ	__HVM_PTE_R
+#define _PAGE_WRITE	__HVM_PTE_W
+#define _PAGE_EXECUTE	__HVM_PTE_X
+#define _PAGE_USER	__HVM_PTE_U
+
+/*
+ * We have a total of 4 "soft" bits available in the abstract PTE.
+ * The two mandatory software bits are Dirty and Accessed.
+ * To make nonlinear swap work according to the more recent
+ * model, we want a low order "Present" bit to indicate whether
+ * the PTE describes MMU programming or swap space.
+ */
+#define _PAGE_PRESENT	(1<<0)
+#define _PAGE_DIRTY	(1<<1)
+#define _PAGE_ACCESSED	(1<<2)
+
+/*
+ * _PAGE_FILE is only meaningful if _PAGE_PRESENT is false, while
+ * _PAGE_DIRTY is only meaningful if _PAGE_PRESENT is true.
+ * So we can overload the bit...
+ */
+#define _PAGE_FILE	_PAGE_DIRTY /* set:  pagecache, unset = swap */
+
+/*
+ * For now, let's say that Valid and Present are the same thing.
+ * Alternatively, we could say that it's the "or" of R, W, and X
+ * permissions.
+ */
+#define _PAGE_VALID	_PAGE_PRESENT
+
+/*
+ * We're not defining _PAGE_GLOBAL here, since there's no concept
+ * of global pages or ASIDs exposed to the Hexagon Virtual Machine,
+ * and we want to use the same page table structures and macros in
+ * the native kernel as we do in the virtual machine kernel.
+ * So we'll put up with a bit of inefficiency for now...
+ */
+
+/*
+ * Top "FOURTH" level (pgd), which for the Hexagon VM is really
+ * only the second from the bottom, pgd and pud both being collapsed.
+ * Each entry represents 4MB of virtual address space, 4K of table
+ * thus maps the full 4GB.
+ */
+#define PGDIR_SHIFT 22
+#define PTRS_PER_PGD 1024
+
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#ifdef CONFIG_PAGE_SIZE_4KB
+#define PTRS_PER_PTE 1024
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_16KB
+#define PTRS_PER_PTE 256
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_64KB
+#define PTRS_PER_PTE 64
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_256KB
+#define PTRS_PER_PTE 16
+#endif
+
+#ifdef CONFIG_PAGE_SIZE_1MB
+#define PTRS_PER_PTE 4
+#endif
+
+/*  Any bigger and the PTE disappears.  */
+#define pgd_ERROR(e) \
+	printk(KERN_ERR "%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__,\
+		pgd_val(e))
+
+/*
+ * Page Protection Constants. Includes (in this variant) cache attributes.
+ */
+extern unsigned long _dflt_cache_att;
+
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_USER | \
+				_dflt_cache_att)
+#define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | \
+				_PAGE_READ | _PAGE_EXECUTE | _dflt_cache_att)
+#define PAGE_COPY	PAGE_READONLY
+#define PAGE_EXEC	__pgprot(_PAGE_PRESENT | _PAGE_USER | \
+				_PAGE_READ | _PAGE_EXECUTE | _dflt_cache_att)
+#define PAGE_COPY_EXEC	PAGE_EXEC
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | \
+				_PAGE_EXECUTE | _PAGE_WRITE | _dflt_cache_att)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+				_PAGE_WRITE | _PAGE_EXECUTE | _dflt_cache_att)
+
+
+/*
+ * Aliases for mapping mmap() protection bits to page protections.
+ * These get used for static initialization, so using the _dflt_cache_att
+ * variable for the default cache attribute isn't workable. If the
+ * default gets changed at boot time, the boot option code has to
+ * update data structures like the protaction_map[] array.
+ */
+#define CACHEDEF	(CACHE_DEFAULT << 6)
+
+/* Private (copy-on-write) page protections. */
+#define __P000 __pgprot(_PAGE_PRESENT | _PAGE_USER | CACHEDEF)
+#define __P001 __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | CACHEDEF)
+#define __P010 __P000	/* Write-only copy-on-write */
+#define __P011 __P001	/* Read/Write copy-on-write */
+#define __P100 __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+			_PAGE_EXECUTE | CACHEDEF)
+#define __P101 __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_EXECUTE | \
+			_PAGE_READ | CACHEDEF)
+#define __P110 __P100	/* Write/execute copy-on-write */
+#define __P111 __P101	/* Read/Write/Execute, copy-on-write */
+
+/* Shared page protections. */
+#define __S000 __P000
+#define __S001 __P001
+#define __S010 __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+			_PAGE_WRITE | CACHEDEF)
+#define __S011 __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | \
+			_PAGE_WRITE | CACHEDEF)
+#define __S100 __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+			_PAGE_EXECUTE | CACHEDEF)
+#define __S101 __P101
+#define __S110 __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+			_PAGE_EXECUTE | _PAGE_WRITE | CACHEDEF)
+#define __S111 __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | \
+			_PAGE_EXECUTE | _PAGE_WRITE | CACHEDEF)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];  /* located in head.S */
+
+/* Seems to be zero even in architectures where the zero page is firewalled? */
+#define FIRST_USER_ADDRESS 0
+#define pte_special(pte)	0
+#define pte_mkspecial(pte)	(pte)
+
+/*  HUGETLB not working currently  */
+#ifdef CONFIG_HUGETLB_PAGE
+#define pte_mkhuge(pte) __pte((pte_val(pte) & ~0x3) | HVM_HUGEPAGE_SIZE)
+#endif
+
+/*
+ * For now, assume that higher-level code will do TLB/MMU invalidations
+ * and don't insert that overhead into this low-level function.
+ */
+extern void sync_icache_dcache(pte_t pte);
+
+#define pte_present_exec_user(pte) \
+	((pte_val(pte) & (_PAGE_EXECUTE | _PAGE_USER)) == \
+	(_PAGE_EXECUTE | _PAGE_USER))
+
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	/*  should really be using pte_exec, if it weren't declared later. */
+	if (pte_present_exec_user(pteval))
+		sync_icache_dcache(pteval);
+
+	*ptep = pteval;
+}
+
+/*
+ * For the Hexagon Virtual Machine MMU (or its emulation), a null/invalid
+ * L1 PTE (PMD/PGD) has 7 in the least significant bits. For the L2 PTE
+ * (Linux PTE), the key is to have bits 11..9 all zero.  We'd use 0x7
+ * as a universal null entry, but some of those least significant bits
+ * are interpreted by software.
+ */
+#define _NULL_PMD	0x7
+#define _NULL_PTE	0x0
+
+static inline void pmd_clear(pmd_t *pmd_entry_ptr)
+{
+	 pmd_val(*pmd_entry_ptr) = _NULL_PMD;
+}
+
+/*
+ * Conveniently, a null PTE value is invalid.
+ */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep)
+{
+	pte_val(*ptep) = _NULL_PTE;
+}
+
+#ifdef NEED_PMD_INDEX_DESPITE_BEING_2_LEVEL
+/**
+ * pmd_index - returns the index of the entry in the PMD page
+ * which would control the given virtual address
+ */
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+#endif
+
+/**
+ * pgd_index - returns the index of the entry in the PGD page
+ * which would control the given virtual address
+ *
+ * This returns the *index* for the address in the pgd_t
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+/*
+ * pgd_offset - find an offset in a page-table-directory
+ */
+#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+
+/*
+ * pgd_offset_k - get kernel (init_mm) pgd entry pointer for addr
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/**
+ * pmd_none - check if pmd_entry is mapped
+ * @pmd_entry:  pmd entry
+ *
+ * MIPS checks it against that "invalid pte table" thing.
+ */
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_val(pmd) == _NULL_PMD;
+}
+
+/**
+ * pmd_present - is there a page table behind this?
+ * Essentially the inverse of pmd_none.  We maybe
+ * save an inline instruction by defining it this
+ * way, instead of simply "!pmd_none".
+ */
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) != (unsigned long)_NULL_PMD;
+}
+
+/**
+ * pmd_bad - check if a PMD entry is "bad". That might mean swapped out.
+ * As we have no known cause of badness, it's null, as it is for many
+ * architectures.
+ */
+static inline int pmd_bad(pmd_t pmd)
+{
+	return 0;
+}
+
+/*
+ * pmd_page - converts a PMD entry to a page pointer
+ */
+#define pmd_page(pmd)  (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/**
+ * pte_none - check if pte is mapped
+ * @pte: pte_t entry
+ */
+static inline int pte_none(pte_t pte)
+{
+	return pte_val(pte) == _NULL_PTE;
+};
+
+/*
+ * pte_present - check if page is present
+ */
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* mk_pte - make a PTE out of a page pointer and protection bits */
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+/* pte_page - returns a page (frame pointer/descriptor?) based on a PTE */
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+/* pte_mkold - mark PTE as not recently accessed */
+static inline pte_t pte_mkold(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_ACCESSED;
+	return pte;
+}
+
+/* pte_mkyoung - mark PTE as recently accessed */
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_ACCESSED;
+	return pte;
+}
+
+/* pte_mkclean - mark page as in sync with backing store */
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_DIRTY;
+	return pte;
+}
+
+/* pte_mkdirty - mark page as modified */
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_DIRTY;
+	return pte;
+}
+
+/* pte_young - "is PTE marked as accessed"? */
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+/* pte_dirty - "is PTE dirty?" */
+static inline int pte_dirty(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_DIRTY;
+}
+
+/* pte_modify - set protection bits on PTE */
+static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
+{
+	pte_val(pte) &= PAGE_MASK;
+	pte_val(pte) |= pgprot_val(prot);
+	return pte;
+}
+
+/* pte_wrprotect - mark page as not writable */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_WRITE;
+	return pte;
+}
+
+/* pte_mkwrite - mark page as writable */
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_WRITE;
+	return pte;
+}
+
+/* pte_mkexec - mark PTE as executable */
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_EXECUTE;
+	return pte;
+}
+
+/* pte_read - "is PTE marked as readable?" */
+static inline int pte_read(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_READ;
+}
+
+/* pte_write - "is PTE marked as writable?" */
+static inline int pte_write(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_WRITE;
+}
+
+
+/* pte_exec - "is PTE marked as executable?" */
+static inline int pte_exec(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_EXECUTE;
+}
+
+/* __pte_to_swp_entry - extract swap entry from PTE */
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+
+/* __swp_entry_to_pte - extract PTE from swap entry */
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+/* pfn_pte - convert page number and protection value to page table entry */
+#define pfn_pte(pfn, pgprot) __pte((pfn << PAGE_SHIFT) | pgprot_val(pgprot))
+
+/* pte_pfn - convert pte to page frame number */
+#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+/*
+ * set_pte_at - update page table and do whatever magic may be
+ * necessary to make the underlying hardware/firmware take note.
+ *
+ * VM may require a virtual instruction to alert the MMU.
+ */
+#define set_pte_at(mm, addr, ptep, pte) set_pte(ptep, pte)
+
+/*
+ * May need to invoke the virtual machine as well...
+ */
+#define pte_unmap(pte)		do { } while (0)
+#define pte_unmap_nested(pte)	do { } while (0)
+
+/*
+ * pte_offset_map - returns the linear address of the page table entry
+ * corresponding to an address
+ */
+#define pte_offset_map(dir, address)                                    \
+	((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
+
+#define pte_offset_map_nested(pmd, addr) pte_offset_map(pmd, addr)
+
+/* pte_offset_kernel - kernel version of pte_offset */
+#define pte_offset_kernel(dir, address) \
+	((pte_t *) (unsigned long) __va(pmd_val(*dir) & PAGE_MASK) \
+				+  __pte_offset(address))
+
+/* ZERO_PAGE - returns the globally shared zero page */
+#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
+
+#define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+/* Nothing special about IO remapping at this point */
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+	remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*  I think this is in case we have page table caches; needed by init/main.c  */
+#define pgtable_cache_init()    do { } while (0)
+
+/*
+ * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the
+ * PTE is interpreted as swap information.  Depending on the _PAGE_FILE
+ * bit, the remaining free bits are eitehr interpreted as a file offset
+ * or a swap type/offset tuple.  Rather than have the TLB fill handler
+ * test _PAGE_PRESENT, we're going to reserve the permissions bits
+ * and set them to all zeros for swap entries, which speeds up the
+ * miss handler at the cost of 3 bits of offset.  That trade-off can
+ * be revisited if necessary, but Hexagon processor architecture and
+ * target applications suggest a lot of TLB misses and not much swap space.
+ *
+ * Format of swap PTE:
+ *	bit	0:	Present (zero)
+ *	bit	1:	_PAGE_FILE (zero)
+ *	bits	2-6:	swap type (arch independent layer uses 5 bits max)
+ *	bits	7-9:	bits 2:0 of offset
+ *	bits 10-12:	effectively _PAGE_PROTNONE (all zero)
+ *	bits 13-31:  bits 21:3 of swap offset
+ *
+ * Format of file PTE:
+ *	bit	0:	Present (zero)
+ *	bit	1:	_PAGE_FILE (zero)
+ *	bits	2-9:	bits 7:0 of offset
+ *	bits 10-12:	effectively _PAGE_PROTNONE (all zero)
+ *	bits 13-31:  bits 26:8 of swap offset
+ *
+ * The split offset makes some of the following macros a little gnarly,
+ * but there's plenty of precedent for this sort of thing.
+ */
+#define PTE_FILE_MAX_BITS     27
+
+/* Used for swap PTEs */
+#define __swp_type(swp_pte)		(((swp_pte).val >> 2) & 0x1f)
+
+#define __swp_offset(swp_pte) \
+	((((swp_pte).val >> 7) & 0x7) | (((swp_pte).val >> 10) & 0x003ffff8))
+
+#define __swp_entry(type, offset) \
+	((swp_entry_t)	{ \
+		((type << 2) | \
+		 ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
+
+/* Used for file PTEs */
+#define pte_file(pte) \
+	((pte_val(pte) & (_PAGE_FILE | _PAGE_PRESENT)) == _PAGE_FILE)
+
+#define pte_to_pgoff(pte) \
+	(((pte_val(pte) >> 2) & 0xff) | ((pte_val(pte) >> 5) & 0x07ffff00))
+
+#define pgoff_to_pte(off) \
+	((pte_t) { ((((off) & 0x7ffff00) << 5) | (((off) & 0xff) << 2)\
+	| _PAGE_FILE) })
+
+/*  Oh boy.  There are a lot of possible arch overrides found in this file.  */
+#include <asm-generic/pgtable.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
new file mode 100644
index 000000000000..20c5ddabbd8b
--- /dev/null
+++ b/arch/hexagon/include/asm/processor.h
@@ -0,0 +1,123 @@
+/*
+ * Process/processor support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PROCESSOR_H
+#define _ASM_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/mem-layout.h>
+#include <asm/registers.h>
+#include <asm/hexagon_vm.h>
+
+/*  must be a macro  */
+#define current_text_addr() ({ __label__ _l; _l: &&_l; })
+
+/*  task_struct, defined elsewhere, is the "process descriptor" */
+struct task_struct;
+
+/*  this is defined in arch/process.c  */
+extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
+extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
+
+/*
+ * thread_struct is supposed to be for context switch data.
+ * Specifically, to hold the state necessary to perform switch_to...
+ */
+struct thread_struct {
+	void *switch_sp;
+};
+
+/*
+ * initializes thread_struct
+ * The only thing we have in there is switch_sp
+ * which doesn't really need to be initialized.
+ */
+
+#define INIT_THREAD { \
+}
+
+#define cpu_relax() __vmyield()
+
+/*
+ * "Unlazying all lazy status" occurs here.
+ */
+static inline void prepare_to_copy(struct task_struct *tsk)
+{
+}
+
+/*
+ * Decides where the kernel will search for a free chunk of vm space during
+ * mmaps.
+ * See also arch_get_unmapped_area.
+ * Doesn't affect if you have MAX_FIXED in the page flags set though...
+ *
+ * Apparently the convention is that ld.so will ask for "unmapped" private
+ * memory to be allocated SOMEWHERE, but it also asks for memory explicitly
+ * via MAP_FIXED at the lower * addresses starting at VA=0x0.
+ *
+ * If the two requests collide, you get authentic segfaulting action, so
+ * you have to kick the "unmapped" base requests higher up.
+ */
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE/3))
+
+
+#define task_pt_regs(task) \
+	((struct pt_regs *)(task_stack_page(task) + THREAD_SIZE) - 1)
+
+#define KSTK_EIP(tsk) (pt_elr(task_pt_regs(tsk)))
+#define KSTK_ESP(tsk) (pt_psp(task_pt_regs(tsk)))
+
+/*  Free all resources held by a thread; defined in process.c  */
+extern void release_thread(struct task_struct *dead_task);
+
+/* Get wait channel for task P.  */
+extern unsigned long get_wchan(struct task_struct *p);
+
+/*  The following stuff is pretty HEXAGON specific.  */
+
+/*  This is really just here for __switch_to.
+    Offsets are pulled via asm-offsets.c  */
+
+/*
+ * No real reason why VM and native switch stacks should be different.
+ * Ultimately this should merge.  Note that Rev C. ABI called out only
+ * R24-27 as callee saved GPRs needing explicit attention (R29-31 being
+ * dealt with automagically by allocframe), but the current ABI has
+ * more, R16-R27.  By saving more, the worst case is that we waste some
+ * cycles if building with the old compilers.
+ */
+
+struct hexagon_switch_stack {
+	unsigned long long	r1716;
+	unsigned long long	r1918;
+	unsigned long long	r2120;
+	unsigned long long	r2322;
+	unsigned long long	r2524;
+	unsigned long long	r2726;
+	unsigned long		fp;
+	unsigned long		lr;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
diff --git a/arch/hexagon/include/asm/ptrace.h b/arch/hexagon/include/asm/ptrace.h
new file mode 100644
index 000000000000..3d2f607cd63c
--- /dev/null
+++ b/arch/hexagon/include/asm/ptrace.h
@@ -0,0 +1,35 @@
+/*
+ * Ptrace definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_PTRACE_H
+#define _ASM_PTRACE_H
+
+#include <asm/registers.h>
+
+#define instruction_pointer(regs) pt_elr(regs)
+#define user_stack_pointer(regs) ((regs)->r29)
+
+#define profile_pc(regs) instruction_pointer(regs)
+
+/* kprobe-based event tracer support */
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+
+#endif
diff --git a/arch/hexagon/include/asm/registers.h b/arch/hexagon/include/asm/registers.h
new file mode 100644
index 000000000000..4dd741be855d
--- /dev/null
+++ b/arch/hexagon/include/asm/registers.h
@@ -0,0 +1,236 @@
+/*
+ * Register definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_REGISTERS_H
+#define _ASM_REGISTERS_H
+
+#define SP r29
+
+#ifndef __ASSEMBLY__
+
+/*  See kernel/entry.S for further documentation.  */
+
+/*
+ * Entry code copies the event record out of guest registers into
+ * this structure (which is on the stack).
+ */
+
+struct hvm_event_record {
+	unsigned long vmel;     /* Event Linkage (return address) */
+	unsigned long vmest;    /* Event context - pre-event SSR values */
+	unsigned long vmpsp;    /* Previous stack pointer */
+	unsigned long vmbadva;  /* Bad virtual address for addressing events */
+};
+
+struct pt_regs {
+	long restart_r0;        /* R0 checkpoint for syscall restart */
+	long syscall_nr;        /* Only used in system calls */
+	union {
+		struct {
+			unsigned long usr;
+			unsigned long preds;
+		};
+		long long int predsusr;
+	};
+	union {
+		struct {
+			unsigned long m0;
+			unsigned long m1;
+		};
+		long long int m1m0;
+	};
+	union {
+		struct {
+			unsigned long sa1;
+			unsigned long lc1;
+		};
+		long long int lc1sa1;
+	};
+	union {
+		struct {
+			unsigned long sa0;
+			unsigned long lc0;
+		};
+		long long int lc0sa0;
+	};
+	union {
+		struct {
+			unsigned long gp;
+			unsigned long ugp;
+		};
+		long long int ugpgp;
+	};
+	/*
+	* Be extremely careful with rearranging these, if at all.  Some code
+	* assumes the 32 registers exist exactly like this in memory;
+	* e.g. kernel/ptrace.c
+	* e.g. kernel/signal.c (restore_sigcontext)
+	*/
+	union {
+		struct {
+			unsigned long r00;
+			unsigned long r01;
+		};
+		long long int r0100;
+	};
+	union {
+		struct {
+			unsigned long r02;
+			unsigned long r03;
+		};
+		long long int r0302;
+	};
+	union {
+		struct {
+			unsigned long r04;
+			unsigned long r05;
+		};
+		long long int r0504;
+	};
+	union {
+		struct {
+			unsigned long r06;
+			unsigned long r07;
+		};
+		long long int r0706;
+	};
+	union {
+		struct {
+			unsigned long r08;
+			unsigned long r09;
+		};
+		long long int r0908;
+	};
+	union {
+	       struct {
+			unsigned long r10;
+			unsigned long r11;
+	       };
+	       long long int r1110;
+	};
+	union {
+	       struct {
+			unsigned long r12;
+			unsigned long r13;
+	       };
+	       long long int r1312;
+	};
+	union {
+	       struct {
+			unsigned long r14;
+			unsigned long r15;
+	       };
+	       long long int r1514;
+	};
+	union {
+		struct {
+			unsigned long r16;
+			unsigned long r17;
+		};
+		long long int r1716;
+	};
+	union {
+		struct {
+			unsigned long r18;
+			unsigned long r19;
+		};
+		long long int r1918;
+	};
+	union {
+		struct {
+			unsigned long r20;
+			unsigned long r21;
+		};
+		long long int r2120;
+	};
+	union {
+		struct {
+			unsigned long r22;
+			unsigned long r23;
+		};
+		long long int r2322;
+	};
+	union {
+		struct {
+			unsigned long r24;
+			unsigned long r25;
+		};
+		long long int r2524;
+	};
+	union {
+		struct {
+			unsigned long r26;
+			unsigned long r27;
+		};
+		long long int r2726;
+	};
+	union {
+		struct {
+			unsigned long r28;
+			unsigned long r29;
+	       };
+	       long long int r2928;
+	};
+	union {
+		struct {
+			unsigned long r30;
+			unsigned long r31;
+		};
+		long long int r3130;
+	};
+	/* VM dispatch pushes event record onto stack - we can build on it */
+	struct hvm_event_record hvmer;
+};
+
+/* Defines to conveniently access the values  */
+
+/*
+ * As of the VM spec 0.5, these registers are now set/retrieved via a
+ * VM call.  On the in-bound side, we just fetch the values
+ * at the entry points and stuff them into the old record in pt_regs.
+ * However, on the outbound side, probably at VM rte, we set the
+ * registers back.
+ */
+
+#define pt_elr(regs) ((regs)->hvmer.vmel)
+#define pt_set_elr(regs, val) ((regs)->hvmer.vmel = (val))
+#define pt_cause(regs) ((regs)->hvmer.vmest & (HVM_VMEST_CAUSE_MSK))
+#define user_mode(regs) \
+	(((regs)->hvmer.vmest & (HVM_VMEST_UM_MSK << HVM_VMEST_UM_SFT)) != 0)
+#define ints_enabled(regs) \
+	(((regs)->hvmer.vmest & (HVM_VMEST_IE_MSK << HVM_VMEST_IE_SFT)) != 0)
+#define pt_psp(regs) ((regs)->hvmer.vmpsp)
+#define pt_badva(regs) ((regs)->hvmer.vmbadva)
+
+#define pt_set_rte_sp(regs, sp) do {\
+	pt_psp(regs) = (sp);\
+	(regs)->SP = (unsigned long) &((regs)->hvmer);\
+	} while (0)
+
+#define pt_set_kmode(regs) \
+	(regs)->hvmer.vmest = (HVM_VMEST_IE_MSK << HVM_VMEST_IE_SFT)
+
+#define pt_set_usermode(regs) \
+	(regs)->hvmer.vmest = (HVM_VMEST_UM_MSK << HVM_VMEST_UM_SFT) \
+			    | (HVM_VMEST_IE_MSK << HVM_VMEST_IE_SFT)
+
+#endif  /*  ifndef __ASSEMBLY  */
+
+#endif
diff --git a/arch/hexagon/include/asm/setup.h b/arch/hexagon/include/asm/setup.h
new file mode 100644
index 000000000000..3b754c50bc0a
--- /dev/null
+++ b/arch/hexagon/include/asm/setup.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SETUP_H
+#define _ASM_SETUP_H
+
+#include <linux/init.h>
+#include <asm-generic/setup.h>
+
+extern char external_cmdline_buffer;
+
+void __init setup_arch_memory(void);
+
+#endif
diff --git a/arch/hexagon/include/asm/sigcontext.h b/arch/hexagon/include/asm/sigcontext.h
new file mode 100644
index 000000000000..ce6dcd995220
--- /dev/null
+++ b/arch/hexagon/include/asm/sigcontext.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SIGCONTEXT_H
+#define _ASM_SIGCONTEXT_H
+
+#include <asm/user.h>
+
+/*
+ * Signal context structure - contains all info to do with the state
+ * before the signal handler was invoked.  Note: only add new entries
+ * to the end of the structure.
+ */
+struct sigcontext {
+	struct user_regs_struct sc_regs;
+} __aligned(8);
+
+#endif
diff --git a/arch/hexagon/include/asm/signal.h b/arch/hexagon/include/asm/signal.h
new file mode 100644
index 000000000000..471c0562e17b
--- /dev/null
+++ b/arch/hexagon/include/asm/signal.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SIGNAL_H
+#define _ASM_SIGNAL_H
+
+extern unsigned long __rt_sigtramp_template[2];
+
+#include <asm-generic/signal.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/smp.h b/arch/hexagon/include/asm/smp.h
new file mode 100644
index 000000000000..87c869a6a897
--- /dev/null
+++ b/arch/hexagon/include/asm/smp.h
@@ -0,0 +1,44 @@
+/*
+ * SMP definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/cpumask.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+enum ipi_message_type {
+	IPI_NOP = 0,
+	IPI_RESCHEDULE = 1,
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+	IPI_CPU_STOP,
+	IPI_TIMER,
+};
+
+extern void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg);
+extern void smp_start_cpus(void);
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void smp_vm_unmask_irq(void *info);
+
+#endif
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
new file mode 100644
index 000000000000..168a920485a8
--- /dev/null
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -0,0 +1,186 @@
+/*
+ * Spinlock support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SPINLOCK_H
+#define _ASM_SPINLOCK_H
+
+#include <asm/irqflags.h>
+
+/*
+ * This file is pulled in for SMP builds.
+ * Really need to check all the barrier stuff for "true" SMP
+ */
+
+/*
+ * Read locks:
+ * - load the lock value
+ * - increment it
+ * - if the lock value is still negative, go back and try again.
+ * - unsuccessful store is unsuccessful.  Go back and try again.  Loser.
+ * - successful store new lock value if positive -> lock acquired
+ */
+static inline void arch_read_lock(arch_rwlock_t *lock)
+{
+	__asm__ __volatile__(
+		"1:	R6 = memw_locked(%0);\n"
+		"	{ P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
+		"	{ if !P3 jump 1b; }\n"
+		"	memw_locked(%0,P3) = R6;\n"
+		"	{ if !P3 jump 1b; }\n"
+		:
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *lock)
+{
+	__asm__ __volatile__(
+		"1:	R6 = memw_locked(%0);\n"
+		"	R6 = add(R6,#-1);\n"
+		"	memw_locked(%0,P3) = R6\n"
+		"	if !P3 jump 1b;\n"
+		:
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+
+}
+
+/*  I think this returns 0 on fail, 1 on success.  */
+static inline int arch_read_trylock(arch_rwlock_t *lock)
+{
+	int temp;
+	__asm__ __volatile__(
+		"	R6 = memw_locked(%1);\n"
+		"	{ %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
+		"	{ if !P3 jump 1f; }\n"
+		"	memw_locked(%1,P3) = R6;\n"
+		"	{ %0 = P3 }\n"
+		"1:\n"
+		: "=&r" (temp)
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+	return temp;
+}
+
+static inline int arch_read_can_lock(arch_rwlock_t *rwlock)
+{
+	return rwlock->lock == 0;
+}
+
+static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
+{
+	return rwlock->lock == 0;
+}
+
+/*  Stuffs a -1 in the lock value?  */
+static inline void arch_write_lock(arch_rwlock_t *lock)
+{
+	__asm__ __volatile__(
+		"1:	R6 = memw_locked(%0)\n"
+		"	{ P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
+		"	{ if !P3 jump 1b; }\n"
+		"	memw_locked(%0,P3) = R6;\n"
+		"	{ if !P3 jump 1b; }\n"
+		:
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+}
+
+
+static inline int arch_write_trylock(arch_rwlock_t *lock)
+{
+	int temp;
+	__asm__ __volatile__(
+		"	R6 = memw_locked(%1)\n"
+		"	{ %0 = #0; P3 = cmp.eq(R6,#0);  R6 = #-1;}\n"
+		"	{ if !P3 jump 1f; }\n"
+		"	memw_locked(%1,P3) = R6;\n"
+		"	%0 = P3;\n"
+		"1:\n"
+		: "=&r" (temp)
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+	return temp;
+
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *lock)
+{
+	smp_mb();
+	lock->lock = 0;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	__asm__ __volatile__(
+		"1:	R6 = memw_locked(%0);\n"
+		"	P3 = cmp.eq(R6,#0);\n"
+		"	{ if !P3 jump 1b; R6 = #1; }\n"
+		"	memw_locked(%0,P3) = R6;\n"
+		"	{ if !P3 jump 1b; }\n"
+		:
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+	lock->lock = 0;
+}
+
+static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	int temp;
+	__asm__ __volatile__(
+		"	R6 = memw_locked(%1);\n"
+		"	P3 = cmp.eq(R6,#0);\n"
+		"	{ if !P3 jump 1f; R6 = #1; %0 = #0; }\n"
+		"	memw_locked(%1,P3) = R6;\n"
+		"	%0 = P3;\n"
+		"1:\n"
+		: "=&r" (temp)
+		: "r" (&lock->lock)
+		: "memory", "r6", "p3"
+	);
+	return temp;
+}
+
+/*
+ * SMP spinlocks are intended to allow only a single CPU at the lock
+ */
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+#define arch_spin_unlock_wait(lock) \
+	do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+#define arch_spin_is_locked(x) ((x)->lock != 0)
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#endif
diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
new file mode 100644
index 000000000000..5e937af1c4ad
--- /dev/null
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -0,0 +1,42 @@
+/*
+ * Spinlock support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SPINLOCK_TYPES_H
+#define _ASM_SPINLOCK_TYPES_H
+
+#include <linux/version.h>
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif
diff --git a/arch/hexagon/include/asm/string.h b/arch/hexagon/include/asm/string.h
new file mode 100644
index 000000000000..f4489c15942c
--- /dev/null
+++ b/arch/hexagon/include/asm/string.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_STRING_H_
+#define _ASM_STRING_H_
+
+#ifdef __KERNEL__
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+/*  ToDo:  use dczeroa, accelerate the compiler-constant zero case  */
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *__to, int c, size_t __n);
+#endif
+
+
+#endif /* _ASM_STRING_H_ */
diff --git a/arch/hexagon/include/asm/suspend.h b/arch/hexagon/include/asm/suspend.h
new file mode 100644
index 000000000000..089dd8268791
--- /dev/null
+++ b/arch/hexagon/include/asm/suspend.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SUSPEND_H
+#define _ASM_SUSPEND_H
+
+static inline int arch_prepare_suspend(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/arch/hexagon/include/asm/swab.h b/arch/hexagon/include/asm/swab.h
new file mode 100644
index 000000000000..99cf0be3fb83
--- /dev/null
+++ b/arch/hexagon/include/asm/swab.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SWAB_H
+#define _ASM_SWAB_H
+
+#define __SWAB_64_THRU_32__
+
+#endif
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
new file mode 100644
index 000000000000..3e7d61d38d97
--- /dev/null
+++ b/arch/hexagon/include/asm/syscall.h
@@ -0,0 +1,54 @@
+/*
+ * Syscall support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_HEXAGON_SYSCALL_H
+#define _ASM_HEXAGON_SYSCALL_H
+
+typedef long (*syscall_fn)(unsigned long, unsigned long,
+	unsigned long, unsigned long,
+	unsigned long, unsigned long);
+
+asmlinkage int sys_execve(char __user *ufilename, char __user * __user *argv,
+			  char __user * __user *envp);
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidp, unsigned long child_tidp);
+
+#define sys_execve	sys_execve
+#define sys_clone	sys_clone
+
+#include <asm-generic/syscalls.h>
+
+extern void *sys_call_table[];
+
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	return regs->r06;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	BUG_ON(i + n > 6);
+	memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+}
+#endif
diff --git a/arch/hexagon/include/asm/system.h b/arch/hexagon/include/asm/system.h
new file mode 100644
index 000000000000..323ed1dd65e2
--- /dev/null
+++ b/arch/hexagon/include/asm/system.h
@@ -0,0 +1,126 @@
+/*
+ * System level definitions for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_SYSTEM_H
+#define _ASM_SYSTEM_H
+
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <asm/atomic.h>
+#include <asm/hexagon_vm.h>
+
+struct thread_struct;
+
+extern struct task_struct *__switch_to(struct task_struct *,
+	struct task_struct *,
+	struct task_struct *);
+
+#define switch_to(p, n, r) do {\
+	r = __switch_to((p), (n), (r));\
+} while (0)
+
+
+#define rmb()				barrier()
+#define read_barrier_depends()		barrier()
+#define wmb()				barrier()
+#define mb()				barrier()
+#define smp_rmb()			barrier()
+#define smp_read_barrier_depends()	barrier()
+#define smp_wmb()			barrier()
+#define smp_mb()			barrier()
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+/*
+ * __xchg - atomically exchange a register and a memory location
+ * @x: value to swap
+ * @ptr: pointer to memory
+ * @size:  size of the value
+ *
+ * Only 4 bytes supported currently.
+ *
+ * Note:  there was an errata for V2 about .new's and memw_locked.
+ *
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+				   int size)
+{
+	unsigned long retval;
+
+	/*  Can't seem to use printk or panic here, so just stop  */
+	if (size != 4) do { asm volatile("brkpt;\n"); } while (1);
+
+	__asm__ __volatile__ (
+	"1:	%0 = memw_locked(%1);\n"    /*  load into retval */
+	"	memw_locked(%1,P0) = %2;\n" /*  store into memory */
+	"	if !P0 jump 1b;\n"
+	: "=&r" (retval)
+	: "r" (ptr), "r" (x)
+	: "memory", "p0"
+	);
+	return retval;
+}
+
+/*
+ * Atomically swap the contents of a register with memory.  Should be atomic
+ * between multiple CPU's and within interrupts on the same CPU.
+ */
+#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), \
+	sizeof(*(ptr))))
+
+/*  Set a value and use a memory barrier.  Used by the scheduler somewhere.  */
+#define set_mb(var, value) \
+	do { var = value; mb(); } while (0)
+
+/*
+ *  see rt-mutex-design.txt; cmpxchg supposedly checks if *ptr == A and swaps.
+ *  looks just like atomic_cmpxchg on our arch currently with a bunch of
+ *  variable casting.
+ */
+#define __HAVE_ARCH_CMPXCHG 1
+
+#define cmpxchg(ptr, old, new)					\
+({								\
+	__typeof__(ptr) __ptr = (ptr);				\
+	__typeof__(*(ptr)) __old = (old);			\
+	__typeof__(*(ptr)) __new = (new);			\
+	__typeof__(*(ptr)) __oldval = 0;			\
+								\
+	asm volatile(						\
+		"1:	%0 = memw_locked(%1);\n"		\
+		"	{ P0 = cmp.eq(%0,%2);\n"		\
+		"	  if (!P0.new) jump:nt 2f; }\n"		\
+		"	memw_locked(%1,p0) = %3;\n"		\
+		"	if (!P0) jump 1b;\n"			\
+		"2:\n"						\
+		: "=&r" (__oldval)				\
+		: "r" (__ptr), "r" (__old), "r" (__new)		\
+		: "memory", "p0"				\
+	);							\
+	__oldval;						\
+})
+
+/*  Should probably shoot for an 8-byte aligned stack pointer  */
+#define STACK_MASK (~7)
+#define arch_align_stack(x) (x & STACK_MASK)
+
+#endif
diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h
new file mode 100644
index 000000000000..9c2934ff5756
--- /dev/null
+++ b/arch/hexagon/include/asm/thread_info.h
@@ -0,0 +1,154 @@
+/*
+ * Thread support for the Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/registers.h>
+#include <asm/page.h>
+#endif
+
+#define THREAD_SHIFT		12
+#define THREAD_SIZE		(1<<THREAD_SHIFT)
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER	(THREAD_SHIFT - PAGE_SHIFT)
+#else  /*  don't use standard allocator  */
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+extern struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node);
+extern void free_thread_info(struct thread_info *ti);
+#endif
+
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned long seg;
+} mm_segment_t;
+
+/*
+ * This is union'd with the "bottom" of the kernel stack.
+ * It keeps track of thread info which is handy for routines
+ * to access quickly.
+ */
+
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	struct exec_domain      *exec_domain;   /* execution domain */
+	unsigned long		flags;          /* low level flags */
+	__u32                   cpu;            /* current cpu */
+	int                     preempt_count;  /* 0=>preemptible,<0=>BUG */
+	mm_segment_t            addr_limit;     /* segmentation sux */
+	/*
+	 * used for syscalls somehow;
+	 * seems to have a function pointer and four arguments
+	 */
+	struct restart_block    restart_block;
+	/* Points to the current pt_regs frame  */
+	struct pt_regs		*regs;
+	/*
+	 * saved kernel sp at switch_to time;
+	 * not sure if this is used (it's not in the VM model it seems;
+	 * see thread_struct)
+	 */
+	unsigned long		sp;
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/asm-offsets.h>
+
+#endif  /* __ASSEMBLY__  */
+
+/*  looks like "linux/hardirq.h" uses this.  */
+
+#define PREEMPT_ACTIVE		0x10000000
+
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)                   \
+{                                               \
+	.task           = &tsk,                 \
+	.exec_domain    = &default_exec_domain, \
+	.flags          = 0,                    \
+	.cpu            = 0,                    \
+	.preempt_count  = 1,                    \
+	.addr_limit     = KERNEL_DS,            \
+	.restart_block = {                      \
+		.fn = do_no_restart_syscall,    \
+	},                                      \
+	.sp = 0,				\
+	.regs = NULL,			\
+}
+
+#define init_thread_info        (init_thread_union.thread_info)
+#define init_stack              (init_thread_union.stack)
+
+/* Tacky preprocessor trickery */
+#define	qqstr(s) qstr(s)
+#define qstr(s) #s
+#define QUOTED_THREADINFO_REG qqstr(THREADINFO_REG)
+
+register struct thread_info *__current_thread_info asm(QUOTED_THREADINFO_REG);
+#define current_thread_info()  __current_thread_info
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files
+ *   may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+
+#define TIF_SYSCALL_TRACE       0       /* syscall trace active */
+#define TIF_NOTIFY_RESUME       1       /* resumption notification requested */
+#define TIF_SIGPENDING          2       /* signal pending */
+#define TIF_NEED_RESCHED        3       /* rescheduling necessary */
+#define TIF_SINGLESTEP          4       /* restore ss @ return to usr mode */
+#define TIF_IRET                5       /* return with iret */
+#define TIF_RESTORE_SIGMASK     6       /* restore sig mask in do_signal() */
+/* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_POLLING_NRFLAG      16
+#define TIF_MEMDIE              17      /* OOM killer killed process */
+
+#define _TIF_SYSCALL_TRACE      (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME      (1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING         (1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED       (1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP         (1 << TIF_SINGLESTEP)
+#define _TIF_IRET               (1 << TIF_IRET)
+#define _TIF_RESTORE_SIGMASK    (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_POLLING_NRFLAG     (1 << TIF_POLLING_NRFLAG)
+
+/* work to do on interrupt/exception return - All but TIF_SYSCALL_TRACE */
+#define _TIF_WORK_MASK          (0x0000FFFF & ~_TIF_SYSCALL_TRACE)
+
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK       0x0000FFFF
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/arch/hexagon/include/asm/time.h b/arch/hexagon/include/asm/time.h
new file mode 100644
index 000000000000..081b82cac9a9
--- /dev/null
+++ b/arch/hexagon/include/asm/time.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef ASM_TIME_H
+#define ASM_TIME_H
+
+extern cycles_t        pcycle_freq_mhz;
+extern cycles_t        thread_freq_mhz;
+extern cycles_t        sleep_clk_freq;
+
+void setup_percpu_clockdev(void);
+void ipi_timer(void);
+
+#endif
diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h
new file mode 100644
index 000000000000..d80db239a7b6
--- /dev/null
+++ b/arch/hexagon/include/asm/timer-regs.h
@@ -0,0 +1,39 @@
+/*
+ * Timer support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_TIMER_REGS_H
+#define _ASM_TIMER_REGS_H
+
+/*  This stuff should go into a platform specific file  */
+#define TCX0_CLK_RATE		19200
+#define TIMER_ENABLE		0
+#define TIMER_CLR_ON_MATCH	1
+
+/*
+ * 8x50 HDD Specs 5-8.  Simulator co-sim not fixed until
+ * release 1.1, and then it's "adjustable" and probably not defaulted.
+ */
+#define RTOS_TIMER_INT		3
+#ifdef CONFIG_HEXAGON_COMET
+#define RTOS_TIMER_REGS_ADDR	0xAB000000UL
+#endif
+#define SLEEP_CLK_RATE		32000
+
+#endif
diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h
new file mode 100644
index 000000000000..b11c62b23f31
--- /dev/null
+++ b/arch/hexagon/include/asm/timex.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_TIMEX_H
+#define _ASM_TIMEX_H
+
+#include <asm-generic/timex.h>
+#include <asm/timer-regs.h>
+
+/* Using TCX0 as our clock.  CLOCK_TICK_RATE scheduled to be removed. */
+#define CLOCK_TICK_RATE              TCX0_CLK_RATE
+
+#define ARCH_HAS_READ_CURRENT_TIMER
+
+static inline int read_current_timer(unsigned long *timer_val)
+{
+	*timer_val = (unsigned long) __vmgettime();
+	return 0;
+}
+
+#endif
diff --git a/arch/hexagon/include/asm/tlb.h b/arch/hexagon/include/asm/tlb.h
new file mode 100644
index 000000000000..473abde01d62
--- /dev/null
+++ b/arch/hexagon/include/asm/tlb.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_TLB_H
+#define _ASM_TLB_H
+
+#include <linux/pagemap.h>
+#include <asm/tlbflush.h>
+
+/*
+ * We don't need any special per-pte or per-vma handling...
+ */
+#define tlb_start_vma(tlb, vma)				do { } while (0)
+#define tlb_end_vma(tlb, vma)				do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address)	do { } while (0)
+
+/*
+ * .. because we flush the whole mm when it fills up
+ */
+#define tlb_flush(tlb)		flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/tlbflush.h b/arch/hexagon/include/asm/tlbflush.h
new file mode 100644
index 000000000000..b89a90251225
--- /dev/null
+++ b/arch/hexagon/include/asm/tlbflush.h
@@ -0,0 +1,58 @@
+/*
+ * TLB flush support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_TLBFLUSH_H
+#define _ASM_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <asm/processor.h>
+
+/*
+ * TLB flushing -- in "SMP", these routines get defined to be the
+ * ones from smp.c, else they are some local flavors.
+ */
+
+/*
+ * These functions are commonly macros, but in the interests of
+ * VM vs. native implementation and code size, we simply declare
+ * the function prototypes here.
+ */
+extern void tlb_flush_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+extern void flush_tlb_range(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_one(unsigned long);
+
+/*
+ * "This is called in munmap when we have freed up some page-table pages.
+ * We don't need to do anything here..."
+ *
+ * The VM kernel doesn't walk page tables, and they are passed to the VMM
+ * by logical address. There doesn't seem to be any possibility that they
+ * could be referenced by the VM kernel based on a stale mapping, since
+ * they would only be located by consulting the mm structure, and they
+ * will have been purged from that structure by the munmap.  Seems like
+ * a noop on HVM as well.
+ */
+#define flush_tlb_pgtables(mm, start, end)
+
+#endif
diff --git a/arch/hexagon/include/asm/traps.h b/arch/hexagon/include/asm/traps.h
new file mode 100644
index 000000000000..6a407f6e5e24
--- /dev/null
+++ b/arch/hexagon/include/asm/traps.h
@@ -0,0 +1,29 @@
+/*
+ * Trap support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_HEXAGON_TRAPS_H
+#define _ASM_HEXAGON_TRAPS_H
+
+#include <asm/registers.h>
+
+extern int die(const char *str, struct pt_regs *regs, long err);
+extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
+
+#endif /* _ASM_HEXAGON_TRAPS_H */
diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
new file mode 100644
index 000000000000..7e706eadbf0a
--- /dev/null
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -0,0 +1,116 @@
+/*
+ * User memory access support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_UACCESS_H
+#define _ASM_UACCESS_H
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/segment.h>
+#include <asm/sections.h>
+
+/*
+ * access_ok: - Checks if a user space pointer is valid
+ * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
+ *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
+ *        to write to a block, it is always safe to read from it.
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block *may* be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * User address space in Hexagon, like x86, goes to 0xbfffffff, so the
+ * simple MSB-based tests used by MIPS won't work.  Some further
+ * optimization is probably possible here, but for now, keep it
+ * reasonably simple and not *too* slow.  After all, we've got the
+ * MMU for backup.
+ */
+#define VERIFY_READ     0
+#define VERIFY_WRITE    1
+
+#define __access_ok(addr, size) \
+	((get_fs().seg == KERNEL_DS.seg) || \
+	(((unsigned long)addr < get_fs().seg) && \
+	  (unsigned long)size < (get_fs().seg - (unsigned long)addr)))
+
+/*
+ * When a kernel-mode page fault is taken, the faulting instruction
+ * address is checked against a table of exception_table_entries.
+ * Each entry is a tuple of the address of an instruction that may
+ * be authorized to fault, and the address at which execution should
+ * be resumed instead of the faulting instruction, so as to effect
+ * a workaround.
+ */
+
+/*  Assembly somewhat optimized copy routines  */
+unsigned long __copy_from_user_hexagon(void *to, const void __user *from,
+				     unsigned long n);
+unsigned long __copy_to_user_hexagon(void __user *to, const void *from,
+				   unsigned long n);
+
+#define __copy_from_user(to, from, n) __copy_from_user_hexagon(to, from, n)
+#define __copy_to_user(to, from, n) __copy_to_user_hexagon(to, from, n)
+
+/*
+ * XXX todo: some additonal performance gain is possible by
+ * implementing __copy_to/from_user_inatomic, which is much
+ * like __copy_to/from_user, but performs slightly less checking.
+ */
+
+__kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count);
+#define __clear_user(a, s) __clear_user_hexagon((a), (s))
+
+#define __strncpy_from_user(dst, src, n) hexagon_strncpy_from_user(dst, src, n)
+
+/*  get around the ifndef in asm-generic/uaccess.h  */
+#define __strnlen_user __strnlen_user
+
+extern long __strnlen_user(const char __user *src, long n);
+
+static inline long hexagon_strncpy_from_user(char *dst, const char __user *src,
+					     long n);
+
+#include <asm-generic/uaccess.h>
+
+/*  Todo:  an actual accelerated version of this.  */
+static inline long hexagon_strncpy_from_user(char *dst, const char __user *src,
+					     long n)
+{
+	long res = __strnlen_user(src, n);
+
+	/* return from strnlen can't be zero -- that would be rubbish. */
+
+	if (res > n) {
+		copy_from_user(dst, src, n);
+		return n;
+	} else {
+		copy_from_user(dst, src, res);
+		return res-1;
+	}
+}
+
+#endif
diff --git a/arch/hexagon/include/asm/unistd.h b/arch/hexagon/include/asm/unistd.h
new file mode 100644
index 000000000000..4d0ecde3665f
--- /dev/null
+++ b/arch/hexagon/include/asm/unistd.h
@@ -0,0 +1,36 @@
+/*
+ * Syscall support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#if !defined(_ASM_HEXAGON_UNISTD_H) || defined(__SYSCALL)
+#define _ASM_HEXAGON_UNISTD_H
+
+/*
+ *  The kernel pulls this unistd.h in three different ways:
+ *  1.  the "normal" way which gets all the __NR defines
+ *  2.  with __SYSCALL defined to produce function declarations
+ *  3.  with __SYSCALL defined to produce syscall table initialization
+ *  See also:  syscalltab.c
+ */
+
+#define sys_mmap2 sys_mmap_pgoff
+
+#include <asm-generic/unistd.h>
+
+#endif
diff --git a/arch/hexagon/include/asm/user.h b/arch/hexagon/include/asm/user.h
new file mode 100644
index 000000000000..3a55078543d1
--- /dev/null
+++ b/arch/hexagon/include/asm/user.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef HEXAGON_ASM_USER_H
+#define HEXAGON_ASM_USER_H
+
+/*
+ * Layout for registers passed in elf core dumps to userspace.
+ *
+ * Basically a rearranged subset of "pt_regs".
+ *
+ * Interested parties:  libc, gdb...
+ */
+
+struct user_regs_struct {
+	unsigned long r0;
+	unsigned long r1;
+	unsigned long r2;
+	unsigned long r3;
+	unsigned long r4;
+	unsigned long r5;
+	unsigned long r6;
+	unsigned long r7;
+	unsigned long r8;
+	unsigned long r9;
+	unsigned long r10;
+	unsigned long r11;
+	unsigned long r12;
+	unsigned long r13;
+	unsigned long r14;
+	unsigned long r15;
+	unsigned long r16;
+	unsigned long r17;
+	unsigned long r18;
+	unsigned long r19;
+	unsigned long r20;
+	unsigned long r21;
+	unsigned long r22;
+	unsigned long r23;
+	unsigned long r24;
+	unsigned long r25;
+	unsigned long r26;
+	unsigned long r27;
+	unsigned long r28;
+	unsigned long r29;
+	unsigned long r30;
+	unsigned long r31;
+	unsigned long sa0;
+	unsigned long lc0;
+	unsigned long sa1;
+	unsigned long lc1;
+	unsigned long m0;
+	unsigned long m1;
+	unsigned long usr;
+	unsigned long p3_0;
+	unsigned long gp;
+	unsigned long ugp;
+	unsigned long pc;
+	unsigned long cause;
+	unsigned long badva;
+	unsigned long pad1;  /* pad out to 48 words total */
+	unsigned long pad2;  /* pad out to 48 words total */
+	unsigned long pad3;  /* pad out to 48 words total */
+};
+
+#endif
diff --git a/arch/hexagon/include/asm/vdso.h b/arch/hexagon/include/asm/vdso.h
new file mode 100644
index 000000000000..2d95cbba3572
--- /dev/null
+++ b/arch/hexagon/include/asm/vdso.h
@@ -0,0 +1,30 @@
+/*
+ * vDSO implementation for Hexagon
+ *
+ * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#include <linux/types.h>
+
+struct hexagon_vdso {
+	u32 rt_signal_trampoline[2];
+};
+
+#endif /* __ASM_VDSO_H */
diff --git a/arch/hexagon/include/asm/vm_fault.h b/arch/hexagon/include/asm/vm_fault.h
new file mode 100644
index 000000000000..cacda36ef5d5
--- /dev/null
+++ b/arch/hexagon/include/asm/vm_fault.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_HEXAGON_VM_FAULT_H
+#define _ASM_HEXAGON_VM_FAULT_H
+
+extern void execute_protection_fault(struct pt_regs *);
+extern void write_protection_fault(struct pt_regs *);
+extern void read_protection_fault(struct pt_regs *);
+
+#endif
diff --git a/arch/hexagon/include/asm/vm_mmu.h b/arch/hexagon/include/asm/vm_mmu.h
new file mode 100644
index 000000000000..580462de5cca
--- /dev/null
+++ b/arch/hexagon/include/asm/vm_mmu.h
@@ -0,0 +1,111 @@
+/*
+ * Hexagon VM page table entry definitions
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef _ASM_VM_MMU_H
+#define _ASM_VM_MMU_H
+
+/*
+ * Shift, mask, and other constants for the Hexagon Virtual Machine
+ * page tables.
+ *
+ * Virtual machine MMU allows first-level entries to either be
+ * single-level lookup PTEs for very large pages, or PDEs pointing
+ * to second-level PTEs for smaller pages. If PTE is single-level,
+ * the least significant bits cannot be used as software bits to encode
+ * virtual memory subsystem information about the page, and that state
+ * must be maintained in some parallel data structure.
+ */
+
+/* S or Page Size field in PDE */
+#define	__HVM_PDE_S		(0x7 << 0)
+#define __HVM_PDE_S_4KB		0
+#define __HVM_PDE_S_16KB	1
+#define __HVM_PDE_S_64KB	2
+#define __HVM_PDE_S_256KB	3
+#define __HVM_PDE_S_1MB		4
+#define __HVM_PDE_S_4MB		5
+#define __HVM_PDE_S_16MB	6
+#define __HVM_PDE_S_INVALID	7
+
+/* Masks for L2 page table pointer, as function of page size */
+#define __HVM_PDE_PTMASK_4KB	0xfffff000
+#define __HVM_PDE_PTMASK_16KB	0xfffffc00
+#define __HVM_PDE_PTMASK_64KB	0xffffff00
+#define __HVM_PDE_PTMASK_256KB	0xffffffc0
+#define __HVM_PDE_PTMASK_1MB	0xfffffff0
+
+/*
+ * Virtual Machine PTE Bits/Fields
+ */
+#define __HVM_PTE_T		(1<<4)
+#define __HVM_PTE_U		(1<<5)
+#define	__HVM_PTE_C		(0x7<<6)
+#define __HVM_PTE_CVAL(pte)	(((pte) & __HVM_PTE_C) >> 6)
+#define __HVM_PTE_R		(1<<9)
+#define __HVM_PTE_W		(1<<10)
+#define __HVM_PTE_X		(1<<11)
+
+/*
+ * Cache Attributes, to be shifted as necessary for virtual/physical PTEs
+ */
+
+#define __HEXAGON_C_WB		0x0	/* Write-back, no L2 */
+#define	__HEXAGON_C_WT		0x1	/* Write-through, no L2 */
+#define	__HEXAGON_C_DEV		0x4	/* Device register space */
+#define	__HEXAGON_C_WT_L2	0x5	/* Write-through, with L2 */
+/* this really should be #if CONFIG_HEXAGON_ARCH = 2 but that's not defined */
+#if defined(CONFIG_HEXAGON_COMET) || defined(CONFIG_QDSP6_ST1)
+#define __HEXAGON_C_UNC		__HEXAGON_C_DEV
+#else
+#define	__HEXAGON_C_UNC		0x6	/* Uncached memory */
+#endif
+#define	__HEXAGON_C_WB_L2	0x7	/* Write-back, with L2 */
+
+/*
+ * This can be overriden, but we're defaulting to the most aggressive
+ * cache policy, the better to find bugs sooner.
+ */
+
+#define	CACHE_DEFAULT	__HEXAGON_C_WB_L2
+
+/* Masks for physical page address, as a function of page size */
+
+#define __HVM_PTE_PGMASK_4KB	0xfffff000
+#define __HVM_PTE_PGMASK_16KB	0xffffc000
+#define __HVM_PTE_PGMASK_64KB	0xffff0000
+#define __HVM_PTE_PGMASK_256KB	0xfffc0000
+#define __HVM_PTE_PGMASK_1MB	0xfff00000
+
+/* Masks for single-level large page lookups */
+
+#define __HVM_PTE_PGMASK_4MB	0xffc00000
+#define __HVM_PTE_PGMASK_16MB	0xff000000
+
+/*
+ * "Big kernel page mappings" (see vm_init_segtable.S)
+ * are currently 16MB
+ */
+
+#define BIG_KERNEL_PAGE_SHIFT 24
+#define BIG_KERNEL_PAGE_SIZE (1 << BIG_KERNEL_PAGE_SHIFT)
+
+
+
+#endif /* _ASM_VM_MMU_H */
diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile
new file mode 100644
index 000000000000..3689f3754d09
--- /dev/null
+++ b/arch/hexagon/kernel/Makefile
@@ -0,0 +1,18 @@
+extra-y := head.o vmlinux.lds init_task.o
+
+obj-$(CONFIG_SMP) += smp.o topology.o
+
+obj-y += setup.o irq_cpu.o traps.o syscalltab.o signal.o time.o
+obj-y += process.o syscall.o trampoline.o reset.o ptrace.o
+obj-y += vdso.o
+
+obj-$(CONFIG_KGDB)    += kgdb.o
+obj-$(CONFIG_MODULES) += module.o hexagon_ksyms.o
+
+# Modules required to work with the Hexagon Virtual Machine
+obj-y += vm_entry.o vm_events.o vm_switch.o vm_ops.o vm_init_segtable.o
+obj-y += vm_vectors.o
+
+obj-$(CONFIG_HAS_DMA) += dma.o
+
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/arch/hexagon/kernel/asm-offsets.c b/arch/hexagon/kernel/asm-offsets.c
new file mode 100644
index 000000000000..89ffa514611f
--- /dev/null
+++ b/arch/hexagon/kernel/asm-offsets.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 1996 David S. Miller
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Kevin Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/compat.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/kbuild.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+
+/*  This file is used to produce asm/linkerscript constants from header
+    files typically used in c.  Specifically, it generates asm-offsets.h  */
+
+int main(void)
+{
+	COMMENT("This is a comment.");
+	/*  might get these from somewhere else.  */
+	DEFINE(_PAGE_SIZE, PAGE_SIZE);
+	DEFINE(_PAGE_SHIFT, PAGE_SHIFT);
+	BLANK();
+
+	COMMENT("Hexagon pt_regs definitions");
+	OFFSET(_PT_SYSCALL_NR, pt_regs, syscall_nr);
+	OFFSET(_PT_UGPGP, pt_regs, ugpgp);
+	OFFSET(_PT_R3130, pt_regs, r3130);
+	OFFSET(_PT_R2928, pt_regs, r2928);
+	OFFSET(_PT_R2726, pt_regs, r2726);
+	OFFSET(_PT_R2524, pt_regs, r2524);
+	OFFSET(_PT_R2322, pt_regs, r2322);
+	OFFSET(_PT_R2120, pt_regs, r2120);
+	OFFSET(_PT_R1918, pt_regs, r1918);
+	OFFSET(_PT_R1716, pt_regs, r1716);
+	OFFSET(_PT_R1514, pt_regs, r1514);
+	OFFSET(_PT_R1312, pt_regs, r1312);
+	OFFSET(_PT_R1110, pt_regs, r1110);
+	OFFSET(_PT_R0908, pt_regs, r0908);
+	OFFSET(_PT_R0706, pt_regs, r0706);
+	OFFSET(_PT_R0504, pt_regs, r0504);
+	OFFSET(_PT_R0302, pt_regs, r0302);
+	OFFSET(_PT_R0100, pt_regs, r0100);
+	OFFSET(_PT_LC0SA0, pt_regs, lc0sa0);
+	OFFSET(_PT_LC1SA1, pt_regs, lc1sa1);
+	OFFSET(_PT_M1M0, pt_regs, m1m0);
+	OFFSET(_PT_PREDSUSR, pt_regs, predsusr);
+	OFFSET(_PT_EVREC, pt_regs, hvmer);
+	OFFSET(_PT_ER_VMEL, pt_regs, hvmer.vmel);
+	OFFSET(_PT_ER_VMEST, pt_regs, hvmer.vmest);
+	OFFSET(_PT_ER_VMPSP, pt_regs, hvmer.vmpsp);
+	OFFSET(_PT_ER_VMBADVA, pt_regs, hvmer.vmbadva);
+	DEFINE(_PT_REGS_SIZE, sizeof(struct pt_regs));
+	BLANK();
+
+	COMMENT("Hexagon thread_info definitions");
+	OFFSET(_THREAD_INFO_FLAGS, thread_info, flags);
+	OFFSET(_THREAD_INFO_PT_REGS, thread_info, regs);
+	OFFSET(_THREAD_INFO_SP, thread_info, sp);
+	DEFINE(_THREAD_SIZE, THREAD_SIZE);
+	BLANK();
+
+	COMMENT("Hexagon hexagon_switch_stack definitions");
+	OFFSET(_SWITCH_R1716, hexagon_switch_stack, r1716);
+	OFFSET(_SWITCH_R1918, hexagon_switch_stack, r1918);
+	OFFSET(_SWITCH_R2120, hexagon_switch_stack, r2120);
+	OFFSET(_SWITCH_R2322, hexagon_switch_stack, r2322);
+
+	OFFSET(_SWITCH_R2524, hexagon_switch_stack, r2524);
+	OFFSET(_SWITCH_R2726, hexagon_switch_stack, r2726);
+	OFFSET(_SWITCH_FP, hexagon_switch_stack, fp);
+	OFFSET(_SWITCH_LR, hexagon_switch_stack, lr);
+	DEFINE(_SWITCH_STACK_SIZE, sizeof(struct hexagon_switch_stack));
+	BLANK();
+
+	COMMENT("Hexagon task_struct definitions");
+	OFFSET(_TASK_THREAD_INFO, task_struct, stack);
+	OFFSET(_TASK_STRUCT_THREAD, task_struct, thread);
+
+	COMMENT("Hexagon thread_struct definitions");
+	OFFSET(_THREAD_STRUCT_SWITCH_SP, thread_struct, switch_sp);
+
+	return 0;
+}
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
new file mode 100644
index 000000000000..e711ace62fdf
--- /dev/null
+++ b/arch/hexagon/kernel/dma.c
@@ -0,0 +1,220 @@
+/*
+ * DMA implementation for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/bootmem.h>
+#include <linux/genalloc.h>
+#include <asm/dma-mapping.h>
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+int bad_dma_address;  /*  globals are automatically initialized to zero  */
+
+int dma_supported(struct device *dev, u64 mask)
+{
+	if (mask == DMA_BIT_MASK(32))
+		return 1;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(dma_supported);
+
+int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+static struct gen_pool *coherent_pool;
+
+
+/* Allocates from a pool of uncached memory that was reserved at boot time */
+
+void *hexagon_dma_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t *dma_addr, gfp_t flag)
+{
+	void *ret;
+
+	if (coherent_pool == NULL) {
+		coherent_pool = gen_pool_create(PAGE_SHIFT, -1);
+
+		if (coherent_pool == NULL)
+			panic("Can't create %s() memory pool!", __func__);
+		else
+			gen_pool_add(coherent_pool,
+				(PAGE_OFFSET + (max_low_pfn << PAGE_SHIFT)),
+				hexagon_coherent_pool_size, -1);
+	}
+
+	ret = (void *) gen_pool_alloc(coherent_pool, size);
+
+	if (ret) {
+		memset(ret, 0, size);
+		*dma_addr = (dma_addr_t) (ret - PAGE_OFFSET);
+	} else
+		*dma_addr = ~0;
+
+	return ret;
+}
+
+static void hexagon_free_coherent(struct device *dev, size_t size, void *vaddr,
+				  dma_addr_t dma_addr)
+{
+	gen_pool_free(coherent_pool, (unsigned long) vaddr, size);
+}
+
+static int check_addr(const char *name, struct device *hwdev,
+		      dma_addr_t bus, size_t size)
+{
+	if (hwdev && hwdev->dma_mask && !dma_capable(hwdev, bus, size)) {
+		if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
+			printk(KERN_ERR
+				"%s: overflow %Lx+%zu of device mask %Lx\n",
+				name, (long long)bus, size,
+				(long long)*hwdev->dma_mask);
+		return 0;
+	}
+	return 1;
+}
+
+static int hexagon_map_sg(struct device *hwdev, struct scatterlist *sg,
+			  int nents, enum dma_data_direction dir,
+			  struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	WARN_ON(nents == 0 || sg[0].length == 0);
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = sg_phys(s);
+		if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
+			return 0;
+
+		s->dma_length = s->length;
+
+		flush_dcache_range(PAGE_OFFSET + s->dma_address,
+				   PAGE_OFFSET + s->dma_address + s->length);
+	}
+
+	return nents;
+}
+
+/*
+ * address is virtual
+ */
+static inline void dma_sync(void *addr, size_t size,
+			    enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		hexagon_clean_dcache_range((unsigned long) addr,
+		(unsigned long) addr + size);
+		break;
+	case DMA_FROM_DEVICE:
+		hexagon_inv_dcache_range((unsigned long) addr,
+		(unsigned long) addr + size);
+		break;
+	case DMA_BIDIRECTIONAL:
+		flush_dcache_range((unsigned long) addr,
+		(unsigned long) addr + size);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void *dma_addr_to_virt(dma_addr_t dma_addr)
+{
+	return phys_to_virt((unsigned long) dma_addr);
+}
+
+/**
+ * hexagon_map_page() - maps an address for device DMA
+ * @dev:	pointer to DMA device
+ * @page:	pointer to page struct of DMA memory
+ * @offset:	offset within page
+ * @size:	size of memory to map
+ * @dir:	transfer direction
+ * @attrs:	pointer to DMA attrs (not used)
+ *
+ * Called to map a memory address to a DMA address prior
+ * to accesses to/from device.
+ *
+ * We don't particularly have many hoops to jump through
+ * so far.  Straight translation between phys and virtual.
+ *
+ * DMA is not cache coherent so sync is necessary; this
+ * seems to be a convenient place to do it.
+ *
+ */
+static dma_addr_t hexagon_map_page(struct device *dev, struct page *page,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction dir,
+				   struct dma_attrs *attrs)
+{
+	dma_addr_t bus = page_to_phys(page) + offset;
+	WARN_ON(size == 0);
+
+	if (!check_addr("map_single", dev, bus, size))
+		return bad_dma_address;
+
+	dma_sync(dma_addr_to_virt(bus), size, dir);
+
+	return bus;
+}
+
+static void hexagon_sync_single_for_cpu(struct device *dev,
+					dma_addr_t dma_handle, size_t size,
+					enum dma_data_direction dir)
+{
+	dma_sync(dma_addr_to_virt(dma_handle), size, dir);
+}
+
+static void hexagon_sync_single_for_device(struct device *dev,
+					dma_addr_t dma_handle, size_t size,
+					enum dma_data_direction dir)
+{
+	dma_sync(dma_addr_to_virt(dma_handle), size, dir);
+}
+
+struct dma_map_ops hexagon_dma_ops = {
+	.alloc_coherent	= hexagon_dma_alloc_coherent,
+	.free_coherent	= hexagon_free_coherent,
+	.map_sg		= hexagon_map_sg,
+	.map_page	= hexagon_map_page,
+	.sync_single_for_cpu = hexagon_sync_single_for_cpu,
+	.sync_single_for_device = hexagon_sync_single_for_device,
+	.is_phys	= 1,
+};
+
+void __init hexagon_dma_init(void)
+{
+	if (dma_ops)
+		return;
+
+	dma_ops = &hexagon_dma_ops;
+}
diff --git a/arch/hexagon/kernel/head.S b/arch/hexagon/kernel/head.S
new file mode 100644
index 000000000000..8e6b819125a3
--- /dev/null
+++ b/arch/hexagon/kernel/head.S
@@ -0,0 +1,162 @@
+/*
+ * Early kernel startup code for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/asm-offsets.h>
+#include <asm/mem-layout.h>
+#include <asm/vm_mmu.h>
+#include <asm/page.h>
+
+	__INIT
+ENTRY(stext)
+	/*
+	 * VMM will already have set up true vector page, MMU, etc.
+	 * To set up initial kernel identity map, we have to pass
+	 * the VMM a pointer to some canonical page tables. In
+	 * this implementation, we're assuming that we've got
+	 * them precompiled. Generate value in R24, as we'll need
+	 * it again shortly.
+	 */
+	r24.L = #LO(swapper_pg_dir)
+	r24.H = #HI(swapper_pg_dir)
+
+	/*
+	 * Symbol is kernel segment address, but we need
+	 * the logical/physical address.
+	 */
+	r24 = asl(r24, #2)
+	r24 = lsr(r24, #2)
+
+	r0 = r24
+
+	/*
+	 * Initialize a 16MB PTE to make the virtual and physical
+	 * addresses where the kernel was loaded be identical.
+	 */
+#define PTE_BITS ( __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X	\
+		  | __HEXAGON_C_WB_L2 << 6			\
+		  | __HVM_PDE_S_4MB)
+
+	r1 = pc
+	r2.H = #0xffc0
+	r2.L = #0x0000
+	r1 = and(r1,r2)		/* round PC to 4MB boundary	*/
+	r2 = lsr(r1, #22)	/* 4MB page number		*/
+	r2 = asl(r2, #2)	/* times sizeof(PTE) (4bytes)	*/
+	r0 = add(r0,r2)		/* r0 = address of correct PTE	*/
+	r2 = #PTE_BITS
+	r1 = add(r1,r2)		/* r1 = 4MB PTE for the first entry	*/
+	r2.h = #0x0040
+	r2.l = #0x0000		/* 4MB	*/
+	memw(r0 ++ #4) = r1
+	r1 = add(r1, r2)
+	memw(r0 ++ #4) = r1
+
+	r0 = r24
+
+	/*
+	 * The subroutine wrapper around the virtual instruction touches
+	 * no memory, so we should be able to use it even here.
+	 */
+	call	__vmnewmap;
+
+	/*  Jump into virtual address range.  */
+
+	r31.h = #hi(__head_s_vaddr_target)
+	r31.l = #lo(__head_s_vaddr_target)
+	jumpr r31
+
+	/*  Insert trippy space effects.  */
+
+__head_s_vaddr_target:
+	/*
+	 * Tear down VA=PA translation now that we are running
+	 * in the desgnated kernel segments.
+	 */
+	r0 = #__HVM_PDE_S_INVALID
+	r1 = r24
+	loop0(1f,#0x100)
+1:
+	{
+		memw(R1 ++ #4) = R0
+	}:endloop0
+
+	r0 = r24
+	call __vmnewmap
+
+	/*  Go ahead and install the trap0 return so angel calls work  */
+	r0.h = #hi(_K_provisional_vec)
+	r0.l = #lo(_K_provisional_vec)
+	call __vmsetvec
+
+	/*
+	 * OK, at this point we should start to be much more careful,
+	 * we're going to enter C code and start touching memory
+	 * in all sorts of places.
+	 * This means:
+	 *      SGP needs to be OK
+	 *	Need to lock shared resources
+	 *	A bunch of other things that will cause
+	 * 	all kinds of painful bugs
+	 */
+
+	/*
+	 * Stack pointer should be pointed at the init task's
+	 * thread stack, which should have been declared in arch/init_task.c.
+	 * So uhhhhh...
+	 * It's accessible via the init_thread_union, which is a union
+	 * of a thread_info struct and a stack; of course, the top
+	 * of the stack is not for you.  The end of the stack
+	 * is simply init_thread_union + THREAD_SIZE.
+	 */
+
+	{r29.H = #HI(init_thread_union); r0.H = #HI(_THREAD_SIZE); }
+	{r29.L = #LO(init_thread_union); r0.L = #LO(_THREAD_SIZE); }
+
+	/*  initialize the register used to point to current_thread_info */
+	/*  Fixme:  THREADINFO_REG can't be R2 because of that memset thing. */
+	{r29 = add(r29,r0); THREADINFO_REG = r29; }
+
+	/*  Hack:  zero bss; */
+	{ r0.L = #LO(__bss_start);  r1 = #0; r2.l = #LO(__bss_stop); }
+	{ r0.H = #HI(__bss_start);           r2.h = #HI(__bss_stop); }
+
+	r2 = sub(r2,r0);
+	call memset;
+
+	/* Time to make the doughnuts.   */
+	call start_kernel
+
+	/*
+	 * Should not reach here.
+	 */
+1:
+	jump 1b
+
+.p2align PAGE_SHIFT
+ENTRY(external_cmdline_buffer)
+        .fill _PAGE_SIZE,1,0
+
+.data
+.p2align PAGE_SHIFT
+ENTRY(empty_zero_page)
+        .fill _PAGE_SIZE,1,0
diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c
new file mode 100644
index 000000000000..7f1892471805
--- /dev/null
+++ b/arch/hexagon/kernel/hexagon_ksyms.c
@@ -0,0 +1,40 @@
+/*
+ * Export of symbols defined in assembly files and/or libgcc.
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/hexagon_vm.h>
+#include <asm/uaccess.h>
+
+EXPORT_SYMBOL(__copy_from_user_hexagon);
+EXPORT_SYMBOL(__copy_to_user_hexagon);
+EXPORT_SYMBOL(__vmgetie);
+EXPORT_SYMBOL(__vmsetie);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+
+#define DECLARE_EXPORT(name)     \
+	extern void name(void); EXPORT_SYMBOL(name)
+
+/* Symbols found in libgcc that assorted kernel modules need */
+DECLARE_EXPORT(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes);
+
+DECLARE_EXPORT(__hexagon_divsi3);
+DECLARE_EXPORT(__hexagon_modsi3);
+DECLARE_EXPORT(__hexagon_udivsi3);
+DECLARE_EXPORT(__hexagon_umodsi3);
diff --git a/arch/hexagon/kernel/init_task.c b/arch/hexagon/kernel/init_task.c
new file mode 100644
index 000000000000..73283d3edf09
--- /dev/null
+++ b/arch/hexagon/kernel/init_task.c
@@ -0,0 +1,54 @@
+/*
+ * Init task definition
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+#include <asm/thread_info.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by making sure
+ * the linker maps this in the .text segment right after head.S,
+ * and making head.S ensure the proper alignment.
+ */
+union thread_union init_thread_union
+	__attribute__((__section__(".data.init_task"),
+		__aligned__(THREAD_SIZE))) = {
+			INIT_THREAD_INFO(init_task)
+		};
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
diff --git a/arch/hexagon/kernel/irq_cpu.c b/arch/hexagon/kernel/irq_cpu.c
new file mode 100644
index 000000000000..d4416a1a431e
--- /dev/null
+++ b/arch/hexagon/kernel/irq_cpu.c
@@ -0,0 +1,90 @@
+/*
+ * First-level interrupt controller model for Hexagon.
+ *
+ * Copyright (c) 2010-2011 Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/interrupt.h>
+#include <asm/irq.h>
+#include <asm/hexagon_vm.h>
+
+static void mask_irq(struct irq_data *data)
+{
+	__vmintop_locdis((long) data->irq);
+}
+
+static void mask_irq_num(unsigned int irq)
+{
+	__vmintop_locdis((long) irq);
+}
+
+static void unmask_irq(struct irq_data *data)
+{
+	__vmintop_locen((long) data->irq);
+}
+
+/*  This is actually all we need for handle_fasteoi_irq  */
+static void eoi_irq(struct irq_data *data)
+{
+	__vmintop_globen((long) data->irq);
+}
+
+/* Power mamangement wake call. We don't need this, however,
+ * if this is absent, then an -ENXIO error is returned to the
+ * msm_serial driver, and it fails to correctly initialize.
+ * This is a bug in the msm_serial driver, but, for now, we
+ * work around it here, by providing this bogus handler.
+ * XXX FIXME!!! remove this when msm_serial is fixed.
+ */
+static int set_wake(struct irq_data *data, unsigned int on)
+{
+	return 0;
+}
+
+static struct irq_chip hexagon_irq_chip = {
+	.name		= "HEXAGON",
+	.irq_mask	= mask_irq,
+	.irq_unmask	= unmask_irq,
+	.irq_set_wake	= set_wake,
+	.irq_eoi	= eoi_irq
+};
+
+/**
+ * The hexagon core comes with a first-level interrupt controller
+ * with 32 total possible interrupts.  When the core is embedded
+ * into different systems/platforms, it is typically wrapped by
+ * macro cells that provide one or more second-level interrupt
+ * controllers that are cascaded into one or more of the first-level
+ * interrupts handled here. The precise wiring of these other
+ * irqs varies from platform to platform, and are set up & configured
+ * in the platform-specific files.
+ *
+ * The first-level interrupt controller is wrapped by the VM, which
+ * virtualizes the interrupt controller for us.  It provides a very
+ * simple, fast & efficient API, and so the fasteoi handler is
+ * appropriate for this case.
+ */
+void __init init_IRQ(void)
+{
+	int irq;
+
+	for (irq = 0; irq < HEXAGON_CPUINTS; irq++) {
+		mask_irq_num(irq);
+		irq_set_chip_and_handler(irq, &hexagon_irq_chip,
+						 handle_fasteoi_irq);
+	}
+}
diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c
new file mode 100644
index 000000000000..fe4aa1bcca50
--- /dev/null
+++ b/arch/hexagon/kernel/kgdb.c
@@ -0,0 +1,254 @@
+/*
+ * arch/hexagon/kernel/kgdb.c - Hexagon KGDB Support
+ *
+ * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+
+/* All registers are 4 bytes, for now */
+#define GDB_SIZEOF_REG 4
+
+/* The register names are used during printing of the regs;
+ * Keep these at three letters to pretty-print. */
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+	{ " r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, r00)},
+	{ " r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, r01)},
+	{ " r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, r02)},
+	{ " r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, r03)},
+	{ " r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, r04)},
+	{ " r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, r05)},
+	{ " r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, r06)},
+	{ " r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, r07)},
+	{ " r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, r08)},
+	{ " r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, r09)},
+	{ "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, r10)},
+	{ "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, r11)},
+	{ "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, r12)},
+	{ "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, r13)},
+	{ "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, r14)},
+	{ "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, r15)},
+	{ "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, r16)},
+	{ "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, r17)},
+	{ "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, r18)},
+	{ "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, r19)},
+	{ "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, r20)},
+	{ "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, r21)},
+	{ "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, r22)},
+	{ "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, r23)},
+	{ "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, r24)},
+	{ "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, r25)},
+	{ "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, r26)},
+	{ "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, r27)},
+	{ "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, r28)},
+	{ "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, r29)},
+	{ "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, r30)},
+	{ "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, r31)},
+
+	{ "usr", GDB_SIZEOF_REG, offsetof(struct pt_regs, usr)},
+	{ "preds", GDB_SIZEOF_REG, offsetof(struct pt_regs, preds)},
+	{ " m0", GDB_SIZEOF_REG, offsetof(struct pt_regs, m0)},
+	{ " m1", GDB_SIZEOF_REG, offsetof(struct pt_regs, m1)},
+	{ "sa0", GDB_SIZEOF_REG, offsetof(struct pt_regs, sa0)},
+	{ "sa1", GDB_SIZEOF_REG, offsetof(struct pt_regs, sa1)},
+	{ "lc0", GDB_SIZEOF_REG, offsetof(struct pt_regs, lc0)},
+	{ "lc1", GDB_SIZEOF_REG, offsetof(struct pt_regs, lc1)},
+	{ " gp", GDB_SIZEOF_REG, offsetof(struct pt_regs, gp)},
+	{ "ugp", GDB_SIZEOF_REG, offsetof(struct pt_regs, ugp)},
+	{ "psp", GDB_SIZEOF_REG, offsetof(struct pt_regs, hvmer.vmpsp)},
+	{ "elr", GDB_SIZEOF_REG, offsetof(struct pt_regs, hvmer.vmel)},
+	{ "est", GDB_SIZEOF_REG, offsetof(struct pt_regs, hvmer.vmest)},
+	{ "badva", GDB_SIZEOF_REG, offsetof(struct pt_regs, hvmer.vmbadva)},
+	{ "restart_r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, restart_r0)},
+	{ "syscall_nr", GDB_SIZEOF_REG, offsetof(struct pt_regs, syscall_nr)},
+};
+
+struct kgdb_arch arch_kgdb_ops = {
+	/* trap0(#0xDB) 0x0cdb0054 */
+	.gdb_bpt_instr = {0x54, 0x00, 0xdb, 0x0c},
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return NULL;
+
+	*((unsigned long *) mem) = *((unsigned long *) ((void *)regs +
+		dbg_reg_def[regno].offset));
+
+	return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	if (regno >= DBG_MAX_REG_NUM || regno < 0)
+		return -EINVAL;
+
+	*((unsigned long *) ((void *)regs + dbg_reg_def[regno].offset)) =
+		*((unsigned long *) mem);
+
+	return 0;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	instruction_pointer(regs) = pc;
+}
+
+#ifdef CONFIG_SMP
+
+/**
+ * kgdb_roundup_cpus - Get other CPUs into a holding pattern
+ * @flags: Current IRQ state
+ *
+ * On SMP systems, we need to get the attention of the other CPUs
+ * and get them be in a known state.  This should do what is needed
+ * to get the other CPUs to call kgdb_wait(). Note that on some arches,
+ * the NMI approach is not used for rounding up all the CPUs. For example,
+ * in case of MIPS, smp_call_function() is used to roundup CPUs. In
+ * this case, we have to make sure that interrupts are enabled before
+ * calling smp_call_function(). The argument to this function is
+ * the flags that will be used when restoring the interrupts. There is
+ * local_irq_save() call before kgdb_roundup_cpus().
+ *
+ * On non-SMP systems, this is not called.
+ */
+
+static void hexagon_kgdb_nmi_hook(void *ignored)
+{
+	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	local_irq_enable();
+	smp_call_function(hexagon_kgdb_nmi_hook, NULL, 0);
+	local_irq_disable();
+}
+#endif
+
+
+/*  Not yet working  */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
+				 struct task_struct *task)
+{
+	struct pt_regs *thread_regs;
+
+	if (task == NULL)
+		return;
+
+	/* Initialize to zero */
+	memset(gdb_regs, 0, NUMREGBYTES);
+
+	/* Otherwise, we have only some registers from switch_to() */
+	thread_regs = task_pt_regs(task);
+	gdb_regs[0] = thread_regs->r00;
+}
+
+/**
+ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
+ * @vector: The error vector of the exception that happened.
+ * @signo: The signal number of the exception that happened.
+ * @err_code: The error code of the exception that happened.
+ * @remcom_in_buffer: The buffer of the packet we have read.
+ * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * This function MUST handle the 'c' and 's' command packets,
+ * as well packets to set / remove a hardware breakpoint, if used.
+ * If there are additional packets which the hardware needs to handle,
+ * they are handled here.  The code should return -1 if it wants to
+ * process more packets, and a %0 or %1 if it wants to exit from the
+ * kgdb callback.
+ *
+ * Not yet working.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+			       char *remcom_in_buffer, char *remcom_out_buffer,
+			       struct pt_regs *linux_regs)
+{
+	switch (remcom_in_buffer[0]) {
+	case 's':
+	case 'c':
+		return 0;
+	}
+	/* Stay in the debugger. */
+	return -1;
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	/* cpu roundup */
+	if (atomic_read(&kgdb_active) != -1) {
+		kgdb_nmicallback(smp_processor_id(), args->regs);
+		return NOTIFY_STOP;
+	}
+
+	if (user_mode(args->regs))
+		return NOTIFY_DONE;
+
+	if (kgdb_handle_exception(args->trapnr & 0xff, args->signr, args->err,
+				    args->regs))
+		return NOTIFY_DONE;
+
+	return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __kgdb_notify(ptr, cmd);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+	.notifier_call = kgdb_notify,
+
+	/*
+	 * Lowest-prio notifier priority, we want to be notified last:
+	 */
+	.priority = -INT_MAX,
+};
+
+/**
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ *
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+	return register_die_notifier(&kgdb_notifier);
+}
+
+/**
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ *
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/hexagon/kernel/module.c b/arch/hexagon/kernel/module.c
new file mode 100644
index 000000000000..61a76bae3668
--- /dev/null
+++ b/arch/hexagon/kernel/module.c
@@ -0,0 +1,162 @@
+/*
+ * Kernel module loader for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/module.h>
+#include <linux/elf.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+/*
+ * module_frob_arch_sections - tweak got/plt sections.
+ * @hdr - pointer to elf header
+ * @sechdrs - pointer to elf load section headers
+ * @secstrings - symbol names
+ * @mod - pointer to module
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+				char *secstrings,
+				struct module *mod)
+{
+	unsigned int i;
+	int found = 0;
+
+	/* Look for .plt and/or .got.plt and/or .init.plt sections */
+	for (i = 0; i < hdr->e_shnum; i++) {
+		DEBUGP("Section %d is %s\n", i,
+		       secstrings + sechdrs[i].sh_name);
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+			found = i+1;
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".got.plt") == 0)
+			found = i+1;
+		if (strcmp(secstrings + sechdrs[i].sh_name, ".rela.plt") == 0)
+			found = i+1;
+	}
+
+	/* At this time, we don't support modules comiled with -shared */
+	if (found) {
+		printk(KERN_WARNING
+			"Module '%s' contains unexpected .plt/.got sections.\n",
+			mod->name);
+		/*  return -ENOEXEC;  */
+	}
+
+	return 0;
+}
+
+/*
+ * apply_relocate_add - perform rela relocations.
+ * @sechdrs - pointer to section headers
+ * @strtab - some sort of start address?
+ * @symindex - symbol index offset or something?
+ * @relsec - address to relocate to?
+ * @module - pointer to module
+ *
+ * Perform rela relocations.
+ */
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+			unsigned int symindex, unsigned int relsec,
+			struct module *module)
+{
+	unsigned int i;
+	Elf32_Sym *sym;
+	uint32_t *location;
+	uint32_t value;
+	unsigned int nrelocs = sechdrs[relsec].sh_size / sizeof(Elf32_Rela);
+	Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr;
+	Elf32_Word sym_info = sechdrs[relsec].sh_info;
+	Elf32_Sym *sym_base = (Elf32_Sym *) sechdrs[symindex].sh_addr;
+	void *loc_base = (void *) sechdrs[sym_info].sh_addr;
+
+	DEBUGP("Applying relocations in section %u to section %u base=%p\n",
+	       relsec, sym_info, loc_base);
+
+	for (i = 0; i < nrelocs; i++) {
+
+		/* Symbol to relocate */
+		sym = sym_base + ELF32_R_SYM(rela[i].r_info);
+
+		/* Where to make the change */
+		location = loc_base + rela[i].r_offset;
+
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		DEBUGP("%d: value=%08x loc=%p reloc=%d symbol=%s\n",
+		       i, value, location, ELF32_R_TYPE(rela[i].r_info),
+		       sym->st_name ?
+		       &strtab[sym->st_name] : "(anonymous)");
+
+		switch (ELF32_R_TYPE(rela[i].r_info)) {
+		case R_HEXAGON_B22_PCREL: {
+			int dist = (int)(value - (uint32_t)location);
+			if ((dist < -0x00800000) ||
+			    (dist >= 0x00800000)) {
+				printk(KERN_ERR
+				       "%s: %s: %08x=%08x-%08x %s\n",
+				       module->name,
+				       "R_HEXAGON_B22_PCREL reloc out of range",
+				       dist, value, (uint32_t)location,
+				       sym->st_name ?
+				       &strtab[sym->st_name] : "(anonymous)");
+				return -ENOEXEC;
+			}
+			DEBUGP("B22_PCREL contents: %08X.\n", *location);
+			*location &= ~0x01ff3fff;
+			*location |= 0x00003fff & dist;
+			*location |= 0x01ff0000 & (dist<<2);
+			DEBUGP("Contents after reloc: %08x\n", *location);
+			break;
+		}
+		case R_HEXAGON_HI16:
+			value = (value>>16) & 0xffff;
+			/* fallthrough */
+		case R_HEXAGON_LO16:
+			*location &= ~0x00c03fff;
+			*location |= value & 0x3fff;
+			*location |= (value & 0xc000) << 8;
+			break;
+		case R_HEXAGON_32:
+			*location = value;
+			break;
+		case R_HEXAGON_32_PCREL:
+			*location = value - (uint32_t)location;
+			break;
+		case R_HEXAGON_PLT_B22_PCREL:
+		case R_HEXAGON_GOTOFF_LO16:
+		case R_HEXAGON_GOTOFF_HI16:
+			printk(KERN_ERR "%s: GOT/PLT relocations unsupported\n",
+			       module->name);
+			return -ENOEXEC;
+		default:
+			printk(KERN_ERR "%s: unknown relocation: %u\n",
+			       module->name,
+			       ELF32_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
new file mode 100644
index 000000000000..18c4f0b0f4ba
--- /dev/null
+++ b/arch/hexagon/kernel/process.c
@@ -0,0 +1,279 @@
+/*
+ * Process creation support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/tick.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+/*
+ * Kernel thread creation.  The desired kernel function is "wrapped"
+ * in the kernel_thread_helper function, which does cleanup
+ * afterwards.
+ */
+static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *))
+{
+	do_exit(fn(arg));
+}
+
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	struct pt_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	/*
+	 * Yes, we're exploting illicit knowledge of the ABI here.
+	 */
+	regs.r00 = (unsigned long) arg;
+	regs.r01 = (unsigned long) fn;
+	pt_set_elr(&regs, (unsigned long)kernel_thread_helper);
+	pt_set_kmode(&regs);
+
+	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * Program thread launch.  Often defined as a macro in processor.h,
+ * but we're shooting for a small footprint and it's not an inner-loop
+ * performance-critical operation.
+ *
+ * The Hexagon ABI specifies that R28 is zero'ed before program launch,
+ * so that gets automatically done here.  If we ever stop doing that here,
+ * we'll probably want to define the ELF_PLAT_INIT macro.
+ */
+void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
+{
+	/* Set to run with user-mode data segmentation */
+	set_fs(USER_DS);
+	/* We want to zero all data-containing registers. Is this overkill? */
+	memset(regs, 0, sizeof(*regs));
+	/* We might want to also zero all Processor registers here */
+	pt_set_usermode(regs);
+	pt_set_elr(regs, pc);
+	pt_set_rte_sp(regs, sp);
+}
+
+/*
+ *  Spin, or better still, do a hardware or VM wait instruction
+ *  If hardware or VM offer wait termination even though interrupts
+ *  are disabled.
+ */
+static void default_idle(void)
+{
+	__vmwait();
+}
+
+void (*idle_sleep)(void) = default_idle;
+
+void cpu_idle(void)
+{
+	while (1) {
+		tick_nohz_stop_sched_tick(1);
+		local_irq_disable();
+		while (!need_resched()) {
+			idle_sleep();
+			/*  interrupts wake us up, but aren't serviced  */
+			local_irq_enable();	/* service interrupt   */
+			local_irq_disable();
+		}
+		local_irq_enable();
+		tick_nohz_restart_sched_tick();
+		schedule();
+	}
+}
+
+/*
+ *  Return saved PC of a blocked thread
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	return 0;
+}
+
+/*
+ * Copy architecture-specific thread state
+ */
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		unsigned long unused, struct task_struct *p,
+		struct pt_regs *regs)
+{
+	struct thread_info *ti = task_thread_info(p);
+	struct hexagon_switch_stack *ss;
+	struct pt_regs *childregs;
+	asmlinkage void ret_from_fork(void);
+
+	childregs = (struct pt_regs *) (((unsigned long) ti + THREAD_SIZE) -
+					sizeof(*childregs));
+
+	memcpy(childregs, regs, sizeof(*childregs));
+	ti->regs = childregs;
+
+	/*
+	 * Establish kernel stack pointer and initial PC for new thread
+	 */
+	ss = (struct hexagon_switch_stack *) ((unsigned long) childregs -
+						    sizeof(*ss));
+	ss->lr = (unsigned long)ret_from_fork;
+	p->thread.switch_sp = ss;
+
+	/* If User mode thread, set pt_reg stack pointer as per parameter */
+	if (user_mode(childregs)) {
+		pt_set_rte_sp(childregs, usp);
+
+		/* Child sees zero return value */
+		childregs->r00 = 0;
+
+		/*
+		 * The clone syscall has the C signature:
+		 * int [r0] clone(int flags [r0],
+		 *           void *child_frame [r1],
+		 *           void *parent_tid [r2],
+		 *           void *child_tid [r3],
+		 *           void *thread_control_block [r4]);
+		 * ugp is used to provide TLS support.
+		 */
+		if (clone_flags & CLONE_SETTLS)
+			childregs->ugp = childregs->r04;
+
+		/*
+		 * Parent sees new pid -- not necessary, not even possible at
+		 * this point in the fork process
+		 * Might also want to set things like ti->addr_limit
+		 */
+	} else {
+		/*
+		 * If kernel thread, resume stack is kernel stack base.
+		 * Note that this is pointer arithmetic on pt_regs *
+		 */
+		pt_set_rte_sp(childregs, (unsigned long)(childregs + 1));
+		/*
+		 * We need the current thread_info fast path pointer
+		 * set up in pt_regs.  The register to be used is
+		 * parametric for assembler code, but the mechanism
+		 * doesn't drop neatly into C.  Needs to be fixed.
+		 */
+		childregs->THREADINFO_REG = (unsigned long) ti;
+	}
+
+	/*
+	 * thread_info pointer is pulled out of task_struct "stack"
+	 * field on switch_to.
+	 */
+	p->stack = (void *)ti;
+
+	return 0;
+}
+
+/*
+ * Release any architecture-specific resources locked by thread
+ */
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+/*
+ * Free any architecture-specific thread data structures, etc.
+ */
+void exit_thread(void)
+{
+}
+
+/*
+ * Some archs flush debug and FPU info here
+ */
+void flush_thread(void)
+{
+}
+
+/*
+ * The "wait channel" terminology is archaic, but what we want
+ * is an identification of the point at which the scheduler
+ * was invoked by a blocked thread.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long fp, pc;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	stack_page = (unsigned long)task_stack_page(p);
+	fp = ((struct hexagon_switch_stack *)p->thread.switch_sp)->fp;
+	do {
+		if (fp < (stack_page + sizeof(struct thread_info)) ||
+			fp >= (THREAD_SIZE - 8 + stack_page))
+			return 0;
+		pc = ((unsigned long *)fp)[1];
+		if (!in_sched_functions(pc))
+			return pc;
+		fp = *(unsigned long *) fp;
+	} while (count++ < 16);
+
+	return 0;
+}
+
+/*
+ * Borrowed from PowerPC -- basically allow smaller kernel stacks if we
+ * go crazy with the page sizes.
+ */
+#if THREAD_SHIFT < PAGE_SHIFT
+
+static struct kmem_cache *thread_info_cache;
+
+struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
+{
+	struct thread_info *ti;
+
+	ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node);
+	if (unlikely(ti == NULL))
+		return NULL;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	memset(ti, 0, THREAD_SIZE);
+#endif
+	return ti;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(thread_info_cache, ti);
+}
+
+/*  Weak symbol; called by init/main.c  */
+
+void thread_info_cache_init(void)
+{
+	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+					      THREAD_SIZE, 0, NULL);
+	BUG_ON(thread_info_cache == NULL);
+}
+
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
+
+/*
+ * Required placeholder.
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+	return 0;
+}
diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c
new file mode 100644
index 000000000000..bea3f08470fd
--- /dev/null
+++ b/arch/hexagon/kernel/ptrace.c
@@ -0,0 +1,180 @@
+/*
+ * Ptrace support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <generated/compile.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/regset.h>
+#include <linux/user.h>
+
+#include <asm/system.h>
+#include <asm/user.h>
+
+static int genregs_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   void *kbuf, void __user *ubuf)
+{
+	int ret;
+	unsigned int dummy;
+	struct pt_regs *regs = task_pt_regs(target);
+
+
+	if (!regs)
+		return -EIO;
+
+	/* The general idea here is that the copyout must happen in
+	 * exactly the same order in which the userspace expects these
+	 * regs. Now, the sequence in userspace does not match the
+	 * sequence in the kernel, so everything past the 32 gprs
+	 * happens one at a time.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->r00, 0, 32*sizeof(unsigned long));
+
+#define ONEXT(KPT_REG, USR_REG) \
+	if (!ret) \
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, \
+			KPT_REG, offsetof(struct user_regs_struct, USR_REG), \
+			offsetof(struct user_regs_struct, USR_REG) + \
+				 sizeof(unsigned long));
+
+	/* Must be exactly same sequence as struct user_regs_struct */
+	ONEXT(&regs->sa0, sa0);
+	ONEXT(&regs->lc0, lc0);
+	ONEXT(&regs->sa1, sa1);
+	ONEXT(&regs->lc1, lc1);
+	ONEXT(&regs->m0, m0);
+	ONEXT(&regs->m1, m1);
+	ONEXT(&regs->usr, usr);
+	ONEXT(&regs->preds, p3_0);
+	ONEXT(&regs->gp, gp);
+	ONEXT(&regs->ugp, ugp);
+	ONEXT(&pt_elr(regs), pc);
+	dummy = pt_cause(regs);
+	ONEXT(&dummy, cause);
+	ONEXT(&pt_badva(regs), badva);
+
+	/* Pad the rest with zeros, if needed */
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					offsetof(struct user_regs_struct, pad1), -1);
+	return ret;
+}
+
+static int genregs_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	unsigned long bucket;
+	struct pt_regs *regs = task_pt_regs(target);
+
+	if (!regs)
+		return -EIO;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->r00, 0, 32*sizeof(unsigned long));
+
+#define INEXT(KPT_REG, USR_REG) \
+	if (!ret) \
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, \
+			KPT_REG, offsetof(struct user_regs_struct, USR_REG), \
+			offsetof(struct user_regs_struct, USR_REG) + \
+				sizeof(unsigned long));
+
+	/* Must be exactly same sequence as struct user_regs_struct */
+	INEXT(&regs->sa0, sa0);
+	INEXT(&regs->lc0, lc0);
+	INEXT(&regs->sa1, sa1);
+	INEXT(&regs->lc1, lc1);
+	INEXT(&regs->m0, m0);
+	INEXT(&regs->m1, m1);
+	INEXT(&regs->usr, usr);
+	INEXT(&regs->preds, p3_0);
+	INEXT(&regs->gp, gp);
+	INEXT(&regs->ugp, ugp);
+	INEXT(&pt_elr(regs), pc);
+
+	/* CAUSE and BADVA aren't writeable. */
+	INEXT(&bucket, cause);
+	INEXT(&bucket, badva);
+
+	/* Ignore the rest, if needed */
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					offsetof(struct user_regs_struct, pad1), -1);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * This is special; SP is actually restored by the VM via the
+	 * special event record which is set by the special trap.
+	 */
+	regs->hvmer.vmpsp = regs->r29;
+	return 0;
+}
+
+enum hexagon_regset {
+	REGSET_GENERAL,
+};
+
+static const struct user_regset hexagon_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = ELF_NGREG,
+		.size = sizeof(unsigned long),
+		.align = sizeof(unsigned long),
+		.get = genregs_get,
+		.set = genregs_set,
+	},
+};
+
+static const struct user_regset_view hexagon_user_view = {
+	.name = UTS_MACHINE,
+	.e_machine = ELF_ARCH,
+	.ei_osabi = ELF_OSABI,
+	.regsets = hexagon_regsets,
+	.n = ARRAY_SIZE(hexagon_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &hexagon_user_view;
+}
+
+void ptrace_disable(struct task_struct *child)
+{
+	/* Boilerplate - resolves to null inline if no HW single-step */
+	user_disable_single_step(child);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	return ptrace_request(child, request, addr, data);
+}
diff --git a/arch/hexagon/kernel/reset.c b/arch/hexagon/kernel/reset.c
new file mode 100644
index 000000000000..4d72fc58e9b1
--- /dev/null
+++ b/arch/hexagon/kernel/reset.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/smp.h>
+#include <asm/hexagon_vm.h>
+
+void machine_power_off(void)
+{
+	smp_send_stop();
+	__vmstop();
+}
+
+void machine_halt(void)
+{
+}
+
+void machine_restart(char *cmd)
+{
+}
+
+void pm_power_off(void)
+{
+}
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
new file mode 100644
index 000000000000..1202f78d25cb
--- /dev/null
+++ b/arch/hexagon/kernel/setup.c
@@ -0,0 +1,145 @@
+/*
+ * Arch related setup for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mmzone.h>
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+#include <linux/console.h>
+#include <linux/of_fdt.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/processor.h>
+#include <asm/hexagon_vm.h>
+#include <asm/vm_mmu.h>
+#include <asm/time.h>
+#ifdef CONFIG_OF
+#include <asm/prom.h>
+#endif
+
+char cmd_line[COMMAND_LINE_SIZE];
+static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
+
+int on_simulator;
+
+void __cpuinit calibrate_delay(void)
+{
+	loops_per_jiffy = thread_freq_mhz * 1000000 / HZ;
+}
+
+/*
+ * setup_arch -  high level architectural setup routine
+ * @cmdline_p: pointer to pointer to command-line arguments
+ */
+
+void __init setup_arch(char **cmdline_p)
+{
+	char *p = &external_cmdline_buffer;
+
+	/*
+	 * These will eventually be pulled in via either some hypervisor
+	 * or devicetree description.  Hardwiring for now.
+	 */
+	pcycle_freq_mhz = 600;
+	thread_freq_mhz = 100;
+	sleep_clk_freq = 32000;
+
+	/*
+	 * Set up event bindings to handle exceptions and interrupts.
+	 */
+	__vmsetvec(_K_VM_event_vector);
+
+	/*
+	 * Simulator has a few differences from the hardware.
+	 * For now, check uninitialized-but-mapped memory
+	 * prior to invoking setup_arch_memory().
+	 */
+	if (*(int *)((unsigned long)_end + 8) == 0x1f1f1f1f)
+		on_simulator = 1;
+	else
+		on_simulator = 0;
+
+	if (p[0] != '\0')
+		strlcpy(boot_command_line, p, COMMAND_LINE_SIZE);
+	else
+		strlcpy(boot_command_line, default_command_line,
+			COMMAND_LINE_SIZE);
+
+	/*
+	 * boot_command_line and the value set up by setup_arch
+	 * are both picked up by the init code. If no reason to
+	 * make them different, pass the same pointer back.
+	 */
+	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = cmd_line;
+
+	parse_early_param();
+
+	setup_arch_memory();
+
+#ifdef CONFIG_SMP
+	smp_start_cpus();
+#endif
+}
+
+/*
+ * Functions for dumping CPU info via /proc
+ * Probably should move to kernel/proc.c or something.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Eventually this will dump information about
+ * CPU properties like ISA level, TLB size, etc.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	int cpu = (unsigned long) v - 1;
+
+	seq_printf(m, "processor\t: %d\n", cpu);
+	seq_printf(m, "model name\t: Hexagon Virtual Machine\n");
+	seq_printf(m, "BogoMips\t: %lu.%02lu\n",
+		(loops_per_jiffy * HZ) / 500000,
+		((loops_per_jiffy * HZ) / 5000) % 100);
+	seq_printf(m, "\n");
+	return 0;
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start  = &c_start,
+	.next   = &c_next,
+	.stop   = &c_stop,
+	.show   = &show_cpuinfo,
+};
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
new file mode 100644
index 000000000000..b45be3181193
--- /dev/null
+++ b/arch/hexagon/kernel/signal.c
@@ -0,0 +1,345 @@
+/*
+ * Signal support for Hexagon processor
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+#include <linux/tracehook.h>
+#include <asm/registers.h>
+#include <asm/thread_info.h>
+#include <asm/unistd.h>
+#include <asm/uaccess.h>
+#include <asm/ucontext.h>
+#include <asm/cacheflush.h>
+#include <asm/signal.h>
+#include <asm/vdso.h>
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+struct rt_sigframe {
+	unsigned long tramp[2];
+	struct siginfo info;
+	struct ucontext uc;
+};
+
+static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+			  size_t frame_size)
+{
+	unsigned long sp = regs->r29;
+
+	/* Switch to signal stack if appropriate */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	return (void __user *)((sp - frame_size) & ~(sizeof(long long) - 1));
+}
+
+static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+{
+	unsigned long tmp;
+	int err = 0;
+
+	err |= copy_to_user(&sc->sc_regs.r0, &regs->r00,
+			    32*sizeof(unsigned long));
+
+	err |= __put_user(regs->sa0, &sc->sc_regs.sa0);
+	err |= __put_user(regs->lc0, &sc->sc_regs.lc0);
+	err |= __put_user(regs->sa1, &sc->sc_regs.sa1);
+	err |= __put_user(regs->lc1, &sc->sc_regs.lc1);
+	err |= __put_user(regs->m0, &sc->sc_regs.m0);
+	err |= __put_user(regs->m1, &sc->sc_regs.m1);
+	err |= __put_user(regs->usr, &sc->sc_regs.usr);
+	err |= __put_user(regs->preds, &sc->sc_regs.p3_0);
+	err |= __put_user(regs->gp, &sc->sc_regs.gp);
+	err |= __put_user(regs->ugp, &sc->sc_regs.ugp);
+
+	tmp = pt_elr(regs); err |= __put_user(tmp, &sc->sc_regs.pc);
+	tmp = pt_cause(regs); err |= __put_user(tmp, &sc->sc_regs.cause);
+	tmp = pt_badva(regs); err |= __put_user(tmp, &sc->sc_regs.badva);
+
+	return err;
+}
+
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
+{
+	unsigned long tmp;
+	int err = 0;
+
+	err |= copy_from_user(&regs->r00, &sc->sc_regs.r0,
+			      32 * sizeof(unsigned long));
+
+	err |= __get_user(regs->sa0, &sc->sc_regs.sa0);
+	err |= __get_user(regs->lc0, &sc->sc_regs.lc0);
+	err |= __get_user(regs->sa1, &sc->sc_regs.sa1);
+	err |= __get_user(regs->lc1, &sc->sc_regs.lc1);
+	err |= __get_user(regs->m0, &sc->sc_regs.m0);
+	err |= __get_user(regs->m1, &sc->sc_regs.m1);
+	err |= __get_user(regs->usr, &sc->sc_regs.usr);
+	err |= __get_user(regs->preds, &sc->sc_regs.p3_0);
+	err |= __get_user(regs->gp, &sc->sc_regs.gp);
+	err |= __get_user(regs->ugp, &sc->sc_regs.ugp);
+
+	err |= __get_user(tmp, &sc->sc_regs.pc); pt_set_elr(regs, tmp);
+
+	return err;
+}
+
+/*
+ * Setup signal stack frame with siginfo structure
+ */
+static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
+			  sigset_t *set,  struct pt_regs *regs)
+{
+	int err = 0;
+	struct rt_sigframe __user *frame;
+	struct hexagon_vdso *vdso = current->mm->context.vdso;
+
+	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe));
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(struct rt_sigframe)))
+		goto	sigsegv;
+
+	if (copy_siginfo_to_user(&frame->info, info))
+		goto	sigsegv;
+
+	/* The on-stack signal trampoline is no longer executed;
+	 * however, the libgcc signal frame unwinding code checks for
+	 * the presence of these two numeric magic values.
+	 */
+	err |= __put_user(0x7800d166, &frame->tramp[0]);
+	err |= __put_user(0x5400c004, &frame->tramp[1]);
+	err |= setup_sigcontext(regs, &frame->uc.uc_mcontext);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto sigsegv;
+
+	/* Load r0/r1 pair with signumber/siginfo pointer... */
+	regs->r0100 = ((unsigned long long)((unsigned long)&frame->info) << 32)
+		| (unsigned long long)signr;
+	regs->r02 = (unsigned long) &frame->uc;
+	regs->r31 = (unsigned long) vdso->rt_signal_trampoline;
+	pt_psp(regs) = (unsigned long) frame;
+	pt_set_elr(regs, (unsigned long)ka->sa.sa_handler);
+
+	return 0;
+
+sigsegv:
+	force_sigsegv(signr, current);
+	return -EFAULT;
+}
+
+/*
+ * Setup invocation of signal handler
+ */
+static int handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
+			 sigset_t *oldset, struct pt_regs *regs)
+{
+	int rc;
+
+	/*
+	 * If we're handling a signal that aborted a system call,
+	 * set up the error return value before adding the signal
+	 * frame to the stack.
+	 */
+
+	if (regs->syscall_nr >= 0) {
+		switch (regs->r00) {
+		case -ERESTART_RESTARTBLOCK:
+		case -ERESTARTNOHAND:
+			regs->r00 = -EINTR;
+			break;
+		case -ERESTARTSYS:
+			if (!(ka->sa.sa_flags & SA_RESTART)) {
+				regs->r00 = -EINTR;
+				break;
+			}
+			/* Fall through */
+		case -ERESTARTNOINTR:
+			regs->r06 = regs->syscall_nr;
+			pt_set_elr(regs, pt_elr(regs) - 4);
+			regs->r00 = regs->restart_r0;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * Set up the stack frame; not doing the SA_SIGINFO thing.  We
+	 * only set up the rt_frame flavor.
+	 */
+	rc = setup_rt_frame(sig, ka, info, oldset, regs);
+
+	/* If there was an error on setup, no signal was delivered. */
+	if (rc)
+		return rc;
+
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	return 0;
+}
+
+/*
+ * Called from return-from-event code.
+ */
+static void do_signal(struct pt_regs *regs)
+{
+	struct k_sigaction sigact;
+	siginfo_t info;
+	int signo;
+
+	if (!user_mode(regs))
+		return;
+
+	if (try_to_freeze())
+		goto no_signal;
+
+	signo = get_signal_to_deliver(&info, &sigact, regs, NULL);
+
+	if (signo > 0) {
+		sigset_t *oldset;
+
+		if (test_thread_flag(TIF_RESTORE_SIGMASK))
+			oldset = &current->saved_sigmask;
+		else
+			oldset = &current->blocked;
+
+		if (handle_signal(signo, &info, &sigact, oldset, regs) == 0) {
+			/*
+			 * Successful delivery case.  The saved sigmask is
+			 * stored in the signal frame, and will be restored
+			 * by sigreturn.  We can clear the TIF flag.
+			 */
+			clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+			tracehook_signal_handler(signo, &info, &sigact, regs,
+				test_thread_flag(TIF_SINGLESTEP));
+		}
+		return;
+	}
+
+no_signal:
+	/*
+	 * If we came from a system call, handle the restart.
+	 */
+	if (regs->syscall_nr >= 0) {
+		switch (regs->r00) {
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			regs->r06 = regs->syscall_nr;
+			break;
+		case -ERESTART_RESTARTBLOCK:
+			regs->r06 = __NR_restart_syscall;
+			break;
+		default:
+			goto no_restart;
+		}
+		pt_set_elr(regs, pt_elr(regs) - 4);
+		regs->r00 = regs->restart_r0;
+	}
+
+no_restart:
+	/* If there's no signal to deliver, put the saved sigmask back */
+	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+		clear_thread_flag(TIF_RESTORE_SIGMASK);
+		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+	}
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
+{
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		if (current->replacement_session_keyring)
+			key_replace_session_keyring();
+	}
+}
+
+/*
+ * Architecture-specific wrappers for signal-related system calls
+ */
+asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
+{
+	struct pt_regs *regs = current_thread_info()->regs;
+
+	return do_sigaltstack(uss, uoss, regs->r29);
+}
+
+asmlinkage int sys_rt_sigreturn(void)
+{
+	struct pt_regs *regs = current_thread_info()->regs;
+	struct rt_sigframe __user *frame;
+	sigset_t blocked;
+
+	frame = (struct rt_sigframe __user *)pt_psp(regs);
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+		goto badframe;
+	if (__copy_from_user(&blocked, &frame->uc.uc_sigmask, sizeof(blocked)))
+		goto badframe;
+
+	sigdelsetmask(&blocked, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = blocked;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	/* Restore the user's stack as well */
+	pt_psp(regs) = regs->r29;
+
+	/*
+	 * Leave a trace in the stack frame that this was a sigreturn.
+	 * If the system call is to replay, we've already restored the
+	 * number in the GPR slot and it will be regenerated on the
+	 * new system call trap entry. Note that if restore_sigcontext()
+	 * did something other than a bulk copy of the pt_regs struct,
+	 * we could avoid this assignment by simply not overwriting
+	 * regs->syscall_nr.
+	 */
+	regs->syscall_nr = __NR_rt_sigreturn;
+
+	/*
+	 * If we were meticulous, we'd only call this if we knew that
+	 * we were actually going to use an alternate stack, and we'd
+	 * consider any error to be fatal.  What we do here, in common
+	 * with many other architectures, is call it blindly and only
+	 * consider the -EFAULT return case to be proof of a problem.
+	 */
+	if (do_sigaltstack(&frame->uc.uc_stack, NULL, pt_psp(regs)) == -EFAULT)
+		goto badframe;
+
+	return 0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
new file mode 100644
index 000000000000..c871a2cffaef
--- /dev/null
+++ b/arch/hexagon/kernel/smp.c
@@ -0,0 +1,276 @@
+/*
+ * SMP support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+
+#include <asm/system.h>  /*  xchg  */
+#include <asm/time.h>    /*  timer_interrupt  */
+#include <asm/hexagon_vm.h>
+
+#define BASE_IPI_IRQ 26
+
+/*
+ * cpu_possible_map needs to be filled out prior to setup_per_cpu_areas
+ * (which is prior to any of our smp_prepare_cpu crap), in order to set
+ * up the...  per_cpu areas.
+ */
+
+struct ipi_data {
+	unsigned long bits;
+};
+
+static DEFINE_PER_CPU(struct ipi_data, ipi_data);
+
+static inline void __handle_ipi(unsigned long *ops, struct ipi_data *ipi,
+				int cpu)
+{
+	unsigned long msg = 0;
+	do {
+		msg = find_next_bit(ops, BITS_PER_LONG, msg+1);
+
+		switch (msg) {
+
+		case IPI_TIMER:
+			ipi_timer();
+			break;
+
+		case IPI_CALL_FUNC:
+			generic_smp_call_function_interrupt();
+			break;
+
+		case IPI_CALL_FUNC_SINGLE:
+			generic_smp_call_function_single_interrupt();
+			break;
+
+		case IPI_CPU_STOP:
+			/*
+			 * call vmstop()
+			 */
+			__vmstop();
+			break;
+
+		case IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
+		}
+	} while (msg < BITS_PER_LONG);
+}
+
+/*  Used for IPI call from other CPU's to unmask int  */
+void smp_vm_unmask_irq(void *info)
+{
+	__vmintop_locen((long) info);
+}
+
+
+/*
+ * This is based on Alpha's IPI stuff.
+ * Supposed to take (int, void*) as args now.
+ * Specifically, first arg is irq, second is the irq_desc.
+ */
+
+irqreturn_t handle_ipi(int irq, void *desc)
+{
+	int cpu = smp_processor_id();
+	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+	unsigned long ops;
+
+	while ((ops = xchg(&ipi->bits, 0)) != 0)
+		__handle_ipi(&ops, ipi, cpu);
+	return IRQ_HANDLED;
+}
+
+void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
+{
+	unsigned long flags;
+	unsigned long cpu;
+	unsigned long retval;
+
+	local_irq_save(flags);
+
+	for_each_cpu(cpu, cpumask) {
+		struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+
+		set_bit(msg, &ipi->bits);
+		/*  Possible barrier here  */
+		retval = __vmintop_post(BASE_IPI_IRQ+cpu);
+
+		if (retval != 0) {
+			printk(KERN_ERR "interrupt %ld not configured?\n",
+				BASE_IPI_IRQ+cpu);
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static struct irqaction ipi_intdesc = {
+	.handler = handle_ipi,
+	.flags = IRQF_TRIGGER_RISING,
+	.name = "ipi_handler"
+};
+
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+/*
+ * interrupts should already be disabled from the VM
+ * SP should already be correct; need to set THREADINFO_REG
+ * to point to current thread info
+ */
+
+void __cpuinit start_secondary(void)
+{
+	unsigned int cpu;
+	unsigned long thread_ptr;
+
+	/*  Calculate thread_info pointer from stack pointer  */
+	__asm__ __volatile__(
+		"%0 = SP;\n"
+		: "=r" (thread_ptr)
+	);
+
+	thread_ptr = thread_ptr & ~(THREAD_SIZE-1);
+
+	__asm__ __volatile__(
+		QUOTED_THREADINFO_REG " = %0;\n"
+		:
+		: "r" (thread_ptr)
+	);
+
+	/*  Set the memory struct  */
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+
+	cpu = smp_processor_id();
+
+	setup_irq(BASE_IPI_IRQ + cpu, &ipi_intdesc);
+
+	/*  Register the clock_event dummy  */
+	setup_percpu_clockdev();
+
+	printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu);
+
+	set_cpu_online(cpu, true);
+	while (!cpumask_test_cpu(cpu, cpu_active_mask))
+		cpu_relax();
+	local_irq_enable();
+
+	cpu_idle();
+}
+
+
+/*
+ * called once for each present cpu
+ * apparently starts up the CPU and then
+ * maintains control until "cpu_online(cpu)" is set.
+ */
+
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+	struct task_struct *idle;
+	struct thread_info *thread;
+	void *stack_start;
+
+	/*  Create new init task for the CPU  */
+	idle = fork_idle(cpu);
+	if (IS_ERR(idle))
+		panic(KERN_ERR "fork_idle failed\n");
+
+	thread = (struct thread_info *)idle->stack;
+	thread->cpu = cpu;
+
+	/*  Boot to the head.  */
+	stack_start =  ((void *) thread) + THREAD_SIZE;
+	__vmstart(start_secondary, stack_start);
+
+	while (!cpu_isset(cpu, cpu_online_map))
+		barrier();
+
+	return 0;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i;
+
+	/*
+	 * should eventually have some sort of machine
+	 * descriptor that has this stuff
+	 */
+
+	/*  Right now, let's just fake it. */
+	for (i = 0; i < max_cpus; i++)
+		cpu_set(i, cpu_present_map);
+
+	/*  Also need to register the interrupts for IPI  */
+	if (max_cpus > 1)
+		setup_irq(BASE_IPI_IRQ, &ipi_intdesc);
+}
+
+void smp_send_reschedule(int cpu)
+{
+	send_ipi(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+void smp_send_stop(void)
+{
+	struct cpumask targets;
+	cpumask_copy(&targets, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &targets);
+	send_ipi(&targets, IPI_CPU_STOP);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_ipi(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_ipi(mask, IPI_CALL_FUNC);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+void smp_start_cpus(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		cpu_set(i, cpu_possible_map);
+}
diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c
new file mode 100644
index 000000000000..11c597b2ac59
--- /dev/null
+++ b/arch/hexagon/kernel/stacktrace.c
@@ -0,0 +1,66 @@
+/*
+ * Stacktrace support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/thread_info.h>
+#include <linux/module.h>
+
+register unsigned long current_frame_pointer asm("r30");
+
+struct stackframe {
+	unsigned long fp;
+	unsigned long rets;
+};
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+	unsigned long low, high;
+	unsigned long fp;
+	struct stackframe *frame;
+	int skip = trace->skip;
+
+	low = (unsigned long)task_stack_page(current);
+	high = low + THREAD_SIZE;
+	fp = current_frame_pointer;
+
+	while (fp >= low && fp <= (high - sizeof(*frame))) {
+		frame = (struct stackframe *)fp;
+
+		if (skip) {
+			skip--;
+		} else {
+			trace->entries[trace->nr_entries++] = frame->rets;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+
+		/*
+		 * The next frame must be at a higher address than the
+		 * current frame.
+		 */
+		low = fp + sizeof(*frame);
+		fp = frame->fp;
+	}
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
diff --git a/arch/hexagon/kernel/syscall.c b/arch/hexagon/kernel/syscall.c
new file mode 100644
index 000000000000..620dd18197a0
--- /dev/null
+++ b/arch/hexagon/kernel/syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Hexagon system calls
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/linkage.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <asm/mman.h>
+#include <asm/registers.h>
+
+/*
+ * System calls with architecture-specific wrappers.
+ * See signal.c for signal-related system call wrappers.
+ */
+
+asmlinkage int sys_execve(char __user *ufilename,
+			  const char __user *const __user *argv,
+			  const char __user *const __user *envp)
+{
+	struct pt_regs *pregs = current_thread_info()->regs;
+	char *filename;
+	int retval;
+
+	filename = getname(ufilename);
+	retval = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		return retval;
+
+	retval = do_execve(filename, argv, envp, pregs);
+	putname(filename);
+
+	return retval;
+}
+
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
+			 unsigned long parent_tidp, unsigned long child_tidp)
+{
+	struct pt_regs *pregs = current_thread_info()->regs;
+
+	if (!newsp)
+		newsp = pregs->SP;
+	return do_fork(clone_flags, newsp, pregs, 0, (int __user *)parent_tidp,
+		       (int __user *)child_tidp);
+}
+
+/*
+ * Do a system call from the kernel, so as to have a proper pt_regs
+ * and recycle the sys_execvpe infrustructure.
+ */
+int kernel_execve(const char *filename,
+		  const char *const argv[], const char *const envp[])
+{
+	register unsigned long __a0 asm("r0") = (unsigned long) filename;
+	register unsigned long __a1 asm("r1") = (unsigned long) argv;
+	register unsigned long __a2 asm("r2") = (unsigned long) envp;
+	int retval;
+
+	__asm__ volatile(
+		"	R6 = #%4;\n"
+		"	trap0(#1);\n"
+		"	%0 = R0;\n"
+		: "=r" (retval)
+		: "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_execve)
+	);
+
+	return retval;
+}
+EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/hexagon/kernel/syscalltab.c b/arch/hexagon/kernel/syscalltab.c
new file mode 100644
index 000000000000..c550f4177ab8
--- /dev/null
+++ b/arch/hexagon/kernel/syscalltab.c
@@ -0,0 +1,32 @@
+/*
+ * System call table for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/unistd.h>
+
+#include <asm/syscall.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+void *sys_call_table[__NR_syscalls] = {
+#include <asm/unistd.h>
+};
diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c
new file mode 100644
index 000000000000..6bee15c9c113
--- /dev/null
+++ b/arch/hexagon/kernel/time.c
@@ -0,0 +1,250 @@
+/*
+ * Time related functions for Hexagon architecture
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/timer-regs.h>
+#include <asm/hexagon_vm.h>
+
+/*
+ * For the clocksource we need:
+ *	pcycle frequency (600MHz)
+ * For the loops_per_jiffy we need:
+ *	thread/cpu frequency (100MHz)
+ * And for the timer, we need:
+ *	sleep clock rate
+ */
+
+cycles_t	pcycle_freq_mhz;
+cycles_t	thread_freq_mhz;
+cycles_t	sleep_clk_freq;
+
+static struct resource rtos_timer_resources[] = {
+	{
+		.start	= RTOS_TIMER_REGS_ADDR,
+		.end	= RTOS_TIMER_REGS_ADDR+PAGE_SIZE-1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device rtos_timer_device = {
+	.name		= "rtos_timer",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(rtos_timer_resources),
+	.resource	= rtos_timer_resources,
+};
+
+/*  A lot of this stuff should move into a platform specific section.  */
+struct adsp_hw_timer_struct {
+	u32 match;   /*  Match value  */
+	u32 count;
+	u32 enable;  /*  [1] - CLR_ON_MATCH_EN, [0] - EN  */
+	u32 clear;   /*  one-shot register that clears the count  */
+};
+
+/*  Look for "TCX0" for related constants.  */
+static __iomem struct adsp_hw_timer_struct *rtos_timer;
+
+static cycle_t timer_get_cycles(struct clocksource *cs)
+{
+	return (cycle_t) __vmgettime();
+}
+
+static struct clocksource hexagon_clocksource = {
+	.name		= "pcycles",
+	.rating		= 250,
+	.read		= timer_get_cycles,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int set_next_event(unsigned long delta, struct clock_event_device *evt)
+{
+	/*  Assuming the timer will be disabled when we enter here.  */
+
+	iowrite32(1, &rtos_timer->clear);
+	iowrite32(0, &rtos_timer->clear);
+
+	iowrite32(delta, &rtos_timer->match);
+	iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable);
+	return 0;
+}
+
+/*
+ * Sets the mode (periodic, shutdown, oneshot, etc) of a timer.
+ */
+static void set_mode(enum clock_event_mode mode,
+	struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		/* XXX implement me */
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_SMP
+/*  Broadcast mechanism  */
+static void broadcast(const struct cpumask *mask)
+{
+	send_ipi(mask, IPI_TIMER);
+}
+#endif
+
+static struct clock_event_device hexagon_clockevent_dev = {
+	.name		= "clockevent",
+	.features	= CLOCK_EVT_FEAT_ONESHOT,
+	.rating		= 400,
+	.irq		= RTOS_TIMER_INT,
+	.set_next_event = set_next_event,
+	.set_mode	= set_mode,
+#ifdef CONFIG_SMP
+	.broadcast	= broadcast,
+#endif
+};
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct clock_event_device, clock_events);
+
+void setup_percpu_clockdev(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *ce_dev = &hexagon_clockevent_dev;
+	struct clock_event_device *dummy_clock_dev =
+		&per_cpu(clock_events, cpu);
+
+	memcpy(dummy_clock_dev, ce_dev, sizeof(*dummy_clock_dev));
+	INIT_LIST_HEAD(&dummy_clock_dev->list);
+
+	dummy_clock_dev->features = CLOCK_EVT_FEAT_DUMMY;
+	dummy_clock_dev->cpumask = cpumask_of(cpu);
+	dummy_clock_dev->mode = CLOCK_EVT_MODE_UNUSED;
+
+	clockevents_register_device(dummy_clock_dev);
+}
+
+/*  Called from smp.c for each CPU's timer ipi call  */
+void ipi_timer(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *ce_dev = &per_cpu(clock_events, cpu);
+
+	ce_dev->event_handler(ce_dev);
+}
+#endif /* CONFIG_SMP */
+
+static irqreturn_t timer_interrupt(int irq, void *devid)
+{
+	struct clock_event_device *ce_dev = &hexagon_clockevent_dev;
+
+	iowrite32(0, &rtos_timer->enable);
+	ce_dev->event_handler(ce_dev);
+
+	return IRQ_HANDLED;
+}
+
+/*  This should also be pulled from devtree  */
+static struct irqaction rtos_timer_intdesc = {
+	.handler = timer_interrupt,
+	.flags = IRQF_TIMER | IRQF_TRIGGER_RISING,
+	.name = "rtos_timer"
+};
+
+/*
+ * time_init_deferred - called by start_kernel to set up timer/clock source
+ *
+ * Install the IRQ handler for the clock, setup timers.
+ * This is done late, as that way, we can use ioremap().
+ *
+ * This runs just before the delay loop is calibrated, and
+ * is used for delay calibration.
+ */
+void __init time_init_deferred(void)
+{
+	struct resource *resource = NULL;
+	struct clock_event_device *ce_dev = &hexagon_clockevent_dev;
+	struct device_node *dn;
+	struct resource r;
+	int err;
+
+	ce_dev->cpumask = cpu_all_mask;
+
+	if (!resource)
+		resource = rtos_timer_device.resource;
+
+	/*  ioremap here means this has to run later, after paging init  */
+	rtos_timer = ioremap(resource->start, resource->end
+		- resource->start + 1);
+
+	if (!rtos_timer) {
+		release_mem_region(resource->start, resource->end
+			- resource->start + 1);
+	}
+	clocksource_register_khz(&hexagon_clocksource, pcycle_freq_mhz * 1000);
+
+	/*  Note: the sim generic RTOS clock is apparently really 18750Hz  */
+
+	/*
+	 * Last arg is some guaranteed seconds for which the conversion will
+	 * work without overflow.
+	 */
+	clockevents_calc_mult_shift(ce_dev, sleep_clk_freq, 4);
+
+	ce_dev->max_delta_ns = clockevent_delta2ns(0x7fffffff, ce_dev);
+	ce_dev->min_delta_ns = clockevent_delta2ns(0xf, ce_dev);
+
+#ifdef CONFIG_SMP
+	setup_percpu_clockdev();
+#endif
+
+	clockevents_register_device(ce_dev);
+	setup_irq(ce_dev->irq, &rtos_timer_intdesc);
+}
+
+void __init time_init(void)
+{
+	late_time_init = time_init_deferred;
+}
+
+/*
+ * This could become parametric or perhaps even computed at run-time,
+ * but for now we take the observed simulator jitter.
+ */
+static long long fudgefactor = 350;  /* Maybe lower if kernel optimized. */
+
+void __udelay(unsigned long usecs)
+{
+	unsigned long long start = __vmgettime();
+	unsigned long long finish = (pcycle_freq_mhz * usecs) - fudgefactor;
+
+	while ((__vmgettime() - start) < finish)
+		cpu_relax(); /*  not sure how this improves readability  */
+}
+EXPORT_SYMBOL(__udelay);
diff --git a/arch/hexagon/kernel/topology.c b/arch/hexagon/kernel/topology.c
new file mode 100644
index 000000000000..ba4475184432
--- /dev/null
+++ b/arch/hexagon/kernel/topology.c
@@ -0,0 +1,52 @@
+/*
+ * CPU topology for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/percpu.h>
+
+/*  Swiped from MIPS.  */
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+static int __init topology_init(void)
+{
+	int i, ret;
+
+	for_each_present_cpu(i) {
+
+		/*
+		 * register_cpu takes a per_cpu pointer and
+		 * just points it at another per_cpu struct...
+		 */
+
+		ret = register_cpu(&per_cpu(cpu_devices, i), i);
+		if (ret)
+			printk(KERN_WARNING "topology_init: register_cpu %d "
+			       "failed (%d)\n", i, ret);
+	}
+
+	return 0;
+}
+
+subsys_initcall(topology_init);
diff --git a/arch/hexagon/kernel/trampoline.S b/arch/hexagon/kernel/trampoline.S
new file mode 100644
index 000000000000..06c36c036b98
--- /dev/null
+++ b/arch/hexagon/kernel/trampoline.S
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Trampoline sequences to be copied onto user stack.
+ * This consumes a little more space than hand-assembling
+ * immediate constants for use in C, but is more portable
+ * to future tweaks to the Hexagon instruction set.
+ */
+
+#include <asm/unistd.h>
+
+/*  Sig trampolines - call sys_sigreturn or sys_rt_sigreturn as appropriate */
+
+/*  plain sigreturn is gone.  */
+
+	.globl __rt_sigtramp_template
+__rt_sigtramp_template:
+	r6 = #__NR_rt_sigreturn;
+	trap0(#1);
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
new file mode 100644
index 000000000000..f08857d0715b
--- /dev/null
+++ b/arch/hexagon/kernel/traps.c
@@ -0,0 +1,454 @@
+/*
+ * Kernel traps/events for Hexagon processor
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/kdebug.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/tracehook.h>
+#include <asm/traps.h>
+#include <asm/vm_fault.h>
+#include <asm/syscall.h>
+#include <asm/registers.h>
+#include <asm/unistd.h>
+#include <asm/sections.h>
+#ifdef CONFIG_KGDB
+# include <linux/kgdb.h>
+#endif
+
+#define TRAP_SYSCALL	1
+#define TRAP_DEBUG	0xdb
+
+void __init trap_init(void)
+{
+}
+
+#ifdef CONFIG_GENERIC_BUG
+/* Maybe should resemble arch/sh/kernel/traps.c ?? */
+int is_valid_bugaddr(unsigned long addr)
+{
+	return 1;
+}
+#endif /* CONFIG_GENERIC_BUG */
+
+static const char *ex_name(int ex)
+{
+	switch (ex) {
+	case HVM_GE_C_XPROT:
+	case HVM_GE_C_XUSER:
+		return "Execute protection fault";
+	case HVM_GE_C_RPROT:
+	case HVM_GE_C_RUSER:
+		return "Read protection fault";
+	case HVM_GE_C_WPROT:
+	case HVM_GE_C_WUSER:
+		return "Write protection fault";
+	case HVM_GE_C_XMAL:
+		return "Misaligned instruction";
+	case HVM_GE_C_RMAL:
+		return "Misaligned data load";
+	case HVM_GE_C_WMAL:
+		return "Misaligned data store";
+	case HVM_GE_C_INVI:
+	case HVM_GE_C_PRIVI:
+		return "Illegal instruction";
+	case HVM_GE_C_BUS:
+		return "Precise bus error";
+	case HVM_GE_C_CACHE:
+		return "Cache error";
+
+	case 0xdb:
+		return "Debugger trap";
+
+	default:
+		return "Unrecognized exception";
+	}
+}
+
+static void do_show_stack(struct task_struct *task, unsigned long *fp,
+			  unsigned long ip)
+{
+	int kstack_depth_to_print = 24;
+	unsigned long offset, size;
+	const char *name = NULL;
+	unsigned long *newfp;
+	unsigned long low, high;
+	char tmpstr[128];
+	char *modname;
+	int i;
+
+	if (task == NULL)
+		task = current;
+
+	printk(KERN_INFO "CPU#%d, %s/%d, Call Trace:\n",
+	       raw_smp_processor_id(), task->comm,
+	       task_pid_nr(task));
+
+	if (fp == NULL) {
+		if (task == current) {
+			asm("%0 = r30" : "=r" (fp));
+		} else {
+			fp = (unsigned long *)
+			     ((struct hexagon_switch_stack *)
+			     task->thread.switch_sp)->fp;
+		}
+	}
+
+	if ((((unsigned long) fp) & 0x3) || ((unsigned long) fp < 0x1000)) {
+		printk(KERN_INFO "-- Corrupt frame pointer %p\n", fp);
+		return;
+	}
+
+	/* Saved link reg is one word above FP */
+	if (!ip)
+		ip = *(fp+1);
+
+	/* Expect kernel stack to be in-bounds */
+	low = (unsigned long)task_stack_page(task);
+	high = low + THREAD_SIZE - 8;
+	low += sizeof(struct thread_info);
+
+	for (i = 0; i < kstack_depth_to_print; i++) {
+
+		name = kallsyms_lookup(ip, &size, &offset, &modname, tmpstr);
+
+		printk(KERN_INFO "[%p] 0x%lx: %s + 0x%lx", fp, ip, name,
+			offset);
+		if (((unsigned long) fp < low) || (high < (unsigned long) fp))
+			printk(KERN_CONT " (FP out of bounds!)");
+		if (modname)
+			printk(KERN_CONT " [%s] ", modname);
+		printk(KERN_CONT "\n");
+
+		newfp = (unsigned long *) *fp;
+
+		if (((unsigned long) newfp) & 0x3) {
+			printk(KERN_INFO "-- Corrupt frame pointer %p\n",
+				newfp);
+			break;
+		}
+
+		/* Attempt to continue past exception. */
+		if (0 == newfp) {
+			struct pt_regs *regs = (struct pt_regs *) (((void *)fp)
+						+ 8);
+
+			if (regs->syscall_nr != -1) {
+				printk(KERN_INFO "-- trap0 -- syscall_nr: %ld",
+					regs->syscall_nr);
+				printk(KERN_CONT "  psp: %lx  elr: %lx\n",
+					 pt_psp(regs), pt_elr(regs));
+				break;
+			} else {
+				/* really want to see more ... */
+				kstack_depth_to_print += 6;
+				printk(KERN_INFO "-- %s (0x%lx)  badva: %lx\n",
+					ex_name(pt_cause(regs)), pt_cause(regs),
+					pt_badva(regs));
+			}
+
+			newfp = (unsigned long *) regs->r30;
+			ip = pt_elr(regs);
+		} else {
+			ip = *(newfp + 1);
+		}
+
+		/* If link reg is null, we are done. */
+		if (ip == 0x0)
+			break;
+
+		/* If newfp isn't larger, we're tracing garbage. */
+		if (newfp > fp)
+			fp = newfp;
+		else
+			break;
+	}
+}
+
+void show_stack(struct task_struct *task, unsigned long *fp)
+{
+	/* Saved link reg is one word above FP */
+	do_show_stack(task, fp, 0);
+}
+
+void dump_stack(void)
+{
+	unsigned long *fp;
+	asm("%0 = r30" : "=r" (fp));
+	show_stack(current, fp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+int die(const char *str, struct pt_regs *regs, long err)
+{
+	static struct {
+		spinlock_t lock;
+		int counter;
+	} die = {
+		.lock = __SPIN_LOCK_UNLOCKED(die.lock),
+		.counter = 0
+	};
+
+	console_verbose();
+	oops_enter();
+
+	spin_lock_irq(&die.lock);
+	bust_spinlocks(1);
+	printk(KERN_EMERG "Oops: %s[#%d]:\n", str, ++die.counter);
+
+	if (notify_die(DIE_OOPS, str, regs, err, pt_cause(regs), SIGSEGV) ==
+	    NOTIFY_STOP)
+		return 1;
+
+	print_modules();
+	show_regs(regs);
+	do_show_stack(current, &regs->r30, pt_elr(regs));
+
+	bust_spinlocks(0);
+	add_taint(TAINT_DIE);
+
+	spin_unlock_irq(&die.lock);
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops)
+		panic("Fatal exception");
+
+	oops_exit();
+	do_exit(err);
+	return 0;
+}
+
+int die_if_kernel(char *str, struct pt_regs *regs, long err)
+{
+	if (!user_mode(regs))
+		return die(str, regs, err);
+	else
+		return 0;
+}
+
+/*
+ * It's not clear that misaligned fetches are ever recoverable.
+ */
+static void misaligned_instruction(struct pt_regs *regs)
+{
+	die_if_kernel("Misaligned Instruction", regs, 0);
+	force_sig(SIGBUS, current);
+}
+
+/*
+ * Misaligned loads and stores, on the other hand, can be
+ * emulated, and probably should be, some day.  But for now
+ * they will be considered fatal.
+ */
+static void misaligned_data_load(struct pt_regs *regs)
+{
+	die_if_kernel("Misaligned Data Load", regs, 0);
+	force_sig(SIGBUS, current);
+}
+
+static void misaligned_data_store(struct pt_regs *regs)
+{
+	die_if_kernel("Misaligned Data Store", regs, 0);
+	force_sig(SIGBUS, current);
+}
+
+static void illegal_instruction(struct pt_regs *regs)
+{
+	die_if_kernel("Illegal Instruction", regs, 0);
+	force_sig(SIGILL, current);
+}
+
+/*
+ * Precise bus errors may be recoverable with a a retry,
+ * but for now, treat them as irrecoverable.
+ */
+static void precise_bus_error(struct pt_regs *regs)
+{
+	die_if_kernel("Precise Bus Error", regs, 0);
+	force_sig(SIGBUS, current);
+}
+
+/*
+ * If anything is to be done here other than panic,
+ * it will probably be complex and migrate to another
+ * source module.  For now, just die.
+ */
+static void cache_error(struct pt_regs *regs)
+{
+	die("Cache Error", regs, 0);
+}
+
+/*
+ * General exception handler
+ */
+void do_genex(struct pt_regs *regs)
+{
+	/*
+	 * Decode Cause and Dispatch
+	 */
+	switch (pt_cause(regs)) {
+	case HVM_GE_C_XPROT:
+	case HVM_GE_C_XUSER:
+		execute_protection_fault(regs);
+		break;
+	case HVM_GE_C_RPROT:
+	case HVM_GE_C_RUSER:
+		read_protection_fault(regs);
+		break;
+	case HVM_GE_C_WPROT:
+	case HVM_GE_C_WUSER:
+		write_protection_fault(regs);
+		break;
+	case HVM_GE_C_XMAL:
+		misaligned_instruction(regs);
+		break;
+	case HVM_GE_C_RMAL:
+		misaligned_data_load(regs);
+		break;
+	case HVM_GE_C_WMAL:
+		misaligned_data_store(regs);
+		break;
+	case HVM_GE_C_INVI:
+	case HVM_GE_C_PRIVI:
+		illegal_instruction(regs);
+		break;
+	case HVM_GE_C_BUS:
+		precise_bus_error(regs);
+		break;
+	case HVM_GE_C_CACHE:
+		cache_error(regs);
+		break;
+	default:
+		/* Halt and catch fire */
+		panic("Unrecognized exception 0x%lx\n", pt_cause(regs));
+		break;
+	}
+}
+
+/* Indirect system call dispatch */
+long sys_syscall(void)
+{
+	printk(KERN_ERR "sys_syscall invoked!\n");
+	return -ENOSYS;
+}
+
+void do_trap0(struct pt_regs *regs)
+{
+	unsigned long syscallret = 0;
+	syscall_fn syscall;
+
+	switch (pt_cause(regs)) {
+	case TRAP_SYSCALL:
+		/* System call is trap0 #1 */
+
+		/* allow strace to catch syscall args  */
+		if (unlikely(test_thread_flag(TIF_SYSCALL_TRACE) &&
+			tracehook_report_syscall_entry(regs)))
+			return;  /*  return -ENOSYS somewhere?  */
+
+		/* Interrupts should be re-enabled for syscall processing */
+		__vmsetie(VM_INT_ENABLE);
+
+		/*
+		 * System call number is in r6, arguments in r0..r5.
+		 * Fortunately, no Linux syscall has more than 6 arguments,
+		 * and Hexagon ABI passes first 6 arguments in registers.
+		 * 64-bit arguments are passed in odd/even register pairs.
+		 * Fortunately, we have no system calls that take more
+		 * than three arguments with more than one 64-bit value.
+		 * Should that change, we'd need to redesign to copy
+		 * between user and kernel stacks.
+		 */
+		regs->syscall_nr = regs->r06;
+
+		/*
+		 * GPR R0 carries the first parameter, and is also used
+		 * to report the return value.  We need a backup of
+		 * the user's value in case we need to do a late restart
+		 * of the system call.
+		 */
+		regs->restart_r0 = regs->r00;
+
+		if ((unsigned long) regs->syscall_nr >= __NR_syscalls) {
+			regs->r00 = -1;
+		} else {
+			syscall = (syscall_fn)
+				  (sys_call_table[regs->syscall_nr]);
+			syscallret = syscall(regs->r00, regs->r01,
+				   regs->r02, regs->r03,
+				   regs->r04, regs->r05);
+		}
+
+		/*
+		 * If it was a sigreturn system call, don't overwrite
+		 * r0 value in stack frame with return value.
+		 *
+		 * __NR_sigreturn doesn't seem to exist in new unistd.h
+		 */
+
+		if (regs->syscall_nr != __NR_rt_sigreturn)
+			regs->r00 = syscallret;
+
+		/* allow strace to get the syscall return state  */
+		if (unlikely(test_thread_flag(TIF_SYSCALL_TRACE)))
+			tracehook_report_syscall_exit(regs, 0);
+
+		break;
+	case TRAP_DEBUG:
+		/* Trap0 0xdb is debug breakpoint */
+		if (user_mode(regs)) {
+			struct siginfo info;
+
+			info.si_signo = SIGTRAP;
+			info.si_errno = 0;
+			/*
+			 * Some architecures add some per-thread state
+			 * to distinguish between breakpoint traps and
+			 * trace traps.  We may want to do that, and
+			 * set the si_code value appropriately, or we
+			 * may want to use a different trap0 flavor.
+			 */
+			info.si_code = TRAP_BRKPT;
+			info.si_addr = (void __user *) pt_elr(regs);
+			send_sig_info(SIGTRAP, &info, current);
+		} else {
+#ifdef CONFIG_KGDB
+			kgdb_handle_exception(pt_cause(regs), SIGTRAP,
+					      TRAP_BRKPT, regs);
+#endif
+		}
+		break;
+	}
+	/* Ignore other trap0 codes for now, especially 0 (Angel calls) */
+}
+
+/*
+ * Machine check exception handler
+ */
+void do_machcheck(struct pt_regs *regs)
+{
+	/* Halt and catch fire */
+	__vmstop();
+}
diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c
new file mode 100644
index 000000000000..16277c33308a
--- /dev/null
+++ b/arch/hexagon/kernel/vdso.c
@@ -0,0 +1,100 @@
+/*
+ * vDSO implementation for Hexagon
+ *
+ * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/vdso.h>
+
+static struct page *vdso_page;
+
+/* Create a vDSO page holding the signal trampoline.
+ * We want this for a non-executable stack.
+ */
+static int __init vdso_init(void)
+{
+	struct hexagon_vdso *vdso;
+
+	vdso_page = alloc_page(GFP_KERNEL);
+	if (!vdso_page)
+		panic("Cannot allocate vdso");
+
+	vdso = vmap(&vdso_page, 1, 0, PAGE_KERNEL);
+	if (!vdso)
+		panic("Cannot map vdso");
+	clear_page(vdso);
+
+	/* Install the signal trampoline; currently looks like this:
+	 *	r6 = #__NR_rt_sigreturn;
+	 *	trap0(#1);
+	 */
+	vdso->rt_signal_trampoline[0] = __rt_sigtramp_template[0];
+	vdso->rt_signal_trampoline[1] = __rt_sigtramp_template[1];
+
+	vunmap(vdso);
+
+	return 0;
+}
+arch_initcall(vdso_init);
+
+/*
+ * Called from binfmt_elf.  Create a VMA for the vDSO page.
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	int ret;
+	unsigned long vdso_base;
+	struct mm_struct *mm = current->mm;
+
+	down_write(&mm->mmap_sem);
+
+	/* Try to get it loaded right near ld.so/glibc. */
+	vdso_base = STACK_TOP;
+
+	vdso_base = get_unmapped_area(NULL, vdso_base, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		ret = vdso_base;
+		goto up_fail;
+	}
+
+	/* MAYWRITE to allow gdb to COW and set breakpoints. */
+	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+				      VM_ALWAYSDUMP,
+				      &vdso_page);
+
+	if (ret)
+		goto up_fail;
+
+	mm->context.vdso = (void *)vdso_base;
+
+up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
+		return "[vdso]";
+	return NULL;
+}
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
new file mode 100644
index 000000000000..5b99066cbc8d
--- /dev/null
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -0,0 +1,269 @@
+/*
+ * Event entry/exit for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/asm-offsets.h>  /*  assembly-safer versions of C defines */
+#include <asm/mem-layout.h>   /*  sigh, except for page_offset  */
+#include <asm/hexagon_vm.h>
+#include <asm/thread_info.h>
+
+/*
+ * Entry into guest-mode Linux under Hexagon Virtual Machine.
+ * Stack pointer points to event record - build pt_regs on top of it,
+ * set up a plausible C stack frame, and dispatch to the C handler.
+ * On return, do vmrte virtual instruction with SP where we started.
+ *
+ * VM Spec 0.5 uses a trap to fetch HVM record now.
+ */
+
+/*
+ * Save full register state, while setting up thread_info struct
+ * pointer derived from kernel stack pointer in THREADINFO_REG
+ * register, putting prior thread_info.regs pointer in a callee-save
+ * register (R24, which had better not ever be assigned to THREADINFO_REG),
+ * and updating thread_info.regs to point to current stack frame,
+ * so as to support nested events in kernel mode.
+ *
+ * As this is common code, we set the pt_regs system call number
+ * to -1 for all events.  It will be replaced with the system call
+ * number in the case where we decode a system call (trap0(#1)).
+ */
+
+#define save_pt_regs()\
+	memd(R0 + #_PT_R3130) = R31:30; \
+	{ memw(R0 + #_PT_R2928) = R28; \
+	  R31 = memw(R0 + #_PT_ER_VMPSP); }\
+	{ memw(R0 + #(_PT_R2928 + 4)) = R31; \
+	  R31 = ugp; } \
+	{ memd(R0 + #_PT_R2726) = R27:26; \
+	  R30 = gp ; } \
+	memd(R0 + #_PT_R2524) = R25:24; \
+	memd(R0 + #_PT_R2322) = R23:22; \
+	memd(R0 + #_PT_R2120) = R21:20; \
+	memd(R0 + #_PT_R1918) = R19:18; \
+	memd(R0 + #_PT_R1716) = R17:16; \
+	memd(R0 + #_PT_R1514) = R15:14; \
+	memd(R0 + #_PT_R1312) = R13:12; \
+	{ memd(R0 + #_PT_R1110) = R11:10; \
+	  R15 = lc0; } \
+	{ memd(R0 + #_PT_R0908) = R9:8; \
+	  R14 = sa0; } \
+	{ memd(R0 + #_PT_R0706) = R7:6; \
+	  R13 = lc1; } \
+	{ memd(R0 + #_PT_R0504) = R5:4; \
+	  R12 = sa1; } \
+	{ memd(R0 + #_PT_UGPGP) = R31:30; \
+	  R11 = m1; \
+	  R2.H = #HI(_THREAD_SIZE); } \
+	{ memd(R0 + #_PT_LC0SA0) = R15:14; \
+	  R10 = m0; \
+	  R2.L = #LO(_THREAD_SIZE); } \
+	{ memd(R0 + #_PT_LC1SA1) = R13:12; \
+	  R15 = p3:0; \
+	  R2 = neg(R2); } \
+	{ memd(R0 + #_PT_M1M0) = R11:10; \
+	  R14  = usr; \
+	  R2 = and(R0,R2); } \
+	{ memd(R0 + #_PT_PREDSUSR) =  R15:14; \
+	  THREADINFO_REG = R2; } \
+	{ r24 = memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS); \
+	  memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R0; \
+	  R2 = #-1; } \
+	{ memw(R0 + #_PT_SYSCALL_NR) = R2; \
+	  R30 = #0; }
+
+/*
+ * Restore registers and thread_info.regs state. THREADINFO_REG
+ * is assumed to still be sane, and R24 to have been correctly
+ * preserved. Don't restore R29 (SP) until later.
+ */
+
+#define restore_pt_regs() \
+	{ memw(THREADINFO_REG + #_THREAD_INFO_PT_REGS) = R24; \
+	  R15:14 = memd(R0 + #_PT_PREDSUSR); } \
+	{ R11:10 = memd(R0 + #_PT_M1M0); \
+	  p3:0 = R15; } \
+	{ R13:12 = memd(R0 + #_PT_LC1SA1); \
+	  usr = R14; } \
+	{ R15:14 = memd(R0 + #_PT_LC0SA0); \
+	  m1 = R11; } \
+	{ R3:2 = memd(R0 + #_PT_R0302); \
+	  m0 = R10; } \
+	{ R5:4 = memd(R0 + #_PT_R0504); \
+	  lc1 = R13; } \
+	{ R7:6 = memd(R0 + #_PT_R0706); \
+	  sa1 = R12; } \
+	{ R9:8 = memd(R0 + #_PT_R0908); \
+	  lc0 = R15; } \
+	{ R11:10 = memd(R0 + #_PT_R1110); \
+	  sa0 = R14; } \
+	{ R13:12 = memd(R0 + #_PT_R1312); \
+	  R15:14 = memd(R0 + #_PT_R1514); } \
+	{ R17:16 = memd(R0 + #_PT_R1716); \
+	  R19:18 = memd(R0 + #_PT_R1918); } \
+	{ R21:20 = memd(R0 + #_PT_R2120); \
+	  R23:22 = memd(R0 + #_PT_R2322); } \
+	{ R25:24 = memd(R0 + #_PT_R2524); \
+	  R27:26 = memd(R0 + #_PT_R2726); } \
+	R31:30 = memd(R0 + #_PT_UGPGP); \
+	{ R28 = memw(R0 + #_PT_R2928); \
+	  ugp = R31; } \
+	{ R31:30 = memd(R0 + #_PT_R3130); \
+	  gp = R30; }
+
+	/*
+	 * Clears off enough space for the rest of pt_regs; evrec is a part
+	 * of pt_regs in HVM mode.  Save R0/R1, set handler's address in R1.
+	 * R0 is the address of pt_regs and is the parameter to save_pt_regs.
+	 */
+
+/*
+ * Since the HVM isn't automagically pushing the EVREC onto the stack anymore,
+ * we'll subract the entire size out and then fill it in ourselves.
+ * Need to save off R0, R1, R2, R3 immediately.
+ */
+
+#define	vm_event_entry(CHandler) \
+	{ \
+		R29 = add(R29, #-(_PT_REGS_SIZE)); \
+		memd(R29 + #(_PT_R0100 + -_PT_REGS_SIZE)) = R1:0; \
+	} \
+	{ \
+		memd(R29 +#_PT_R0302) = R3:2; \
+	} \
+	trap1(#HVM_TRAP1_VMGETREGS); \
+	{ \
+		memd(R29 + #_PT_ER_VMEL) = R1:0; \
+		R0 = R29; \
+		R1.L = #LO(CHandler); \
+	} \
+	{ \
+		memd(R29 + #_PT_ER_VMPSP) = R3:2; \
+		R1.H = #HI(CHandler); \
+		jump event_dispatch; \
+	}
+
+.text
+	/*
+	 * Do bulk save/restore in one place.
+	 * Adds a jump to dispatch latency, but
+	 * saves hundreds of bytes.
+	 */
+
+event_dispatch:
+	save_pt_regs()
+	callr	r1
+
+	/*
+	 * If we were in kernel mode, we don't need to check scheduler
+	 * or signals if CONFIG_PREEMPT is not set.  If set, then it has
+	 * to jump to a need_resched kind of block.
+	 * BTW, CONFIG_PREEMPT is not supported yet.
+	 */
+
+#ifdef CONFIG_PREEMPT
+	R0 = #VM_INT_DISABLE
+	trap1(#HVM_TRAP1_VMSETIE)
+#endif
+
+	/*  "Nested control path" -- if the previous mode was kernel  */
+	R0 = memw(R29 + #_PT_ER_VMEST);
+	P0 = tstbit(R0, #HVM_VMEST_UM_SFT);
+	if !P0 jump restore_all;
+	/*
+	 * Returning from system call, normally coming back from user mode
+	 */
+return_from_syscall:
+	/*  Disable interrupts while checking TIF  */
+	R0 = #VM_INT_DISABLE
+	trap1(#HVM_TRAP1_VMSETIE)
+
+	/*
+	 * Coming back from the C-world, our thread info pointer
+	 * should be in the designated register (usually R19)
+	 */
+	R1.L = #LO(_TIF_ALLWORK_MASK)
+	{
+		R1.H = #HI(_TIF_ALLWORK_MASK);
+		R0 = memw(THREADINFO_REG + #_THREAD_INFO_FLAGS);
+	}
+
+	/*
+	 * Compare against the "return to userspace" _TIF_WORK_MASK
+	 */
+	R1 = and(R1,R0);
+	{ P0 = cmp.eq(R1,#0); if (!P0.new) jump:t work_pending;}
+	jump restore_all;  /*  we're outta here!  */
+
+work_pending:
+	{
+		P0 = tstbit(R1, #TIF_NEED_RESCHED);
+		if (!P0.new) jump:nt work_notifysig;
+	}
+	call schedule
+	jump return_from_syscall;  /*  check for more work  */
+
+work_notifysig:
+	/*  this is the part that's kind of fuzzy.  */
+	R1 = and(R0, #(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME));
+	P0 = cmp.eq(R1, #0);
+	if P0 jump restore_all
+	R1 = R0; 	/* unsigned long thread_info_flags */
+	R0 = R29;	/* regs should still be at top of stack  */
+	call do_notify_resume
+
+restore_all:
+	/* Disable interrupts, if they weren't already, before reg restore.  */
+	R0 = #VM_INT_DISABLE
+	trap1(#HVM_TRAP1_VMSETIE)
+
+	/*  do the setregs here for VM 0.5  */
+	/*  R29 here should already be pointing at pt_regs  */
+	R1:0 = memd(R29 + #_PT_ER_VMEL);
+	R3:2 = memd(R29 + #_PT_ER_VMPSP);
+	trap1(#HVM_TRAP1_VMSETREGS);
+
+	R0 = R29
+	restore_pt_regs()
+	R1:0 = memd(R29 + #_PT_R0100);
+	R29 = add(R29, #_PT_REGS_SIZE);
+	trap1(#HVM_TRAP1_VMRTE)
+	/* Notreached */
+
+	.globl _K_enter_genex
+_K_enter_genex:
+	vm_event_entry(do_genex)
+
+	.globl _K_enter_interrupt
+_K_enter_interrupt:
+	vm_event_entry(arch_do_IRQ)
+
+	.globl _K_enter_trap0
+_K_enter_trap0:
+	vm_event_entry(do_trap0)
+
+	.globl _K_enter_machcheck
+_K_enter_machcheck:
+	vm_event_entry(do_machcheck)
+
+
+	.globl ret_from_fork
+ret_from_fork:
+	call schedule_tail
+	jump return_from_syscall
diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c
new file mode 100644
index 000000000000..986a081e32ec
--- /dev/null
+++ b/arch/hexagon/kernel/vm_events.c
@@ -0,0 +1,101 @@
+/*
+ * Mostly IRQ support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/kernel.h>
+#include <asm/registers.h>
+#include <linux/irq.h>
+#include <linux/hardirq.h>
+#include <asm/system.h>
+
+/*
+ * show_regs - print pt_regs structure
+ * @regs: pointer to pt_regs
+ *
+ * To-do:  add all the accessor definitions to registers.h
+ *
+ * Will make this routine a lot easier to write.
+ */
+void show_regs(struct pt_regs *regs)
+{
+	printk(KERN_EMERG "restart_r0: \t0x%08lx   syscall_nr: %ld\n",
+	       regs->restart_r0, regs->syscall_nr);
+	printk(KERN_EMERG "preds: \t\t0x%08lx\n", regs->preds);
+	printk(KERN_EMERG "lc0: \t0x%08lx   sa0: 0x%08lx   m0:  0x%08lx\n",
+	       regs->lc0, regs->sa0, regs->m0);
+	printk(KERN_EMERG "lc1: \t0x%08lx   sa1: 0x%08lx   m1:  0x%08lx\n",
+	       regs->lc1, regs->sa1, regs->m1);
+	printk(KERN_EMERG "gp: \t0x%08lx   ugp: 0x%08lx   usr: 0x%08lx\n",
+	       regs->gp, regs->ugp, regs->usr);
+	printk(KERN_EMERG "r0: \t0x%08lx %08lx %08lx %08lx\n", regs->r00,
+		regs->r01,
+		regs->r02,
+		regs->r03);
+	printk(KERN_EMERG "r4:  \t0x%08lx %08lx %08lx %08lx\n", regs->r04,
+		regs->r05,
+		regs->r06,
+		regs->r07);
+	printk(KERN_EMERG "r8:  \t0x%08lx %08lx %08lx %08lx\n", regs->r08,
+		regs->r09,
+		regs->r10,
+		regs->r11);
+	printk(KERN_EMERG "r12: \t0x%08lx %08lx %08lx %08lx\n", regs->r12,
+		regs->r13,
+		regs->r14,
+		regs->r15);
+	printk(KERN_EMERG "r16: \t0x%08lx %08lx %08lx %08lx\n", regs->r16,
+		regs->r17,
+		regs->r18,
+		regs->r19);
+	printk(KERN_EMERG "r20: \t0x%08lx %08lx %08lx %08lx\n", regs->r20,
+		regs->r21,
+		regs->r22,
+		regs->r23);
+	printk(KERN_EMERG "r24: \t0x%08lx %08lx %08lx %08lx\n", regs->r24,
+		regs->r25,
+		regs->r26,
+		regs->r27);
+	printk(KERN_EMERG "r28: \t0x%08lx %08lx %08lx %08lx\n", regs->r28,
+		regs->r29,
+		regs->r30,
+		regs->r31);
+
+	printk(KERN_EMERG "elr: \t0x%08lx   cause: 0x%08lx   user_mode: %d\n",
+		pt_elr(regs), pt_cause(regs), user_mode(regs));
+	printk(KERN_EMERG "psp: \t0x%08lx   badva: 0x%08lx   int_enabled: %d\n",
+		pt_psp(regs), pt_badva(regs), ints_enabled(regs));
+}
+
+void dummy_handler(struct pt_regs *regs)
+{
+	unsigned int elr = pt_elr(regs);
+	printk(KERN_ERR "Unimplemented handler; ELR=0x%08x\n", elr);
+}
+
+
+void arch_do_IRQ(struct pt_regs *regs)
+{
+	int irq = pt_cause(regs);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+	generic_handle_irq(irq);
+	irq_exit();
+	set_irq_regs(old_regs);
+}
diff --git a/arch/hexagon/kernel/vm_init_segtable.S b/arch/hexagon/kernel/vm_init_segtable.S
new file mode 100644
index 000000000000..aebb35b6465e
--- /dev/null
+++ b/arch/hexagon/kernel/vm_init_segtable.S
@@ -0,0 +1,442 @@
+/*
+ * Initial page table for Linux kernel under Hexagon VM,
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * These tables are pre-computed and linked into kernel.
+ */
+
+#include <asm/vm_mmu.h>
+/*  #include <asm/iomap.h>  */
+
+/*
+ * Start with mapping PA=0 to both VA=0x0 and VA=0xc000000 as 16MB large pages.
+ * No user mode access, RWX, write-back cache.  The entry needs
+ * to be replicated for all 4 virtual segments mapping to the page.
+ */
+
+/* "Big Kernel Page"  */
+#define BKP(pa) (((pa) & __HVM_PTE_PGMASK_4MB)		\
+		| __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X	\
+		| __HEXAGON_C_WB_L2 << 6			\
+		| __HVM_PDE_S_16MB)
+
+/*  No cache version  */
+
+#define BKPG_IO(pa) (((pa) & __HVM_PTE_PGMASK_16MB) \
+			| __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \
+			| __HVM_PDE_S_16MB | __HEXAGON_C_DEV << 6 )
+
+#define FOURK_IO(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \
+			| __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X \
+			| __HEXAGON_C_DEV << 6 )
+
+#define L2_PTR(pa) (((pa) & __HVM_PTE_PGMASK_4KB) \
+			| __HVM_PDE_S_4KB  )
+
+#define X __HVM_PDE_S_INVALID
+
+	.p2align 12
+	.globl swapper_pg_dir
+	.globl _K_init_segtable
+swapper_pg_dir:
+/* VA 0x00000000 */
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+/* VA 0x40000000 */
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+/* VA 0x80000000 */
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+/*0xa8*/.word X,X,X,X
+#ifdef CONFIG_COMET_EARLY_UART_DEBUG
+UART_PTE_ENTRY:
+/*0xa9*/.word BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000),BKPG_IO(0xa9000000)
+#else
+/*0xa9*/.word X,X,X,X
+#endif
+/*0xaa*/.word X,X,X,X
+/*0xab*/.word X,X,X,X
+/*0xac*/.word X,X,X,X
+/*0xad*/.word X,X,X,X
+/*0xae*/.word X,X,X,X
+/*0xaf*/.word X,X,X,X
+/*0xb0*/.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+	.word X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
+_K_init_segtable:
+/* VA 0xC0000000 */
+	.word BKP(0x00000000), BKP(0x00400000), BKP(0x00800000), BKP(0x00c00000)
+	.word BKP(0x01000000), BKP(0x01400000), BKP(0x01800000), BKP(0x01c00000)
+	.word BKP(0x02000000), BKP(0x02400000), BKP(0x02800000), BKP(0x02c00000)
+	.word BKP(0x03000000), BKP(0x03400000), BKP(0x03800000), BKP(0x03c00000)
+	.word BKP(0x04000000), BKP(0x04400000), BKP(0x04800000), BKP(0x04c00000)
+	.word BKP(0x05000000), BKP(0x05400000), BKP(0x05800000), BKP(0x05c00000)
+	.word BKP(0x06000000), BKP(0x06400000), BKP(0x06800000), BKP(0x06c00000)
+	.word BKP(0x07000000), BKP(0x07400000), BKP(0x07800000), BKP(0x07c00000)
+
+	.word BKP(0x08000000), BKP(0x08400000), BKP(0x08800000), BKP(0x08c00000)
+	.word BKP(0x09000000), BKP(0x09400000), BKP(0x09800000), BKP(0x09c00000)
+	.word BKP(0x0a000000), BKP(0x0a400000), BKP(0x0a800000), BKP(0x0ac00000)
+	.word BKP(0x0b000000), BKP(0x0b400000), BKP(0x0b800000), BKP(0x0bc00000)
+	.word BKP(0x0c000000), BKP(0x0c400000), BKP(0x0c800000), BKP(0x0cc00000)
+	.word BKP(0x0d000000), BKP(0x0d400000), BKP(0x0d800000), BKP(0x0dc00000)
+	.word BKP(0x0e000000), BKP(0x0e400000), BKP(0x0e800000), BKP(0x0ec00000)
+	.word BKP(0x0f000000), BKP(0x0f400000), BKP(0x0f800000), BKP(0x0fc00000)
+
+	.word BKP(0x10000000), BKP(0x10400000), BKP(0x10800000), BKP(0x10c00000)
+	.word BKP(0x11000000), BKP(0x11400000), BKP(0x11800000), BKP(0x11c00000)
+	.word BKP(0x12000000), BKP(0x12400000), BKP(0x12800000), BKP(0x12c00000)
+	.word BKP(0x13000000), BKP(0x13400000), BKP(0x13800000), BKP(0x13c00000)
+	.word BKP(0x14000000), BKP(0x14400000), BKP(0x14800000), BKP(0x14c00000)
+	.word BKP(0x15000000), BKP(0x15400000), BKP(0x15800000), BKP(0x15c00000)
+	.word BKP(0x16000000), BKP(0x16400000), BKP(0x16800000), BKP(0x16c00000)
+	.word BKP(0x17000000), BKP(0x17400000), BKP(0x17800000), BKP(0x17c00000)
+
+	.word BKP(0x18000000), BKP(0x18400000), BKP(0x18800000), BKP(0x18c00000)
+	.word BKP(0x19000000), BKP(0x19400000), BKP(0x19800000), BKP(0x19c00000)
+	.word BKP(0x1a000000), BKP(0x1a400000), BKP(0x1a800000), BKP(0x1ac00000)
+	.word BKP(0x1b000000), BKP(0x1b400000), BKP(0x1b800000), BKP(0x1bc00000)
+	.word BKP(0x1c000000), BKP(0x1c400000), BKP(0x1c800000), BKP(0x1cc00000)
+	.word BKP(0x1d000000), BKP(0x1d400000), BKP(0x1d800000), BKP(0x1dc00000)
+	.word BKP(0x1e000000), BKP(0x1e400000), BKP(0x1e800000), BKP(0x1ec00000)
+	.word BKP(0x1f000000), BKP(0x1f400000), BKP(0x1f800000), BKP(0x1fc00000)
+
+	.word BKP(0x20000000), BKP(0x20400000), BKP(0x20800000), BKP(0x20c00000)
+	.word BKP(0x21000000), BKP(0x21400000), BKP(0x21800000), BKP(0x21c00000)
+	.word BKP(0x22000000), BKP(0x22400000), BKP(0x22800000), BKP(0x22c00000)
+	.word BKP(0x23000000), BKP(0x23400000), BKP(0x23800000), BKP(0x23c00000)
+	.word BKP(0x24000000), BKP(0x24400000), BKP(0x24800000), BKP(0x24c00000)
+	.word BKP(0x25000000), BKP(0x25400000), BKP(0x25800000), BKP(0x25c00000)
+	.word BKP(0x26000000), BKP(0x26400000), BKP(0x26800000), BKP(0x26c00000)
+	.word BKP(0x27000000), BKP(0x27400000), BKP(0x27800000), BKP(0x27c00000)
+
+	.word BKP(0x28000000), BKP(0x28400000), BKP(0x28800000), BKP(0x28c00000)
+	.word BKP(0x29000000), BKP(0x29400000), BKP(0x29800000), BKP(0x29c00000)
+	.word BKP(0x2a000000), BKP(0x2a400000), BKP(0x2a800000), BKP(0x2ac00000)
+	.word BKP(0x2b000000), BKP(0x2b400000), BKP(0x2b800000), BKP(0x2bc00000)
+	.word BKP(0x2c000000), BKP(0x2c400000), BKP(0x2c800000), BKP(0x2cc00000)
+	.word BKP(0x2d000000), BKP(0x2d400000), BKP(0x2d800000), BKP(0x2dc00000)
+	.word BKP(0x2e000000), BKP(0x2e400000), BKP(0x2e800000), BKP(0x2ec00000)
+	.word BKP(0x2f000000), BKP(0x2f400000), BKP(0x2f800000), BKP(0x2fc00000)
+
+	.word BKP(0x30000000), BKP(0x30400000), BKP(0x30800000), BKP(0x30c00000)
+	.word BKP(0x31000000), BKP(0x31400000), BKP(0x31800000), BKP(0x31c00000)
+	.word BKP(0x32000000), BKP(0x32400000), BKP(0x32800000), BKP(0x32c00000)
+	.word BKP(0x33000000), BKP(0x33400000), BKP(0x33800000), BKP(0x33c00000)
+	.word BKP(0x34000000), BKP(0x34400000), BKP(0x34800000), BKP(0x34c00000)
+	.word BKP(0x35000000), BKP(0x35400000), BKP(0x35800000), BKP(0x35c00000)
+	.word BKP(0x36000000), BKP(0x36400000), BKP(0x36800000), BKP(0x36c00000)
+	.word BKP(0x37000000), BKP(0x37400000), BKP(0x37800000), BKP(0x37c00000)
+
+	.word BKP(0x38000000), BKP(0x38400000), BKP(0x38800000), BKP(0x38c00000)
+	.word BKP(0x39000000), BKP(0x39400000), BKP(0x39800000), BKP(0x39c00000)
+	.word BKP(0x3a000000), BKP(0x3a400000), BKP(0x3a800000), BKP(0x3ac00000)
+	.word BKP(0x3b000000), BKP(0x3b400000), BKP(0x3b800000), BKP(0x3bc00000)
+	.word BKP(0x3c000000), BKP(0x3c400000), BKP(0x3c800000), BKP(0x3cc00000)
+	.word BKP(0x3d000000), BKP(0x3d400000), BKP(0x3d800000), BKP(0x3dc00000)
+_K_io_map:
+	.word X,X,X,X /* 0x3e000000 - device IO early remap */
+	.word X,X,X,X /* 0x3f000000 - hypervisor space*/
+
+#if 0
+/*
+ * This is in here as an example for devices which need to be mapped really
+ * early.
+ */
+	.p2align 12
+	.globl _K_io_kmap
+	.globl _K_init_devicetable
+_K_init_devicetable:  /*  Should be 4MB worth of entries  */
+	.word FOURK_IO(MSM_GPIO1_PHYS),FOURK_IO(MSM_GPIO2_PHYS),FOURK_IO(MSM_SIRC_PHYS),X
+	.word FOURK_IO(TLMM_GPIO1_PHYS),X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+	.word X,X,X,X
+#endif
diff --git a/arch/hexagon/kernel/vm_ops.S b/arch/hexagon/kernel/vm_ops.S
new file mode 100644
index 000000000000..24d7fcac4ff2
--- /dev/null
+++ b/arch/hexagon/kernel/vm_ops.S
@@ -0,0 +1,102 @@
+/*
+ * Hexagon VM instruction support
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <asm/hexagon_vm.h>
+
+/*
+ * C wrappers for virtual machine "instructions".  These
+ * could be, and perhaps some day will be, handled as in-line
+ * macros, but for tracing/debugging it's handy to have
+ * a single point of invocation for each of them.
+ * Conveniently, they take paramters and return values
+ * consistent with the ABI calling convention.
+ */
+
+ENTRY(__vmrte)
+	trap1(#HVM_TRAP1_VMRTE);
+	jumpr	R31;
+
+ENTRY(__vmsetvec)
+	trap1(#HVM_TRAP1_VMSETVEC);
+	jumpr	R31;
+
+ENTRY(__vmsetie)
+	trap1(#HVM_TRAP1_VMSETIE);
+	jumpr	R31;
+
+ENTRY(__vmgetie)
+	trap1(#HVM_TRAP1_VMGETIE);
+	jumpr	R31;
+
+ENTRY(__vmintop)
+	trap1(#HVM_TRAP1_VMINTOP);
+	jumpr	R31;
+
+ENTRY(__vmclrmap)
+	trap1(#HVM_TRAP1_VMCLRMAP);
+	jumpr	R31;
+
+ENTRY(__vmnewmap)
+	r1 = #VM_NEWMAP_TYPE_PGTABLES;
+	trap1(#HVM_TRAP1_VMNEWMAP);
+	jumpr	R31;
+
+ENTRY(__vmcache)
+	trap1(#HVM_TRAP1_VMCACHE);
+	jumpr	R31;
+
+ENTRY(__vmgettime)
+	trap1(#HVM_TRAP1_VMGETTIME);
+	jumpr	R31;
+
+ENTRY(__vmsettime)
+	trap1(#HVM_TRAP1_VMSETTIME);
+	jumpr	R31;
+
+ENTRY(__vmwait)
+	trap1(#HVM_TRAP1_VMWAIT);
+	jumpr	R31;
+
+ENTRY(__vmyield)
+	trap1(#HVM_TRAP1_VMYIELD);
+	jumpr	R31;
+
+ENTRY(__vmstart)
+	trap1(#HVM_TRAP1_VMSTART);
+	jumpr	R31;
+
+ENTRY(__vmstop)
+	trap1(#HVM_TRAP1_VMSTOP);
+	jumpr	R31;
+
+ENTRY(__vmvpid)
+	trap1(#HVM_TRAP1_VMVPID);
+	jumpr	R31;
+
+/*  Probably not actually going to use these; see vm_entry.S  */
+
+ENTRY(__vmsetregs)
+	trap1(#HVM_TRAP1_VMSETREGS);
+	jumpr	R31;
+
+ENTRY(__vmgetregs)
+	trap1(#HVM_TRAP1_VMGETREGS);
+	jumpr	R31;
diff --git a/arch/hexagon/kernel/vm_switch.S b/arch/hexagon/kernel/vm_switch.S
new file mode 100644
index 000000000000..0decf2f58e32
--- /dev/null
+++ b/arch/hexagon/kernel/vm_switch.S
@@ -0,0 +1,95 @@
+/*
+ * Context switch support for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/asm-offsets.h>
+
+.text
+
+/*
+ * The register used as a fast-path thread information pointer
+ * is determined as a kernel configuration option.  If it happens
+ * to be a callee-save register, we're going to be saving and
+ * restoring it twice here.
+ *
+ * This code anticipates a revised ABI where R20-23 are added
+ * to the set of callee-save registers, but this should be
+ * backward compatible to legacy tools.
+ */
+
+
+/*
+ *	void switch_to(struct task_struct *prev,
+ *		struct task_struct *next, struct task_struct *last);
+ */
+	.p2align 2
+	.globl __switch_to
+	.type	__switch_to, @function
+
+/*
+ * When we exit the wormhole, we need to store the previous task
+ * in the new R0's pointer.  Technically it should be R2, but they should
+ * be the same; seems like a legacy thing.  In short, don't butcher
+ * R0, let it go back out unmolested.
+ */
+
+__switch_to:
+	/*
+	 * Push callee-saves onto "prev" stack.
+	 * Here, we're sneaky because the LR and FP
+	 * storage of the thread_stack structure
+	 * is automagically allocated by allocframe,
+	 * so we pass struct size less 8.
+	 */
+	allocframe(#(_SWITCH_STACK_SIZE - 8));
+	memd(R29+#(_SWITCH_R2726))=R27:26;
+	memd(R29+#(_SWITCH_R2524))=R25:24;
+	memd(R29+#(_SWITCH_R2322))=R23:22;
+	memd(R29+#(_SWITCH_R2120))=R21:20;
+	memd(R29+#(_SWITCH_R1918))=R19:18;
+	memd(R29+#(_SWITCH_R1716))=R17:16;
+	/* Stash thread_info pointer in task_struct */
+	memw(R0+#_TASK_THREAD_INFO) = THREADINFO_REG;
+	memw(R0 +#(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP)) = R29;
+	/* Switch to "next" stack and restore callee saves from there */
+	R29 = memw(R1 + #(_TASK_STRUCT_THREAD + _THREAD_STRUCT_SWITCH_SP));
+	{
+	    R27:26 = memd(R29+#(_SWITCH_R2726));
+	    R25:24 = memd(R29+#(_SWITCH_R2524));
+	}
+	{
+	    R23:22 = memd(R29+#(_SWITCH_R2322));
+	    R21:20 = memd(R29+#(_SWITCH_R2120));
+	}
+	{
+	    R19:18 = memd(R29+#(_SWITCH_R1918));
+	    R17:16 = memd(R29+#(_SWITCH_R1716));
+	}
+	{
+	    /* THREADINFO_REG is currently one of the callee-saved regs
+	     * above, and so be sure to re-load it last.
+	     */
+	    THREADINFO_REG = memw(R1 + #_TASK_THREAD_INFO);
+	    R31:30 = memd(R29+#_SWITCH_FP);
+	}
+	{
+	    R29 = add(R29,#_SWITCH_STACK_SIZE);
+	    jumpr R31;
+	}
+	.size	__switch_to, .-__switch_to
diff --git a/arch/hexagon/kernel/vm_vectors.S b/arch/hexagon/kernel/vm_vectors.S
new file mode 100644
index 000000000000..97a4b50b00df
--- /dev/null
+++ b/arch/hexagon/kernel/vm_vectors.S
@@ -0,0 +1,48 @@
+/*
+ * Event jump tables
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/hexagon_vm.h>
+
+.text
+
+/*  This is registered early on to allow angel  */
+.global _K_provisional_vec
+_K_provisional_vec:
+	jump 1f;
+	jump 1f;
+	jump 1f;
+	jump 1f;
+	jump 1f;
+	trap1(#HVM_TRAP1_VMRTE)
+	jump 1f;
+	jump 1f;
+
+
+.global _K_VM_event_vector
+_K_VM_event_vector:
+1:
+	jump 1b;  /*  Reset  */
+	jump _K_enter_machcheck;
+	jump _K_enter_genex;
+	jump 1b;  /*  3 Rsvd  */
+	jump 1b;  /*  4 Rsvd  */
+	jump _K_enter_trap0;
+	jump 1b;  /*  6 Rsvd  */
+	jump _K_enter_interrupt;
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..071d3c30edfb
--- /dev/null
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -0,0 +1,88 @@
+/*
+ * Linker script for Hexagon kernel
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#define LOAD_OFFSET PAGE_OFFSET
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/asm-offsets.h>	/*  Most of the kernel defines are here  */
+#include <asm/mem-layout.h>	/*  except for page_offset  */
+#include <asm/cache.h>		/*  and now we're pulling cache line size  */
+OUTPUT_ARCH(hexagon)
+ENTRY(stext)
+
+jiffies = jiffies_64;
+
+/*
+See asm-generic/vmlinux.lds.h for expansion of some of these macros.
+See asm-generic/sections.h for seemingly required labels.
+*/
+
+#define PAGE_SIZE _PAGE_SIZE
+
+/*  This LOAD_OFFSET is temporary for debugging on the simulator; it may change
+    for hypervisor pseudo-physical memory.  */
+
+
+SECTIONS
+{
+	. = PAGE_OFFSET + LOAD_ADDRESS;
+
+	__init_begin = .;
+	HEAD_TEXT_SECTION
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	PERCPU_SECTION(L1_CACHE_BYTES)
+	__init_end = .;
+
+        . = ALIGN(_PAGE_SIZE);
+	_stext = .;
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.fixup)
+	}
+	_etext = .;
+
+	INIT_DATA_SECTION(PAGE_SIZE)
+
+	_sdata = .;
+		RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE)
+		RO_DATA_SECTION(PAGE_SIZE)
+	_edata = .;
+
+	EXCEPTION_TABLE(16)
+	NOTES
+
+	BSS_SECTION(_PAGE_SIZE, _PAGE_SIZE, _PAGE_SIZE)
+
+	_end = .;
+
+	/DISCARD/ : {
+		EXIT_TEXT
+		EXIT_DATA
+		EXIT_CALL
+	}
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+}
diff --git a/arch/hexagon/lib/Makefile b/arch/hexagon/lib/Makefile
new file mode 100644
index 000000000000..874655e85671
--- /dev/null
+++ b/arch/hexagon/lib/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for hexagon-specific library files.
+#
+obj-y = checksum.o io.o memcpy.o memset.o
diff --git a/arch/hexagon/lib/checksum.c b/arch/hexagon/lib/checksum.c
new file mode 100644
index 000000000000..93005522d52b
--- /dev/null
+++ b/arch/hexagon/lib/checksum.c
@@ -0,0 +1,203 @@
+/*
+ * Checksum functions for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*  This was derived from arch/alpha/lib/checksum.c  */
+
+
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/byteorder.h>
+#include <net/checksum.h>
+#include <linux/uaccess.h>
+#include <asm/intrinsics.h>
+
+
+/*  Vector value operations  */
+#define SIGN(x, y)	((0x8000ULL*x)<<y)
+#define CARRY(x, y)	((0x0002ULL*x)<<y)
+#define SELECT(x, y)	((0x0001ULL*x)<<y)
+
+#define VR_NEGATE(a, b, c, d)	(SIGN(a, 48) + SIGN(b, 32) + SIGN(c, 16) \
+	+ SIGN(d, 0))
+#define VR_CARRY(a, b, c, d)	(CARRY(a, 48) + CARRY(b, 32) + CARRY(c, 16) \
+	+ CARRY(d, 0))
+#define VR_SELECT(a, b, c, d)	(SELECT(a, 48) + SELECT(b, 32) + SELECT(c, 16) \
+	+ SELECT(d, 0))
+
+
+/* optimized HEXAGON V3 intrinsic version */
+static inline unsigned short from64to16(u64 x)
+{
+	u64 sum;
+
+	sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1),
+			     VR_SELECT(1, 1, 1, 1));
+	sum += VR_CARRY(0, 0, 1, 0);
+	sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1));
+
+	return 0xFFFF & sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented.
+ */
+__sum16 csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
+			  unsigned short len, unsigned short proto,
+			  __wsum sum)
+{
+	return (__force __sum16)~from64to16(
+		(__force u64)saddr + (__force u64)daddr +
+		(__force u64)sum + ((len + proto) << 8));
+}
+
+__wsum csum_tcpudp_nofold(unsigned long saddr, unsigned long daddr,
+			  unsigned short len, unsigned short proto,
+			  __wsum sum)
+{
+	u64 result;
+
+	result = (__force u64)saddr + (__force u64)daddr +
+		 (__force u64)sum + ((len + proto) << 8);
+
+	/* Fold down to 32-bits so we don't lose in the typedef-less
+	   network stack.  */
+	/* 64 to 33 */
+	result = (result & 0xffffffffUL) + (result >> 32);
+	/* 33 to 32 */
+	result = (result & 0xffffffffUL) + (result >> 32);
+	return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_tcpudp_nofold);
+
+/*
+ * Do a 64-bit checksum on an arbitrary memory area..
+ *
+ * This isn't a great routine, but it's not _horrible_ either. The
+ * inner loop could be unrolled a bit further, and there are better
+ * ways to do the carry, but this is reasonable.
+ */
+
+/* optimized HEXAGON intrinsic version, with over read fixed */
+unsigned int do_csum(const void *voidptr, int len)
+{
+	u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8;
+	int i, start, mid, end, mask;
+	const char *ptr = voidptr;
+	unsigned short *ptr2;
+	unsigned int *ptr4;
+
+	if (len <= 0)
+		return 0;
+
+	start = 0xF & (16-(((int) ptr) & 0xF)) ;
+	mask  = 0x7fffffffUL >> HEXAGON_R_cl0_R(len);
+	start = start & mask ;
+
+	mid = len - start;
+	end = mid & 0xF;
+	mid = mid>>4;
+	sum0 = mid << 18;
+	sum1 = 0;
+
+	if (start & 1)
+		sum0 += (u64) (ptr[0] << 8);
+	ptr2 = (unsigned short *) &ptr[start & 1];
+	if (start & 2)
+		sum1 += (u64) ptr2[0];
+	ptr4 = (unsigned int *) &ptr[start & 3];
+	if (start & 4) {
+		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
+			VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]),
+			VR_SELECT(0, 0, 1, 1));
+		sum0 += VR_SELECT(0, 0, 1, 0);
+	}
+	ptr8 = (u64 *) &ptr[start & 7];
+	if (start & 8) {
+		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
+			VR_NEGATE(1, 1, 1, 1)^(ptr8[0]),
+			VR_SELECT(1, 1, 1, 1));
+		sum1 += VR_CARRY(0, 0, 1, 0);
+	}
+	ptr8_o = (u64 *) (ptr + start);
+	ptr8_e = (u64 *) (ptr + start + 8);
+
+	if (mid) {
+		x0 = *ptr8_e; ptr8_e += 2;
+		x1 = *ptr8_o; ptr8_o += 2;
+		if (mid > 1)
+			for (i = 0; i < mid-1; i++) {
+				sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
+					x0^VR_NEGATE(1, 1, 1, 1),
+					VR_SELECT(1, 1, 1, 1));
+				sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
+					x1^VR_NEGATE(1, 1, 1, 1),
+					VR_SELECT(1, 1, 1, 1));
+				x0 = *ptr8_e; ptr8_e += 2;
+				x1 = *ptr8_o; ptr8_o += 2;
+			}
+		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1),
+			VR_SELECT(1, 1, 1, 1));
+		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1),
+			VR_SELECT(1, 1, 1, 1));
+	}
+
+	ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)];
+	if (end & 4) {
+		sum1 = HEXAGON_P_vrmpyhacc_PP(sum1,
+			VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]),
+			VR_SELECT(0, 0, 1, 1));
+		sum1 += VR_SELECT(0, 0, 1, 0);
+	}
+	ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)];
+	if (end & 2)
+		sum0 += (u64) ptr2[0];
+
+	if (end & 1)
+		sum1 += (u64) ptr[start + (mid * 16) + (end & 14)];
+
+	ptr8 = (u64 *) &ptr[start + (mid * 16)];
+	if (end & 8) {
+		sum0 = HEXAGON_P_vrmpyhacc_PP(sum0,
+			VR_NEGATE(1, 1, 1, 1)^(ptr8[0]),
+			VR_SELECT(1, 1, 1, 1));
+		sum0 += VR_CARRY(0, 0, 1, 0);
+	}
+	sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1),
+		VR_SELECT(0, 0, 1, 1));
+	sum0 += VR_NEGATE(0, 0, 0, 1);
+	sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1));
+
+	if (start & 1)
+		sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8));
+
+	return 0xFFFF & sum0;
+}
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c
new file mode 100644
index 000000000000..8ae47ba0e705
--- /dev/null
+++ b/arch/hexagon/lib/io.c
@@ -0,0 +1,91 @@
+/*
+ * I/O access functions for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <asm/io.h>
+
+/*  These are all FIFO routines!  */
+
+/*
+ * __raw_readsw - read words a short at a time
+ * @addr:  source address
+ * @data:  data address
+ * @len: number of shorts to read
+ */
+void __raw_readsw(const void __iomem *addr, void *data, int len)
+{
+	const volatile short int *src = (short int *) addr;
+	short int *dst = (short int *) data;
+
+	if ((u32)data & 0x1)
+		panic("unaligned pointer to readsw");
+
+	while (len-- > 0)
+		*dst++ = *src;
+
+}
+
+/*
+ * __raw_writesw - read words a short at a time
+ * @addr:  source address
+ * @data:  data address
+ * @len: number of shorts to read
+ */
+void __raw_writesw(void __iomem *addr, const void *data, int len)
+{
+	const short int *src = (short int *)data;
+	volatile short int *dst = (short int *)addr;
+
+	if ((u32)data & 0x1)
+		panic("unaligned pointer to writesw");
+
+	while (len-- > 0)
+		*dst = *src++;
+
+
+}
+
+/*  Pretty sure len is pre-adjusted for the length of the access already */
+void __raw_readsl(const void __iomem *addr, void *data, int len)
+{
+	const volatile long *src = (long *) addr;
+	long *dst = (long *) data;
+
+	if ((u32)data & 0x3)
+		panic("unaligned pointer to readsl");
+
+	while (len-- > 0)
+		*dst++ = *src;
+
+
+}
+
+void __raw_writesl(void __iomem *addr, const void *data, int len)
+{
+	const long *src = (long *)data;
+	volatile long *dst = (long *)addr;
+
+	if ((u32)data & 0x3)
+		panic("unaligned pointer to writesl");
+
+	while (len-- > 0)
+		*dst = *src++;
+
+
+}
diff --git a/arch/hexagon/lib/memcpy.S b/arch/hexagon/lib/memcpy.S
new file mode 100644
index 000000000000..2101c3395665
--- /dev/null
+++ b/arch/hexagon/lib/memcpy.S
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Description
+ *
+ *   library function for memcpy where length bytes are copied from
+ *   ptr_in to ptr_out. ptr_out is returned unchanged.
+ *   Allows any combination of alignment on input and output pointers
+ *   and length from 0 to 2^32-1
+ *
+ * Restrictions
+ *   The arrays should not overlap, the program will produce undefined output
+ *   if they do.
+ *   For blocks less than 16 bytes a byte by byte copy is performed. For
+ *   8byte alignments, and length multiples, a dword copy is performed up to
+ *   96bytes
+ * History
+ *
+ *   DJH  5/15/09 Initial version 1.0
+ *   DJH  6/ 1/09 Version 1.1 modified ABI to inlcude R16-R19
+ *   DJH  7/12/09 Version 1.2 optimized codesize down to 760 was 840
+ *   DJH 10/14/09 Version 1.3 added special loop for aligned case, was
+ *                            overreading bloated codesize back up to 892
+ *   DJH  4/20/10 Version 1.4 fixed Ldword_loop_epilog loop to prevent loads
+ *                            occuring if only 1 left outstanding, fixes bug
+ *                            # 3888, corrected for all alignments. Peeled off
+ *                            1 32byte chunk from kernel loop and extended 8byte
+ *                            loop at end to solve all combinations and prevent
+ *                            over read.  Fixed Ldword_loop_prolog to prevent
+ *                            overread for blocks less than 48bytes. Reduced
+ *                            codesize to 752 bytes
+ *   DJH  4/21/10 version 1.5 1.4 fix broke code for input block ends not
+ *                            aligned to dword boundaries,underwriting by 1
+ *                            byte, added detection for this and fixed. A
+ *                            little bloat.
+ *   DJH  4/23/10 version 1.6 corrected stack error, R20 was not being restored
+ *                            always, fixed the error of R20 being modified
+ *                            before it was being saved
+ * Natural c model
+ * ===============
+ * void * memcpy(char * ptr_out, char * ptr_in, int length) {
+ *   int i;
+ *   if(length) for(i=0; i < length; i++) { ptr_out[i] = ptr_in[i]; }
+ *   return(ptr_out);
+ * }
+ *
+ * Optimized memcpy function
+ * =========================
+ * void * memcpy(char * ptr_out, char * ptr_in, int len) {
+ *   int i, prolog, kernel, epilog, mask;
+ *   u8 offset;
+ *   s64 data0, dataF8, data70;
+ *
+ *   s64 * ptr8_in;
+ *   s64 * ptr8_out;
+ *   s32 * ptr4;
+ *   s16 * ptr2;
+ *
+ *   offset = ((int) ptr_in) & 7;
+ *   ptr8_in = (s64 *) &ptr_in[-offset];   //read in the aligned pointers
+ *
+ *   data70 = *ptr8_in++;
+ *   dataF8 = *ptr8_in++;
+ *
+ *   data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *
+ *   prolog = 32 - ((int) ptr_out);
+ *   mask  = 0x7fffffff >> HEXAGON_R_cl0_R(len);
+ *   prolog = prolog & mask;
+ *   kernel = len - prolog;
+ *   epilog = kernel & 0x1F;
+ *   kernel = kernel>>5;
+ *
+ *   if (prolog & 1) { ptr_out[0] = (u8) data0; data0 >>= 8; ptr_out += 1;}
+ *   ptr2 = (s16 *) &ptr_out[0];
+ *   if (prolog & 2) { ptr2[0] = (u16) data0;  data0 >>= 16; ptr_out += 2;}
+ *   ptr4 = (s32 *) &ptr_out[0];
+ *   if (prolog & 4) { ptr4[0] = (u32) data0;  data0 >>= 32; ptr_out += 4;}
+ *
+ *   offset = offset + (prolog & 7);
+ *   if (offset >= 8) {
+ *     data70 = dataF8;
+ *     dataF8 = *ptr8_in++;
+ *   }
+ *   offset = offset & 0x7;
+ *
+ *   prolog = prolog >> 3;
+ *   if (prolog) for (i=0; i < prolog; i++) {
+ *       data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       data70 = dataF8;
+ *       dataF8 = *ptr8_in++;
+ *   }
+ *   if(kernel) { kernel -= 1; epilog += 32; }
+ *   if(kernel) for(i=0; i < kernel; i++) {
+ *       data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       data70 = *ptr8_in++;
+ *
+ *       data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       dataF8 = *ptr8_in++;
+ *
+ *       data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       data70 = *ptr8_in++;
+ *
+ *       data0 = HEXAGON_P_valignb_PPp(data70, dataF8, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       dataF8 = *ptr8_in++;
+ *   }
+ *   epilogdws = epilog >> 3;
+ *   if (epilogdws) for (i=0; i < epilogdws; i++) {
+ *       data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *       ptr8_out = (s64 *) &ptr_out[0]; *ptr8_out = data0; ptr_out += 8;
+ *       data70 = dataF8;
+ *       dataF8 = *ptr8_in++;
+ *   }
+ *   data0 = HEXAGON_P_valignb_PPp(dataF8, data70, offset);
+ *
+ *   ptr4 = (s32 *) &ptr_out[0];
+ *   if (epilog & 4) { ptr4[0] = (u32) data0; data0 >>= 32; ptr_out += 4;}
+ *   ptr2 = (s16 *) &ptr_out[0];
+ *   if (epilog & 2) { ptr2[0] = (u16) data0; data0 >>= 16; ptr_out += 2;}
+ *   if (epilog & 1) { *ptr_out++ = (u8) data0; }
+ *
+ *   return(ptr_out - length);
+ * }
+ *
+ * Codesize : 784 bytes
+ */
+
+
+#define ptr_out		R0	/*  destination  pounter  */
+#define ptr_in		R1	/*  source pointer  */
+#define len		R2	/*  length of copy in bytes  */
+
+#define data70		R13:12	/*  lo 8 bytes of non-aligned transfer  */
+#define dataF8		R11:10	/*  hi 8 bytes of non-aligned transfer  */
+#define ldata0		R7:6	/*  even 8 bytes chunks  */
+#define ldata1		R25:24	/*  odd 8 bytes chunks  */
+#define data1		R7	/*  lower 8 bytes of ldata1  */
+#define data0		R6	/*  lower 8 bytes of ldata0  */
+
+#define ifbyte		p0	/*  if transfer has bytes in epilog/prolog  */
+#define ifhword		p0	/*  if transfer has shorts in epilog/prolog  */
+#define ifword		p0	/*  if transfer has words in epilog/prolog  */
+#define noprolog	p0	/*  no prolog, xfer starts at 32byte  */
+#define nokernel	p1	/*  no 32byte multiple block in the transfer  */
+#define noepilog	p0	/*  no epilog, xfer ends on 32byte boundary  */
+#define align		p2	/*  alignment of input rel to 8byte boundary  */
+#define kernel1		p0	/*  kernel count == 1  */
+
+#define dalign		R25	/*  rel alignment of input to output data  */
+#define star3		R16	/*  number bytes in prolog - dwords  */
+#define rest		R8	/*  length - prolog bytes  */
+#define back		R7	/*  nr bytes > dword boundary in src block  */
+#define epilog		R3	/*  bytes in epilog  */
+#define inc		R15:14	/*  inc kernel by -1 and defetch ptr by 32  */
+#define kernel		R4	/*  number of 32byte chunks in kernel  */
+#define ptr_in_p_128	R5	/*  pointer for prefetch of input data  */
+#define mask		R8	/*  mask used to determine prolog size  */
+#define shift		R8	/*  used to work a shifter to extract bytes  */
+#define shift2		R5	/*  in epilog to workshifter to extract bytes */
+#define prolog		R15	/*  bytes in  prolog  */
+#define epilogdws	R15	/*  number dwords in epilog  */
+#define shiftb		R14	/*  used to extract bytes  */
+#define offset		R9	/*  same as align in reg  */
+#define ptr_out_p_32	R17	/*  pointer to output dczero  */
+#define align888	R14	/*  if simple dword loop can be used  */
+#define len8		R9	/*  number of dwords in length  */
+#define over		R20	/*  nr of bytes > last inp buf dword boundary */
+
+#define ptr_in_p_128kernel	R5:4	/*  packed fetch pointer & kernel cnt */
+
+	.section .text
+	.p2align 4
+        .global memcpy
+        .type memcpy, @function
+memcpy:
+{
+	p2 = cmp.eq(len, #0);		/*  =0 */
+	align888 = or(ptr_in, ptr_out);	/*  %8 < 97 */
+	p0 = cmp.gtu(len, #23);		/*  %1, <24 */
+	p1 = cmp.eq(ptr_in, ptr_out);	/*  attempt to overwrite self */
+}
+{
+	p1 = or(p2, p1);
+	p3 = cmp.gtu(len, #95);		/*  %8 < 97 */
+	align888 = or(align888, len);	/*  %8 < 97 */
+	len8 = lsr(len, #3);		/*  %8 < 97 */
+}
+{
+	dcfetch(ptr_in);		/*  zero/ptrin=ptrout causes fetch */
+	p2 = bitsclr(align888, #7);	/*  %8 < 97  */
+	if(p1) jumpr r31;		/*  =0  */
+}
+{
+	p2 = and(p2,!p3);			/*  %8 < 97  */
+	if (p2.new) len = add(len, #-8);	/*  %8 < 97  */
+	if (p2.new) jump:NT .Ldwordaligned; 	/*  %8 < 97  */
+}
+{
+	if(!p0) jump .Lbytes23orless;	/*  %1, <24  */
+	mask.l = #LO(0x7fffffff);
+	/*  all bytes before line multiples of data  */
+	prolog = sub(#0, ptr_out);
+}
+{
+	/*  save r31 on stack, decrement sp by 16  */
+	allocframe(#24);
+	mask.h = #HI(0x7fffffff);
+	ptr_in_p_128 = add(ptr_in, #32);
+	back = cl0(len);
+}
+{
+	memd(sp+#0) = R17:16;		/*  save r16,r17 on stack6  */
+	r31.l = #LO(.Lmemcpy_return);	/*  set up final return pointer  */
+	prolog &= lsr(mask, back);
+	offset = and(ptr_in, #7);
+}
+{
+	memd(sp+#8) = R25:24;		/*  save r25,r24 on stack  */
+	dalign = sub(ptr_out, ptr_in);
+	r31.h = #HI(.Lmemcpy_return);	/*  set up final return pointer  */
+}
+{
+	/*  see if there if input buffer end if aligned  */
+	over = add(len, ptr_in);
+	back = add(len, offset);
+	memd(sp+#16) = R21:20;		/*  save r20,r21 on stack  */
+}
+{
+	noprolog = bitsclr(prolog, #7);
+	prolog = and(prolog, #31);
+	dcfetch(ptr_in_p_128);
+	ptr_in_p_128 = add(ptr_in_p_128, #32);
+}
+{
+	kernel = sub(len, prolog);
+	shift = asl(prolog, #3);
+	star3 = and(prolog, #7);
+	ptr_in = and(ptr_in, #-8);
+}
+{
+	prolog = lsr(prolog, #3);
+	epilog = and(kernel, #31);
+	ptr_out_p_32 = add(ptr_out, prolog);
+	over = and(over, #7);
+}
+{
+	p3 = cmp.gtu(back, #8);
+	kernel = lsr(kernel, #5);
+	dcfetch(ptr_in_p_128);
+	ptr_in_p_128 = add(ptr_in_p_128, #32);
+}
+{
+	p1 = cmp.eq(prolog, #0);
+	if(!p1.new) prolog = add(prolog, #1);
+	dcfetch(ptr_in_p_128);	/*  reserve the line 64bytes on  */
+	ptr_in_p_128 = add(ptr_in_p_128, #32);
+}
+{
+	nokernel = cmp.eq(kernel,#0);
+	dcfetch(ptr_in_p_128);	/* reserve the line 64bytes on  */
+	ptr_in_p_128 = add(ptr_in_p_128, #32);
+	shiftb = and(shift, #8);
+}
+{
+	dcfetch(ptr_in_p_128);		/*  reserve the line 64bytes on  */
+	ptr_in_p_128 = add(ptr_in_p_128, #32);
+	if(nokernel) jump .Lskip64;
+	p2 = cmp.eq(kernel, #1);	/*  skip ovr if kernel == 0  */
+}
+{
+	dczeroa(ptr_out_p_32);
+	/*  don't advance pointer  */
+	if(!p2) ptr_out_p_32 = add(ptr_out_p_32, #32);
+}
+{
+	dalign = and(dalign, #31);
+	dczeroa(ptr_out_p_32);
+}
+.Lskip64:
+{
+	data70 = memd(ptr_in++#16);
+	if(p3) dataF8 = memd(ptr_in+#8);
+	if(noprolog) jump .Lnoprolog32;
+	align = offset;
+}
+/*  upto initial 7 bytes  */
+{
+	ldata0 = valignb(dataF8, data70, align);
+	ifbyte = tstbit(shift,#3);
+	offset = add(offset, star3);
+}
+{
+	if(ifbyte) memb(ptr_out++#1) = data0;
+	ldata0 = lsr(ldata0, shiftb);
+	shiftb = and(shift, #16);
+	ifhword = tstbit(shift,#4);
+}
+{
+	if(ifhword) memh(ptr_out++#2) = data0;
+	ldata0 = lsr(ldata0, shiftb);
+	ifword = tstbit(shift,#5);
+	p2 = cmp.gtu(offset, #7);
+}
+{
+	if(ifword) memw(ptr_out++#4) = data0;
+	if(p2) data70 = dataF8;
+	if(p2) dataF8 = memd(ptr_in++#8);	/*  another 8 bytes  */
+	align = offset;
+}
+.Lnoprolog32:
+{
+	p3 = sp1loop0(.Ldword_loop_prolog, prolog)
+	rest = sub(len, star3);	/*  whats left after the loop  */
+	p0 = cmp.gt(over, #0);
+}
+	if(p0) rest = add(rest, #16);
+.Ldword_loop_prolog:
+{
+	if(p3) memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(dataF8, data70, align);
+	p0 = cmp.gt(rest, #16);
+}
+{
+	data70 = dataF8;
+	if(p0) dataF8 = memd(ptr_in++#8);
+	rest = add(rest, #-8);
+}:endloop0
+.Lkernel:
+{
+	/*  kernel is at least 32bytes  */
+	p3 = cmp.gtu(kernel, #0);
+	/*  last itn. remove edge effects  */
+	if(p3.new) kernel = add(kernel, #-1);
+	/*  dealt with in last dword loop  */
+	if(p3.new) epilog = add(epilog, #32);
+}
+{
+	nokernel = cmp.eq(kernel, #0);		/*  after adjustment, recheck */
+	if(nokernel.new) jump:NT .Lepilog;	/*  likely not taken  */
+	inc = combine(#32, #-1);
+	p3 = cmp.gtu(dalign, #24);
+}
+{
+	if(p3) jump .Lodd_alignment;
+}
+{
+	loop0(.Loword_loop_25to31, kernel);
+	kernel1 = cmp.gtu(kernel, #1);
+	rest = kernel;
+}
+	.falign
+.Loword_loop_25to31:
+{
+	dcfetch(ptr_in_p_128);	/*  prefetch 4 lines ahead  */
+	if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32);
+}
+{
+	dczeroa(ptr_out_p_32);	/*  reserve the next 32bytes in cache  */
+	p3 = cmp.eq(kernel, rest);
+}
+{
+	/*  kernel -= 1  */
+	ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc);
+	/*  kill write on first iteration  */
+	if(!p3) memd(ptr_out++#8) = ldata1;
+	ldata1 = valignb(dataF8, data70, align);
+	data70 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(data70, dataF8, align);
+	dataF8 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata1;
+	ldata1 = valignb(dataF8, data70, align);
+	data70 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(data70, dataF8, align);
+	dataF8 = memd(ptr_in++#8);
+	kernel1 = cmp.gtu(kernel, #1);
+}:endloop0
+{
+	memd(ptr_out++#8) = ldata1;
+	jump .Lepilog;
+}
+.Lodd_alignment:
+{
+	loop0(.Loword_loop_00to24, kernel);
+	kernel1 = cmp.gtu(kernel, #1);
+	rest = add(kernel, #-1);
+}
+	.falign
+.Loword_loop_00to24:
+{
+	dcfetch(ptr_in_p_128);	/*  prefetch 4 lines ahead  */
+	ptr_in_p_128kernel = vaddw(ptr_in_p_128kernel, inc);
+	if(kernel1) ptr_out_p_32 = add(ptr_out_p_32, #32);
+}
+{
+	dczeroa(ptr_out_p_32);	/*  reserve the next 32bytes in cache  */
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(dataF8, data70, align);
+	data70 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(data70, dataF8, align);
+	dataF8 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(dataF8, data70, align);
+	data70 = memd(ptr_in++#8);
+}
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(data70, dataF8, align);
+	dataF8 = memd(ptr_in++#8);
+	kernel1 = cmp.gtu(kernel, #1);
+}:endloop0
+.Lepilog:
+{
+	noepilog = cmp.eq(epilog,#0);
+	epilogdws = lsr(epilog, #3);
+	kernel = and(epilog, #7);
+}
+{
+	if(noepilog) jumpr r31;
+	if(noepilog) ptr_out = sub(ptr_out, len);
+	p3 = cmp.eq(epilogdws, #0);
+	shift2 = asl(epilog, #3);
+}
+{
+	shiftb = and(shift2, #32);
+	ifword = tstbit(epilog,#2);
+	if(p3) jump .Lepilog60;
+	if(!p3) epilog = add(epilog, #-16);
+}
+{
+	loop0(.Ldword_loop_epilog, epilogdws);
+	/*  stop criteria is lsbs unless = 0 then its 8  */
+	p3 = cmp.eq(kernel, #0);
+	if(p3.new) kernel= #8;
+	p1 = cmp.gt(over, #0);
+}
+	/*  if not aligned to end of buffer execute 1 more iteration  */
+	if(p1) kernel= #0;
+.Ldword_loop_epilog:
+{
+	memd(ptr_out++#8) = ldata0;
+	ldata0 = valignb(dataF8, data70, align);
+	p3 = cmp.gt(epilog, kernel);
+}
+{
+	data70 = dataF8;
+	if(p3) dataF8 = memd(ptr_in++#8);
+	epilog = add(epilog, #-8);
+}:endloop0
+/* copy last 7 bytes */
+.Lepilog60:
+{
+	if(ifword) memw(ptr_out++#4) = data0;
+	ldata0 = lsr(ldata0, shiftb);
+	ifhword = tstbit(epilog,#1);
+	shiftb = and(shift2, #16);
+}
+{
+	if(ifhword) memh(ptr_out++#2) = data0;
+	ldata0 = lsr(ldata0, shiftb);
+	ifbyte = tstbit(epilog,#0);
+	if(ifbyte.new) len = add(len, #-1);
+}
+{
+	if(ifbyte) memb(ptr_out) = data0;
+	ptr_out = sub(ptr_out, len);	/*  return dest pointer  */
+        jumpr r31;
+}
+/*  do byte copy for small n  */
+.Lbytes23orless:
+{
+	p3 = sp1loop0(.Lbyte_copy, len);
+	len = add(len, #-1);
+}
+.Lbyte_copy:
+{
+	data0 = memb(ptr_in++#1);
+	if(p3) memb(ptr_out++#1) = data0;
+}:endloop0
+{
+	memb(ptr_out) = data0;
+	ptr_out = sub(ptr_out, len);
+	jumpr r31;
+}
+/*  do dword copies for aligned in, out and length  */
+.Ldwordaligned:
+{
+	p3 = sp1loop0(.Ldword_copy, len8);
+}
+.Ldword_copy:
+{
+	if(p3) memd(ptr_out++#8) = ldata0;
+	ldata0 = memd(ptr_in++#8);
+}:endloop0
+{
+	memd(ptr_out) = ldata0;
+	ptr_out = sub(ptr_out, len);
+	jumpr r31;	/*  return to function caller  */
+}
+.Lmemcpy_return:
+	r21:20 = memd(sp+#16);	/*  restore r20+r21  */
+{
+	r25:24 = memd(sp+#8);	/*  restore r24+r25  */
+	r17:16 = memd(sp+#0);	/*  restore r16+r17  */
+}
+	deallocframe;	/*  restore r31 and incrment stack by 16  */
+	jumpr r31
diff --git a/arch/hexagon/lib/memset.S b/arch/hexagon/lib/memset.S
new file mode 100644
index 000000000000..26d961439ab0
--- /dev/null
+++ b/arch/hexagon/lib/memset.S
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2011 Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+
+/* HEXAGON assembly optimized memset */
+/* Replaces the standard library function memset */
+
+
+        .macro HEXAGON_OPT_FUNC_BEGIN name
+	.text
+	.p2align 4
+	.globl \name
+	.type  \name, @function
+\name:
+	.endm
+
+	.macro HEXAGON_OPT_FUNC_FINISH name
+	.size  \name, . - \name
+	.endm
+
+/* FUNCTION: memset (v2 version) */
+#if __HEXAGON_ARCH__ < 3
+HEXAGON_OPT_FUNC_BEGIN memset
+	{
+		r6 = #8
+		r7 = extractu(r0, #3 , #0)
+		p0 = cmp.eq(r2, #0)
+		p1 = cmp.gtu(r2, #7)
+	}
+	{
+		r4 = vsplatb(r1)
+		r8 = r0           /* leave r0 intact for return val  */
+		r9 = sub(r6, r7)  /* bytes until double alignment  */
+		if p0 jumpr r31   /* count == 0, so return  */
+	}
+	{
+		r3 = #0
+		r7 = #0
+		p0 = tstbit(r9, #0)
+		if p1 jump 2f /* skip byte loop */
+	}
+
+/* less than 8 bytes to set, so just set a byte at a time and return  */
+
+		loop0(1f, r2) /* byte loop */
+	.falign
+1: /* byte loop */
+	{
+		memb(r8++#1) = r4
+	}:endloop0
+		jumpr r31
+	.falign
+2: /* skip byte loop */
+	{
+		r6 = #1
+		p0 = tstbit(r9, #1)
+		p1 = cmp.eq(r2, #1)
+		if !p0 jump 3f /* skip initial byte store */
+	}
+	{
+		memb(r8++#1) = r4
+		r3:2 = sub(r3:2, r7:6)
+		if p1 jumpr r31
+	}
+	.falign
+3: /* skip initial byte store */
+	{
+		r6 = #2
+		p0 = tstbit(r9, #2)
+		p1 = cmp.eq(r2, #2)
+		if !p0 jump 4f /* skip initial half store */
+	}
+	{
+		memh(r8++#2) = r4
+		r3:2 = sub(r3:2, r7:6)
+		if p1 jumpr r31
+	}
+	.falign
+4: /* skip initial half store */
+	{
+		r6 = #4
+		p0 = cmp.gtu(r2, #7)
+		p1 = cmp.eq(r2, #4)
+		if !p0 jump 5f /* skip initial word store */
+	}
+	{
+		memw(r8++#4) = r4
+		r3:2 = sub(r3:2, r7:6)
+		p0 = cmp.gtu(r2, #11)
+		if p1 jumpr r31
+	}
+	.falign
+5: /* skip initial word store */
+	{
+		r10 = lsr(r2, #3)
+		p1 = cmp.eq(r3, #1)
+		if !p0 jump 7f /* skip double loop */
+	}
+	{
+		r5 = r4
+		r6 = #8
+		loop0(6f, r10) /* double loop */
+	}
+
+/* set bytes a double word at a time  */
+
+	.falign
+6: /* double loop */
+	{
+		memd(r8++#8) = r5:4
+		r3:2 = sub(r3:2, r7:6)
+		p1 = cmp.eq(r2, #8)
+	}:endloop0
+	.falign
+7: /* skip double loop */
+	{
+		p0 = tstbit(r2, #2)
+		if p1 jumpr r31
+	}
+	{
+		r6 = #4
+		p0 = tstbit(r2, #1)
+		p1 = cmp.eq(r2, #4)
+		if !p0 jump 8f /* skip final word store */
+	}
+	{
+		memw(r8++#4) = r4
+		r3:2 = sub(r3:2, r7:6)
+		if p1 jumpr r31
+	}
+	.falign
+8: /* skip final word store */
+	{
+		p1 = cmp.eq(r2, #2)
+		if !p0 jump 9f /* skip final half store */
+	}
+	{
+		memh(r8++#2) = r4
+		if p1 jumpr r31
+	}
+	.falign
+9: /* skip final half store */
+	{
+		memb(r8++#1) = r4
+		jumpr r31
+	}
+HEXAGON_OPT_FUNC_FINISH memset
+#endif
+
+
+/*  FUNCTION: memset (v3 and higher version)  */
+#if __HEXAGON_ARCH__ >= 3
+HEXAGON_OPT_FUNC_BEGIN memset
+	{
+		r7=vsplatb(r1)
+		r6 = r0
+		if (r2==#0) jump:nt .L1
+	}
+	{
+		r5:4=combine(r7,r7)
+		p0 = cmp.gtu(r2,#8)
+		if (p0.new) jump:nt .L3
+	}
+	{
+		r3 = r0
+		loop0(.L47,r2)
+	}
+	.falign
+.L47:
+	{
+		memb(r3++#1) = r1
+	}:endloop0 /* start=.L47 */
+		jumpr r31
+.L3:
+	{
+		p0 = tstbit(r0,#0)
+		if (!p0.new) jump:nt .L8
+		p1 = cmp.eq(r2, #1)
+	}
+	{
+		r6 = add(r0, #1)
+		r2 = add(r2,#-1)
+		memb(r0) = r1
+		if (p1) jump .L1
+	}
+.L8:
+	{
+		p0 = tstbit(r6,#1)
+		if (!p0.new) jump:nt .L10
+	}
+	{
+		r2 = add(r2,#-2)
+		memh(r6++#2) = r7
+		p0 = cmp.eq(r2, #2)
+		if (p0.new) jump:nt .L1
+	}
+.L10:
+	{
+		p0 = tstbit(r6,#2)
+		if (!p0.new) jump:nt .L12
+	}
+	{
+		r2 = add(r2,#-4)
+		memw(r6++#4) = r7
+		p0 = cmp.eq(r2, #4)
+		if (p0.new) jump:nt .L1
+	}
+.L12:
+	{
+		p0 = cmp.gtu(r2,#127)
+		if (!p0.new) jump:nt .L14
+	}
+		r3 = and(r6,#31)
+		if (r3==#0) jump:nt .L17
+	{
+		memd(r6++#8) = r5:4
+		r2 = add(r2,#-8)
+	}
+		r3 = and(r6,#31)
+		if (r3==#0) jump:nt .L17
+	{
+		memd(r6++#8) = r5:4
+		r2 = add(r2,#-8)
+	}
+		r3 = and(r6,#31)
+		if (r3==#0) jump:nt .L17
+	{
+		memd(r6++#8) = r5:4
+		r2 = add(r2,#-8)
+	}
+.L17:
+	{
+		r3 = lsr(r2,#5)
+		if (r1!=#0) jump:nt .L18
+	}
+	{
+		r8 = r3
+		r3 = r6
+		loop0(.L46,r3)
+	}
+	.falign
+.L46:
+	{
+		dczeroa(r6)
+		r6 = add(r6,#32)
+		r2 = add(r2,#-32)
+	}:endloop0 /* start=.L46 */
+.L14:
+	{
+		p0 = cmp.gtu(r2,#7)
+		if (!p0.new) jump:nt .L28
+		r8 = lsr(r2,#3)
+	}
+		loop0(.L44,r8)
+	.falign
+.L44:
+	{
+		memd(r6++#8) = r5:4
+		r2 = add(r2,#-8)
+	}:endloop0 /* start=.L44 */
+.L28:
+	{
+		p0 = tstbit(r2,#2)
+		if (!p0.new) jump:nt .L33
+	}
+	{
+		r2 = add(r2,#-4)
+		memw(r6++#4) = r7
+	}
+.L33:
+	{
+		p0 = tstbit(r2,#1)
+		if (!p0.new) jump:nt .L35
+	}
+	{
+		r2 = add(r2,#-2)
+		memh(r6++#2) = r7
+	}
+.L35:
+		p0 = cmp.eq(r2,#1)
+		if (p0) memb(r6) = r1
+.L1:
+		jumpr r31
+.L18:
+		loop0(.L45,r3)
+	.falign
+.L45:
+		dczeroa(r6)
+	{
+		memd(r6++#8) = r5:4
+		r2 = add(r2,#-32)
+	}
+		memd(r6++#8) = r5:4
+		memd(r6++#8) = r5:4
+	{
+		memd(r6++#8) = r5:4
+	}:endloop0 /* start=.L45  */
+		jump .L14
+HEXAGON_OPT_FUNC_FINISH memset
+#endif
diff --git a/arch/hexagon/mm/Makefile b/arch/hexagon/mm/Makefile
new file mode 100644
index 000000000000..1a0be4d576e1
--- /dev/null
+++ b/arch/hexagon/mm/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for Hexagon memory management subsystem
+#
+
+obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o
diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c
new file mode 100644
index 000000000000..c5cf6ee27587
--- /dev/null
+++ b/arch/hexagon/mm/cache.c
@@ -0,0 +1,128 @@
+/*
+ * Cache management functions for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/hexagon_vm.h>
+
+#define spanlines(start, end) \
+	(((end - (start & ~(LINESIZE - 1))) >> LINEBITS) + 1)
+
+void flush_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long lines = spanlines(start, end-1);
+	unsigned long i, flags;
+
+	start &= ~(LINESIZE - 1);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < lines; i++) {
+		__asm__ __volatile__ (
+		"	dccleaninva(%0);	"
+		:
+		: "r" (start)
+		);
+		start += LINESIZE;
+	}
+	local_irq_restore(flags);
+}
+
+void flush_icache_range(unsigned long start, unsigned long end)
+{
+	unsigned long lines = spanlines(start, end-1);
+	unsigned long i, flags;
+
+	start &= ~(LINESIZE - 1);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < lines; i++) {
+		__asm__ __volatile__ (
+			"	dccleana(%0); "
+			"	icinva(%0);	"
+			:
+			: "r" (start)
+		);
+		start += LINESIZE;
+	}
+	__asm__ __volatile__ (
+		"isync"
+	);
+	local_irq_restore(flags);
+}
+
+void hexagon_clean_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long lines = spanlines(start, end-1);
+	unsigned long i, flags;
+
+	start &= ~(LINESIZE - 1);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < lines; i++) {
+		__asm__ __volatile__ (
+		"	dccleana(%0);	"
+		:
+		: "r" (start)
+		);
+		start += LINESIZE;
+	}
+	local_irq_restore(flags);
+}
+
+void hexagon_inv_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long lines = spanlines(start, end-1);
+	unsigned long i, flags;
+
+	start &= ~(LINESIZE - 1);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < lines; i++) {
+		__asm__ __volatile__ (
+		"	dcinva(%0);	"
+		:
+		: "r" (start)
+		);
+		start += LINESIZE;
+	}
+	local_irq_restore(flags);
+}
+
+
+
+
+/*
+ * This is just really brutal and shouldn't be used anyways,
+ * especially on V2.  Left here just in case.
+ */
+void flush_cache_all_hexagon(void)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	__vmcache_ickill();
+	__vmcache_dckill();
+	__vmcache_l2kill();
+	local_irq_restore(flags);
+	mb();
+}
diff --git a/arch/hexagon/mm/copy_from_user.S b/arch/hexagon/mm/copy_from_user.S
new file mode 100644
index 000000000000..8eb1d4d61a3d
--- /dev/null
+++ b/arch/hexagon/mm/copy_from_user.S
@@ -0,0 +1,114 @@
+/*
+ * User memory copy functions for kernel
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * The right way to do this involves valignb
+ * The easy way to do this is only speed up src/dest similar alignment.
+ */
+
+/*
+ * Copy to/from user are the same, except that for packets with a load and
+ * a store, I don't know how to tell which kind of exception we got.
+ * Therefore, we duplicate the function, and handle faulting addresses
+ * differently for each function
+ */
+
+/*
+ * copy from user: loads can fault
+ */
+#define src_sav r13
+#define dst_sav r12
+#define src_dst_sav r13:12
+#define d_dbuf r15:14
+#define w_dbuf r15
+
+#define dst r0
+#define src r1
+#define bytes r2
+#define loopcount r5
+
+#define FUNCNAME __copy_from_user_hexagon
+#include "copy_user_template.S"
+
+	/* LOAD FAULTS from COPY_FROM_USER */
+
+	/* Alignment loop.  r2 has been updated. Return it. */
+	.falign
+1009:
+2009:
+4009:
+	{
+		r0 = r2
+		jumpr r31
+	}
+	/* Normal copy loops. Do epilog. Use src-src_sav to compute distance */
+	/* X - (A - B) == X + B - A */
+	.falign
+8089:
+	{
+		memd(dst) = d_dbuf
+		r2 += sub(src_sav,src)
+	}
+	{
+		r0 = r2
+		jumpr r31
+	}
+	.falign
+4089:
+	{
+		memw(dst) = w_dbuf
+		r2 += sub(src_sav,src)
+	}
+	{
+		r0 = r2
+		jumpr r31
+	}
+	.falign
+2089:
+	{
+		memh(dst) = w_dbuf
+		r2 += sub(src_sav,src)
+	}
+	{
+		r0 = r2
+		jumpr r31
+	}
+	.falign
+1089:
+	{
+		memb(dst) = w_dbuf
+		r2 += sub(src_sav,src)
+	}
+	{
+		r0 = r2
+		jumpr r31
+	}
+
+	/* COPY FROM USER: only loads can fail */
+
+	.section __ex_table,"a"
+	.long 1000b,1009b
+	.long 2000b,2009b
+	.long 4000b,4009b
+	.long 8080b,8089b
+	.long 4080b,4089b
+	.long 2080b,2089b
+	.long 1080b,1089b
+	.previous
diff --git a/arch/hexagon/mm/copy_to_user.S b/arch/hexagon/mm/copy_to_user.S
new file mode 100644
index 000000000000..cb9740ed9e7d
--- /dev/null
+++ b/arch/hexagon/mm/copy_to_user.S
@@ -0,0 +1,92 @@
+/*
+ * User memory copying routines for the Hexagon Kernel
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/* The right way to do this involves valignb
+ * The easy way to do this is only speed up src/dest similar alignment.
+ */
+
+/*
+ * Copy to/from user are the same, except that for packets with a load and
+ * a store, I don't know how to tell which kind of exception we got.
+ * Therefore, we duplicate the function, and handle faulting addresses
+ * differently for each function
+ */
+
+/*
+ * copy to user: stores can fault
+ */
+#define src_sav r13
+#define dst_sav r12
+#define src_dst_sav r13:12
+#define d_dbuf r15:14
+#define w_dbuf r15
+
+#define dst r0
+#define src r1
+#define bytes r2
+#define loopcount r5
+
+#define FUNCNAME __copy_to_user_hexagon
+#include "copy_user_template.S"
+
+	/* STORE FAULTS from COPY_TO_USER */
+	.falign
+1109:
+2109:
+4109:
+	/* Alignment loop.  r2 has been updated.  Return it. */
+	{
+		r0 = r2
+		jumpr r31
+	}
+	/* Normal copy loops.  Use dst-dst_sav to compute distance */
+	/* dst holds best write, no need to unwind any loops */
+	/* X - (A - B) == X + B - A */
+	.falign
+8189:
+8199:
+4189:
+4199:
+2189:
+2199:
+1189:
+1199:
+	{
+		r2 += sub(dst_sav,dst)
+	}
+	{
+		r0 = r2
+		jumpr r31
+	}
+
+	/* COPY TO USER: only stores can fail */
+	.section __ex_table,"a"
+	.long 1100b,1109b
+	.long 2100b,2109b
+	.long 4100b,4109b
+	.long 8180b,8189b
+	.long 8190b,8199b
+	.long 4180b,4189b
+	.long 4190b,4199b
+	.long 2180b,2189b
+	.long 2190b,2199b
+	.long 1180b,1189b
+	.long 1190b,1199b
+	.previous
diff --git a/arch/hexagon/mm/copy_user_template.S b/arch/hexagon/mm/copy_user_template.S
new file mode 100644
index 000000000000..08d7d7b23daa
--- /dev/null
+++ b/arch/hexagon/mm/copy_user_template.S
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/* Numerology:
+ * WXYZ
+ * W: width in bytes
+ * X: Load=0, Store=1
+ * Y: Location 0=preamble,8=loop,9=epilog
+ * Z: Location=0,handler=9
+ */
+	.text
+	.global FUNCNAME
+	.type FUNCNAME, @function
+	.p2align 5
+FUNCNAME:
+	{
+		p0 = cmp.gtu(bytes,#0)
+		if (!p0.new) jump:nt .Ldone
+		r3 = or(dst,src)
+		r4 = xor(dst,src)
+	}
+	{
+		p1 = cmp.gtu(bytes,#15)
+		p0 = bitsclr(r3,#7)
+		if (!p0.new) jump:nt .Loop_not_aligned_8
+		src_dst_sav = combine(src,dst)
+	}
+
+	{
+		loopcount = lsr(bytes,#3)
+		if (!p1) jump .Lsmall
+	}
+	p3=sp1loop0(.Loop8,loopcount)
+.Loop8:
+8080:
+8180:
+	{
+		if (p3) memd(dst++#8) = d_dbuf
+		d_dbuf = memd(src++#8)
+	}:endloop0
+8190:
+	{
+		memd(dst++#8) = d_dbuf
+		bytes -= asl(loopcount,#3)
+		jump .Lsmall
+	}
+
+.Loop_not_aligned_8:
+	{
+		p0 = bitsclr(r4,#7)
+		if (p0.new) jump:nt .Lalign
+	}
+	{
+		p0 = bitsclr(r3,#3)
+		if (!p0.new) jump:nt .Loop_not_aligned_4
+		p1 = cmp.gtu(bytes,#7)
+	}
+
+	{
+		if (!p1) jump .Lsmall
+		loopcount = lsr(bytes,#2)
+	}
+	p3=sp1loop0(.Loop4,loopcount)
+.Loop4:
+4080:
+4180:
+	{
+		if (p3) memw(dst++#4) = w_dbuf
+		w_dbuf = memw(src++#4)
+	}:endloop0
+4190:
+	{
+		memw(dst++#4) = w_dbuf
+		bytes -= asl(loopcount,#2)
+		jump .Lsmall
+	}
+
+.Loop_not_aligned_4:
+	{
+		p0 = bitsclr(r3,#1)
+		if (!p0.new) jump:nt .Loop_not_aligned
+		p1 = cmp.gtu(bytes,#3)
+	}
+
+	{
+		if (!p1) jump .Lsmall
+		loopcount = lsr(bytes,#1)
+	}
+	p3=sp1loop0(.Loop2,loopcount)
+.Loop2:
+2080:
+2180:
+	{
+		if (p3) memh(dst++#2) = w_dbuf
+		w_dbuf = memuh(src++#2)
+	}:endloop0
+2190:
+	{
+		memh(dst++#2) = w_dbuf
+		bytes -= asl(loopcount,#1)
+		jump .Lsmall
+	}
+
+.Loop_not_aligned: /* Works for as small as one byte */
+	p3=sp1loop0(.Loop1,bytes)
+.Loop1:
+1080:
+1180:
+	{
+		if (p3) memb(dst++#1) = w_dbuf
+		w_dbuf = memub(src++#1)
+	}:endloop0
+	/* Done */
+1190:
+	{
+		memb(dst) = w_dbuf
+		jumpr r31
+		r0 = #0
+	}
+
+.Lsmall:
+	{
+		p0 = cmp.gtu(bytes,#0)
+		if (p0.new) jump:nt .Loop_not_aligned
+	}
+.Ldone:
+	{
+		r0 = #0
+		jumpr r31
+	}
+	.falign
+.Lalign:
+1000:
+	{
+		if (p0.new) w_dbuf = memub(src)
+		p0 = tstbit(src,#0)
+		if (!p1) jump .Lsmall
+	}
+1100:
+	{
+		if (p0) memb(dst++#1) = w_dbuf
+		if (p0) bytes = add(bytes,#-1)
+		if (p0) src = add(src,#1)
+	}
+2000:
+	{
+		if (p0.new) w_dbuf = memuh(src)
+		p0 = tstbit(src,#1)
+		if (!p1) jump .Lsmall
+	}
+2100:
+	{
+		if (p0) memh(dst++#2) = w_dbuf
+		if (p0) bytes = add(bytes,#-2)
+		if (p0) src = add(src,#2)
+	}
+4000:
+	{
+		if (p0.new) w_dbuf = memw(src)
+		p0 = tstbit(src,#2)
+		if (!p1) jump .Lsmall
+	}
+4100:
+	{
+		if (p0) memw(dst++#4) = w_dbuf
+		if (p0) bytes = add(bytes,#-4)
+		if (p0) src = add(src,#4)
+		jump FUNCNAME
+	}
+	.size FUNCNAME,.-FUNCNAME
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
new file mode 100644
index 000000000000..b57d741750b2
--- /dev/null
+++ b/arch/hexagon/mm/init.c
@@ -0,0 +1,276 @@
+/*
+ * Memory subsystem initialization for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <asm/atomic.h>
+#include <linux/highmem.h>
+#include <asm/tlb.h>
+#include <asm/sections.h>
+#include <asm/vm_mmu.h>
+
+/*
+ * Define a startpg just past the end of the kernel image and a lastpg
+ * that corresponds to the end of real or simulated platform memory.
+ */
+#define bootmem_startpg (PFN_UP(((unsigned long) _end) - PAGE_OFFSET))
+
+unsigned long bootmem_lastpg;  /*  Should be set by platform code  */
+
+/*  Set as variable to limit PMD copies  */
+int max_kernel_seg = 0x303;
+
+/*  think this should be (page_size-1) the way it's used...*/
+unsigned long zero_page_mask;
+
+/*  indicate pfn's of high memory  */
+unsigned long highstart_pfn, highend_pfn;
+
+/* struct mmu_gather defined in asm-generic.h;  */
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+/* Default cache attribute for newly created page tables */
+unsigned long _dflt_cache_att = CACHEDEF;
+
+/*
+ * The current "generation" of kernel map, which should not roll
+ * over until Hell freezes over.  Actual bound in years needs to be
+ * calculated to confirm.
+ */
+DEFINE_SPINLOCK(kmap_gen_lock);
+
+/*  checkpatch says don't init this to 0.  */
+unsigned long long kmap_generation;
+
+/*
+ * mem_init - initializes memory
+ *
+ * Frees up bootmem
+ * Fixes up more stuff for HIGHMEM
+ * Calculates and displays memory available/used
+ */
+void __init mem_init(void)
+{
+	/*  No idea where this is actually declared.  Seems to evade LXR.  */
+	totalram_pages += free_all_bootmem();
+	num_physpages = bootmem_lastpg;	/*  seriously, what?  */
+
+	printk(KERN_INFO "totalram_pages = %ld\n", totalram_pages);
+
+	/*
+	 *  To-Do:  someone somewhere should wipe out the bootmem map
+	 *  after we're done?
+	 */
+
+	/*
+	 * This can be moved to some more virtual-memory-specific
+	 * initialization hook at some point.  Set the init_mm
+	 * descriptors "context" value to point to the initial
+	 * kernel segment table's physical address.
+	 */
+	init_mm.context.ptbase = __pa(init_mm.pgd);
+}
+
+/*
+ * free_initmem - frees memory used by stuff declared with __init
+ *
+ * Todo:  free pages between __init_begin and __init_end; possibly
+ * some devtree related stuff as well.
+ */
+void __init_refok free_initmem(void)
+{
+}
+
+/*
+ * free_initrd_mem - frees...  initrd memory.
+ * @start - start of init memory
+ * @end - end of init memory
+ *
+ * Apparently has to be passed the address of the initrd memory.
+ *
+ * Wrapped by #ifdef CONFIG_BLKDEV_INITRD
+ */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+}
+
+void sync_icache_dcache(pte_t pte)
+{
+	unsigned long addr;
+	struct page *page;
+
+	page = pte_page(pte);
+	addr = (unsigned long) page_address(page);
+
+	__vmcache_idsync(addr, PAGE_SIZE);
+}
+
+/*
+ * In order to set up page allocator "nodes",
+ * somebody has to call free_area_init() for UMA.
+ *
+ * In this mode, we only have one pg_data_t
+ * structure: contig_mem_data.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_sizes[MAX_NR_ZONES] = {0, };
+
+	/*
+	 *  This is not particularly well documented anywhere, but
+	 *  give ZONE_NORMAL all the memory, including the big holes
+	 *  left by the kernel+bootmem_map which are already left as reserved
+	 *  in the bootmem_map; free_area_init should see those bits and
+	 *  adjust accordingly.
+	 */
+
+	zones_sizes[ZONE_NORMAL] = max_low_pfn;
+
+	free_area_init(zones_sizes);  /*  sets up the zonelists and mem_map  */
+
+	/*
+	 * Start of high memory area.  Will probably need something more
+	 * fancy if we...  get more fancy.
+	 */
+	high_memory = (void *)((bootmem_lastpg + 1) << PAGE_SHIFT);
+}
+
+#ifndef DMA_RESERVE
+#define DMA_RESERVE		(4)
+#endif
+
+#define DMA_CHUNKSIZE		(1<<22)
+#define DMA_RESERVED_BYTES	(DMA_RESERVE * DMA_CHUNKSIZE)
+
+/*
+ * Pick out the memory size.  We look for mem=size,
+ * where size is "size[KkMm]"
+ */
+static int __init early_mem(char *p)
+{
+	unsigned long size;
+	char *endp;
+
+	size = memparse(p, &endp);
+
+	bootmem_lastpg = PFN_DOWN(size);
+
+	return 0;
+}
+early_param("mem", early_mem);
+
+size_t hexagon_coherent_pool_size = (size_t) (DMA_RESERVE << 22);
+
+void __init setup_arch_memory(void)
+{
+	int bootmap_size;
+	/*  XXX Todo: this probably should be cleaned up  */
+	u32 *segtable = (u32 *) &swapper_pg_dir[0];
+	u32 *segtable_end;
+
+	/*
+	 * Set up boot memory allocator
+	 *
+	 * The Gorman book also talks about these functions.
+	 * This needs to change for highmem setups.
+	 */
+
+	/* Memory size needs to be a multiple of 16M */
+	bootmem_lastpg = PFN_DOWN((bootmem_lastpg << PAGE_SHIFT) &
+		~((BIG_KERNEL_PAGE_SIZE) - 1));
+
+	/*
+	 * Reserve the top DMA_RESERVE bytes of RAM for DMA (uncached)
+	 * memory allocation
+	 */
+	bootmap_size = init_bootmem(bootmem_startpg, bootmem_lastpg -
+				    PFN_DOWN(DMA_RESERVED_BYTES));
+
+	printk(KERN_INFO "bootmem_startpg:  0x%08lx\n", bootmem_startpg);
+	printk(KERN_INFO "bootmem_lastpg:  0x%08lx\n", bootmem_lastpg);
+	printk(KERN_INFO "bootmap_size:  %d\n", bootmap_size);
+	printk(KERN_INFO "max_low_pfn:  0x%08lx\n", max_low_pfn);
+
+	/*
+	 * The default VM page tables (will be) populated with
+	 * VA=PA+PAGE_OFFSET mapping.  We go in and invalidate entries
+	 * higher than what we have memory for.
+	 */
+
+	/*  this is pointer arithmetic; each entry covers 4MB  */
+	segtable = segtable + (PAGE_OFFSET >> 22);
+
+	/*  this actually only goes to the end of the first gig  */
+	segtable_end = segtable + (1<<(30-22));
+
+	/*  Move forward to the start of empty pages  */
+	segtable += bootmem_lastpg >> (22-PAGE_SHIFT);
+
+	{
+	    int i;
+
+	    for (i = 1 ; i <= DMA_RESERVE ; i++)
+		segtable[-i] = ((segtable[-i] & __HVM_PTE_PGMASK_4MB)
+				| __HVM_PTE_R | __HVM_PTE_W | __HVM_PTE_X
+				| __HEXAGON_C_UNC << 6
+				| __HVM_PDE_S_4MB);
+	}
+
+	printk(KERN_INFO "clearing segtable from %p to %p\n", segtable,
+		segtable_end);
+	while (segtable < (segtable_end-8))
+		*(segtable++) = __HVM_PDE_S_INVALID;
+	/* stop the pointer at the device I/O 4MB page  */
+
+	printk(KERN_INFO "segtable = %p (should be equal to _K_io_map)\n",
+		segtable);
+
+#if 0
+	/*  Other half of the early device table from vm_init_segtable. */
+	printk(KERN_INFO "&_K_init_devicetable = 0x%08x\n",
+		(unsigned long) _K_init_devicetable-PAGE_OFFSET);
+	*segtable = ((u32) (unsigned long) _K_init_devicetable-PAGE_OFFSET) |
+		__HVM_PDE_S_4KB;
+	printk(KERN_INFO "*segtable = 0x%08x\n", *segtable);
+#endif
+
+	/*
+	 * Free all the memory that wasn't taken up by the bootmap, the DMA
+	 * reserve, or kernel itself.
+	 */
+	free_bootmem(PFN_PHYS(bootmem_startpg)+bootmap_size,
+		     PFN_PHYS(bootmem_lastpg - bootmem_startpg) - bootmap_size -
+		     DMA_RESERVED_BYTES);
+
+	/*
+	 *  The bootmem allocator seemingly just lives to feed memory
+	 *  to the paging system
+	 */
+	printk(KERN_INFO "PAGE_SIZE=%lu\n", PAGE_SIZE);
+	paging_init();  /*  See Gorman Book, 2.3  */
+
+	/*
+	 *  At this point, the page allocator is kind of initialized, but
+	 *  apparently no pages are available (just like with the bootmem
+	 *  allocator), and need to be freed themselves via mem_init(),
+	 *  which is called by start_kernel() later on in the process
+	 */
+}
diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
new file mode 100644
index 000000000000..3a37bc3b0116
--- /dev/null
+++ b/arch/hexagon/mm/ioremap.c
@@ -0,0 +1,56 @@
+/*
+ * I/O remap functions for Hexagon
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+{
+	unsigned long last_addr, addr;
+	unsigned long offset = phys_addr & ~PAGE_MASK;
+	struct vm_struct *area;
+
+	pgprot_t prot = __pgprot(_PAGE_PRESENT|_PAGE_READ|_PAGE_WRITE
+					|(__HEXAGON_C_DEV << 6));
+
+	last_addr = phys_addr + size - 1;
+
+	/*  Wrapping not allowed  */
+	if (!size || (last_addr < phys_addr))
+		return NULL;
+
+	/*  Rounds up to next page size, including whole-page offset */
+	size = PAGE_ALIGN(offset + size);
+
+	area = get_vm_area(size, VM_IOREMAP);
+	addr = (unsigned long)area->addr;
+
+	if (ioremap_page_range(addr, addr+size, phys_addr, prot)) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+
+	return (void __iomem *) (offset + addr);
+}
+
+void __iounmap(const volatile void __iomem *addr)
+{
+	vunmap((void *) ((unsigned long) addr & PAGE_MASK));
+}
diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c
new file mode 100644
index 000000000000..b175e2d42b89
--- /dev/null
+++ b/arch/hexagon/mm/pgalloc.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#include <linux/init.h>
+
+void __init pgtable_cache_init(void)
+{
+}
diff --git a/arch/hexagon/mm/strnlen_user.S b/arch/hexagon/mm/strnlen_user.S
new file mode 100644
index 000000000000..5c6a16c7c72a
--- /dev/null
+++ b/arch/hexagon/mm/strnlen_user.S
@@ -0,0 +1,139 @@
+/*
+ * User string length functions for kernel
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#define isrc	r0
+#define max	r1	/*  Do not change!  */
+
+#define end	r2
+#define tmp1	r3
+
+#define obo	r6	/*  off-by-one  */
+#define start	r7
+#define mod8	r8
+#define dbuf    r15:14
+#define dcmp	r13:12
+
+/*
+ * The vector mask version of this turned out *really* badly.
+ * The hardware loop version also turned out *really* badly.
+ * Seems straight pointer arithmetic basically wins here.
+ */
+
+#define fname __strnlen_user
+
+	.text
+	.global fname
+	.type fname, @function
+	.p2align 5  /*  why?  */
+fname:
+	{
+		mod8 = and(isrc,#7);
+		end = add(isrc,max);
+		start = isrc;
+	}
+	{
+		P0 = cmp.eq(mod8,#0);
+		mod8 = and(end,#7);
+		dcmp = #0;
+		if (P0.new) jump:t dw_loop;	/*  fire up the oven  */
+	}
+
+alignment_loop:
+fail_1:	{
+		tmp1 = memb(start++#1);
+	}
+	{
+		P0 = cmp.eq(tmp1,#0);
+		if (P0.new) jump:nt exit_found;
+		P1 = cmp.gtu(end,start);
+		mod8 = and(start,#7);
+	}
+	{
+		if (!P1) jump exit_error;  /*  hit the end  */
+		P0 = cmp.eq(mod8,#0);
+	}
+	{
+		if (!P0) jump alignment_loop;
+	}
+
+
+
+dw_loop:
+fail_2:	{
+		dbuf = memd(start);
+		obo = add(start,#1);
+	}
+	{
+		P0 = vcmpb.eq(dbuf,dcmp);
+	}
+	{
+		tmp1 = P0;
+		P0 = cmp.gtu(end,start);
+	}
+	{
+		tmp1 = ct0(tmp1);
+		mod8 = and(end,#7);
+		if (!P0) jump end_check;
+	}
+	{
+		P0 = cmp.eq(tmp1,#32);
+		if (!P0.new) jump:nt exit_found;
+		if (!P0.new) start = add(obo,tmp1);
+	}
+	{
+		start = add(start,#8);
+		jump dw_loop;
+	}	/*  might be nice to combine these jumps...   */
+
+
+end_check:
+	{
+		P0 = cmp.gt(tmp1,mod8);
+		if (P0.new) jump:nt exit_error;	/*  neverfound!  */
+		start = add(obo,tmp1);
+	}
+
+exit_found:
+	{
+		R0 = sub(start,isrc);
+		jumpr R31;
+	}
+
+exit_error:
+	{
+		R0 = add(max,#1);
+		jumpr R31;
+	}
+
+	/*  Uh, what does the "fixup" return here?  */
+	.falign
+fix_1:
+	{
+		R0 = #0;
+		jumpr R31;
+	}
+
+	.size fname,.-fname
+
+
+.section __ex_table,"a"
+.long fail_1,fix_1
+.long fail_2,fix_1
+.previous
diff --git a/arch/hexagon/mm/uaccess.c b/arch/hexagon/mm/uaccess.c
new file mode 100644
index 000000000000..e748108b47a7
--- /dev/null
+++ b/arch/hexagon/mm/uaccess.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Support for user memory access from kernel.  This will
+ * probably be inlined for performance at some point, but
+ * for ease of debug, and to a lesser degree for code size,
+ * we implement here as subroutines.
+ */
+#include <linux/types.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+/*
+ * For clear_user(), exploit previously defined copy_to_user function
+ * and the fact that we've got a handy zero page defined in kernel/head.S
+ *
+ * dczero here would be even faster.
+ */
+__kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count)
+{
+	long uncleared;
+
+	while (count > PAGE_SIZE) {
+		uncleared = __copy_to_user_hexagon(dest, &empty_zero_page,
+						PAGE_SIZE);
+		if (uncleared)
+			return count - (PAGE_SIZE - uncleared);
+		count -= PAGE_SIZE;
+		dest += PAGE_SIZE;
+	}
+	if (count)
+		count = __copy_to_user_hexagon(dest, &empty_zero_page, count);
+
+	return count;
+}
+
+unsigned long clear_user_hexagon(void __user *dest, unsigned long count)
+{
+	if (!access_ok(VERIFY_WRITE, dest, count))
+		return count;
+	else
+		return __clear_user_hexagon(dest, count);
+}
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
new file mode 100644
index 000000000000..c10b76ff9d65
--- /dev/null
+++ b/arch/hexagon/mm/vm_fault.c
@@ -0,0 +1,187 @@
+/*
+ * Memory fault handling for Hexagon
+ *
+ * Copyright (c) 2010-2011 Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * Page fault handling for the Hexagon Virtual Machine.
+ * Can also be called by a native port emulating the HVM
+ * execptions.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/signal.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+
+/*
+ * Decode of hardware exception sends us to one of several
+ * entry points.  At each, we generate canonical arguments
+ * for handling by the abstract memory management code.
+ */
+#define FLT_IFETCH     -1
+#define FLT_LOAD        0
+#define FLT_STORE       1
+
+
+/*
+ * Canonical page fault handler
+ */
+void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int si_code = SEGV_MAPERR;
+	int fault;
+	const struct exception_table_entry *fixup;
+
+	/*
+	 * If we're in an interrupt or have no user context,
+	 * then must not take the fault.
+	 */
+	if (unlikely(in_interrupt() || !mm))
+		goto no_context;
+
+	local_irq_enable();
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+
+	if (vma->vm_start <= address)
+		goto good_area;
+
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	/* Address space is OK.  Now check access rights. */
+	si_code = SEGV_ACCERR;
+
+	switch (cause) {
+	case FLT_IFETCH:
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+		break;
+	case FLT_LOAD:
+		if (!(vma->vm_flags & VM_READ))
+			goto bad_area;
+		break;
+	case FLT_STORE:
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		break;
+	}
+
+	fault = handle_mm_fault(mm, vma, address, (cause > 0));
+
+	/* The most common case -- we are done. */
+	if (likely(!(fault & VM_FAULT_ERROR))) {
+		if (fault & VM_FAULT_MAJOR)
+			current->maj_flt++;
+		else
+			current->min_flt++;
+
+		up_read(&mm->mmap_sem);
+		return;
+	}
+
+	up_read(&mm->mmap_sem);
+
+	/* Handle copyin/out exception cases */
+	if (!user_mode(regs))
+		goto no_context;
+
+	if (fault & VM_FAULT_OOM) {
+		pagefault_out_of_memory();
+		return;
+	}
+
+	/* User-mode address is in the memory map, but we are
+	 * unable to fix up the page fault.
+	 */
+	if (fault & VM_FAULT_SIGBUS) {
+		info.si_signo = SIGBUS;
+		info.si_code = BUS_ADRERR;
+	}
+	/* Address is not in the memory map */
+	else {
+		info.si_signo = SIGSEGV;
+		info.si_code = SEGV_ACCERR;
+	}
+	info.si_errno = 0;
+	info.si_addr = (void __user *)address;
+	force_sig_info(info.si_code, &info, current);
+	return;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs)) {
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = si_code;
+		info.si_addr = (void *)address;
+		force_sig_info(SIGSEGV, &info, current);
+		return;
+	}
+	/* Kernel-mode fault falls through */
+
+no_context:
+	fixup = search_exception_tables(pt_elr(regs));
+	if (fixup) {
+		pt_set_elr(regs, fixup->fixup);
+		return;
+	}
+
+	/* Things are looking very, very bad now */
+	bust_spinlocks(1);
+	printk(KERN_EMERG "Unable to handle kernel paging request at "
+		"virtual address 0x%08lx, regs %p\n", address, regs);
+	die("Bad Kernel VA", regs, SIGKILL);
+}
+
+
+void read_protection_fault(struct pt_regs *regs)
+{
+	unsigned long badvadr = pt_badva(regs);
+
+	do_page_fault(badvadr, FLT_LOAD, regs);
+}
+
+void write_protection_fault(struct pt_regs *regs)
+{
+	unsigned long badvadr = pt_badva(regs);
+
+	do_page_fault(badvadr, FLT_STORE, regs);
+}
+
+void execute_protection_fault(struct pt_regs *regs)
+{
+	unsigned long badvadr = pt_badva(regs);
+
+	do_page_fault(badvadr, FLT_IFETCH, regs);
+}
diff --git a/arch/hexagon/mm/vm_tlb.c b/arch/hexagon/mm/vm_tlb.c
new file mode 100644
index 000000000000..c6ff41575461
--- /dev/null
+++ b/arch/hexagon/mm/vm_tlb.c
@@ -0,0 +1,93 @@
+/*
+ * Hexagon Virtual Machine TLB functions
+ *
+ * Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * The Hexagon Virtual Machine conceals the real workings of
+ * the TLB, but there are one or two functions that need to
+ * be instantiated for it, differently from a native build.
+ */
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/hexagon_vm.h>
+
+/*
+ * Initial VM implementation has only one map active at a time, with
+ * TLB purgings on changes.  So either we're nuking the current map,
+ * or it's a no-op.  This operation is messy on true SMPs where other
+ * processors must be induced to flush the copies in their local TLBs,
+ * but Hexagon thread-based virtual processors share the same MMU.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context.ptbase == current->active_mm->context.ptbase)
+		__vmclrmap((void *)start, end - start);
+}
+
+/*
+ * Flush a page from the kernel virtual map - used by highmem
+ */
+void flush_tlb_one(unsigned long vaddr)
+{
+	__vmclrmap((void *)vaddr, PAGE_SIZE);
+}
+
+/*
+ * Flush all TLBs across all CPUs, virtual or real.
+ * A single Hexagon core has 6 thread contexts but
+ * only one TLB.
+ */
+void tlb_flush_all(void)
+{
+	/*  should probably use that fixaddr end or whateve label  */
+	__vmclrmap(0, 0xffff0000);
+}
+
+/*
+ * Flush TLB entries associated with a given mm_struct mapping.
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	/* Current Virtual Machine has only one map active at a time */
+	if (current->active_mm->context.ptbase == mm->context.ptbase)
+		tlb_flush_all();
+}
+
+/*
+ * Flush TLB state associated with a page of a vma.
+ */
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vaddr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context.ptbase  == current->active_mm->context.ptbase)
+		__vmclrmap((void *)vaddr, PAGE_SIZE);
+}
+
+/*
+ * Flush TLB entries associated with a kernel address range.
+ * Like flush range, but without the check on the vma->vm_mm.
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+		__vmclrmap((void *)start, end - start);
+}
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index 0e5cd1405e0e..43ab1cd097a5 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -234,4 +234,4 @@ CONFIG_CRYPTO_MD5=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRC_T10DIF=y
 CONFIG_MISC_DEVICES=y
-CONFIG_DMAR=y
+CONFIG_INTEL_IOMMU=y
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
index 2f7caddf093e..ae16ec4f6308 100644
--- a/arch/ia64/dig/Makefile
+++ b/arch/ia64/dig/Makefile
@@ -6,7 +6,7 @@
 #
 
 obj-y := setup.o
-ifeq ($(CONFIG_DMAR), y)
+ifeq ($(CONFIG_INTEL_IOMMU), y)
 obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o
 else
 obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 80241fe03f50..f5f4ef149aac 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -915,7 +915,7 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
  * @dir:  R/W or both.
  * @attrs: optional dma attributes
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static dma_addr_t sba_map_page(struct device *dev, struct page *page,
 			       unsigned long poff, size_t size,
@@ -1044,7 +1044,7 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
  * @dir:  R/W or both.
  * @attrs: optional dma attributes
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
 			   enum dma_data_direction dir, struct dma_attrs *attrs)
@@ -1127,7 +1127,7 @@ void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
  * @size:  number of bytes mapped in driver buffer.
  * @dma_handle:  IOVA of new buffer.
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static void *
 sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
@@ -1190,7 +1190,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
  * @vaddr:  virtual address IOVA of "consistent" buffer.
  * @dma_handler:  IO virtual address of "consistent" buffer.
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static void sba_free_coherent (struct device *dev, size_t size, void *vaddr,
 			       dma_addr_t dma_handle)
@@ -1453,7 +1453,7 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
  * @dir:  R/W or both.
  * @attrs: optional dma attributes
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
 			    int nents, enum dma_data_direction dir,
@@ -1549,7 +1549,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
  * @dir:  R/W or both.
  * @attrs: optional dma attributes
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt
+ * See Documentation/DMA-API-HOWTO.txt
  */
 static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
 			       int nents, enum dma_data_direction dir,
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 7e81966ce481..47afcc61f6e5 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -172,7 +172,7 @@ static const struct net_device_ops simeth_netdev_ops = {
 	.ndo_stop		= simeth_close,
 	.ndo_start_xmit		= simeth_tx,
 	.ndo_get_stats		= simeth_get_stats,
-	.ndo_set_multicast_list	= set_multicast_list, /* not yet used */
+	.ndo_set_rx_mode	= set_multicast_list, /* not yet used */
 
 };
 
diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h
index d66d446b127c..d05e78f6db94 100644
--- a/arch/ia64/include/asm/device.h
+++ b/arch/ia64/include/asm/device.h
@@ -10,7 +10,7 @@ struct dev_archdata {
 #ifdef CONFIG_ACPI
 	void	*acpi_handle;
 #endif
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
 };
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 745e095fe82e..105c93b00b1b 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -7,12 +7,14 @@
 
 extern void pci_iommu_shutdown(void);
 extern void no_iommu_init(void);
+#ifdef	CONFIG_INTEL_IOMMU
 extern int force_iommu, no_iommu;
-extern int iommu_detected;
-#ifdef	CONFIG_DMAR
 extern int iommu_pass_through;
+extern int iommu_detected;
 #else
 #define iommu_pass_through	(0)
+#define no_iommu		(1)
+#define iommu_detected		(0)
 #endif
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 73b5f785e70c..127dd7be346a 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -139,7 +139,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 	return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
 }
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 extern void pci_iommu_alloc(void);
 #endif
 #endif /* _ASM_IA64_PCI_H */
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 395c2f216dd8..d959c84904be 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -43,7 +43,7 @@ obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
 endif
-obj-$(CONFIG_DMAR)		+= pci-dma.o
+obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 
 obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 3be485a300b1..bfb4d01e0e51 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -88,7 +88,7 @@ acpi_get_sysname(void)
 	struct acpi_table_rsdp *rsdp;
 	struct acpi_table_xsdt *xsdt;
 	struct acpi_table_header *hdr;
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 	u64 i, nentries;
 #endif
 
@@ -125,7 +125,7 @@ acpi_get_sysname(void)
 		return "xen";
 	}
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 	/* Look for Intel IOMMU */
 	nentries = (hdr->length - sizeof(*hdr)) /
 			 sizeof(xsdt->table_offset_entry[0]);
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 009df5434a7a..94e0db72d4a6 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -131,7 +131,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 	return ia64_teardown_msi_irq(irq);
 }
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 #ifdef CONFIG_SMP
 static int dmar_msi_set_affinity(struct irq_data *data,
 				 const struct cpumask *mask, bool force)
@@ -210,5 +210,5 @@ int arch_setup_dmar_msi(unsigned int irq)
 				      "edge");
 	return 0;
 }
-#endif /* CONFIG_DMAR */
+#endif /* CONFIG_INTEL_IOMMU */
 
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index f6b1ff0aea76..c16162c70860 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -14,7 +14,7 @@
 
 #include <asm/system.h>
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
 
 #include <linux/kernel.h>
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8213efe1998c..43f4c92816ef 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -33,6 +33,7 @@
 #include <linux/uaccess.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/gcc_intrin.h>
@@ -204,7 +205,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = iommu_found();
+		r = iommu_present(&pci_bus_type);
 		break;
 	default:
 		r = 0;
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index b92b9445255d..6c4e9aaa70c1 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -10,6 +10,7 @@ config M32R
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_ATOMIC64
 
 config SBUS
 	bool
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 9e8ee9d2b8ca..6c28582fb98f 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -21,6 +21,15 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
 	bool
 
+config GENERIC_CLOCKEVENTS
+	bool
+
+config GENERIC_CMOS_UPDATE
+	def_bool !MMU
+
+config GENERIC_GPIO
+	bool
+
 config GENERIC_HWEIGHT
 	bool
 	default y
@@ -29,10 +38,16 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
+config GENERIC_IOMAP
+	def_bool MMU
+
 config TIME_LOW_RES
 	bool
 	default y
 
+config ARCH_USES_GETTIMEOFFSET
+	def_bool MMU
+
 config NO_IOPORT
 	def_bool y
 
@@ -62,13 +77,31 @@ config MMU
 	  Select if you want MMU-based virtualised addressing space
 	  support by paged memory management. If unsure, say 'Y'.
 
-menu "Platform dependent setup"
+config MMU_MOTOROLA
+	bool
+
+config MMU_SUN3
+	bool
+	depends on MMU && !MMU_MOTOROLA
+
+menu "Platform setup"
+
+source arch/m68k/Kconfig.cpu
+
+source arch/m68k/Kconfig.machine
+
+source arch/m68k/Kconfig.bus
+
+endmenu
+
+menu "Kernel Features"
 
-if MMU
-source arch/m68k/Kconfig.mmu
+if COLDFIRE
+source "kernel/Kconfig.preempt"
 endif
-if !MMU
-source arch/m68k/Kconfig.nommu
+
+if !MMU || COLDFIRE
+source "kernel/time/Kconfig"
 endif
 
 source "mm/Kconfig"
@@ -85,9 +118,9 @@ if !MMU
 menu "Power management options"
 
 config PM
-        bool "Power Management support"
-        help
-          Support processor power management modes
+	bool "Power Management support"
+	help
+	  Support processor power management modes
 
 endmenu
 endif
@@ -96,151 +129,7 @@ source "net/Kconfig"
 
 source "drivers/Kconfig"
 
-if MMU
-
-menu "Character devices"
-
-config ATARI_MFPSER
-	tristate "Atari MFP serial support"
-	depends on ATARI
-	---help---
-	  If you like to use the MFP serial ports ("Modem1", "Serial1") under
-	  Linux, say Y. The driver equally supports all kinds of MFP serial
-	  ports and automatically detects whether Serial1 is available.
-
-	  To compile this driver as a module, choose M here.
-
-	  Note for Falcon users: You also have an MFP port, it's just not
-	  wired to the outside... But you could use the port under Linux.
-
-config ATARI_MIDI
-	tristate "Atari MIDI serial support"
-	depends on ATARI
-	help
-	  If you want to use your Atari's MIDI port in Linux, say Y.
-
-	  To compile this driver as a module, choose M here.
-
-config ATARI_DSP56K
-	tristate "Atari DSP56k support (EXPERIMENTAL)"
-	depends on ATARI && EXPERIMENTAL
-	help
-	  If you want to be able to use the DSP56001 in Falcons, say Y. This
-	  driver is still experimental, and if you don't know what it is, or
-	  if you don't have this processor, just say N.
-
-	  To compile this driver as a module, choose M here.
-
-config AMIGA_BUILTIN_SERIAL
-	tristate "Amiga builtin serial support"
-	depends on AMIGA
-	help
-	  If you want to use your Amiga's built-in serial port in Linux,
-	  answer Y.
-
-	  To compile this driver as a module, choose M here.
-
-config MULTIFACE_III_TTY
-	tristate "Multiface Card III serial support"
-	depends on AMIGA
-	help
-	  If you want to use a Multiface III card's serial port in Linux,
-	  answer Y.
-
-	  To compile this driver as a module, choose M here.
-
-config GVPIOEXT
-	tristate "GVP IO-Extender support"
-	depends on PARPORT=n && ZORRO
-	help
-	  If you want to use a GVP IO-Extender serial card in Linux, say Y.
-	  Otherwise, say N.
-
-config GVPIOEXT_LP
-	tristate "GVP IO-Extender parallel printer support"
-	depends on GVPIOEXT
-	help
-	  Say Y to enable driving a printer from the parallel port on your
-	  GVP IO-Extender card, N otherwise.
-
-config GVPIOEXT_PLIP
-	tristate "GVP IO-Extender PLIP support"
-	depends on GVPIOEXT
-	help
-	  Say Y to enable doing IP over the parallel port on your GVP
-	  IO-Extender card, N otherwise.
-
-config MAC_HID
-	bool
-	depends on INPUT_ADBHID
-	default y
-
-config HPDCA
-	tristate "HP DCA serial support"
-	depends on DIO && SERIAL_8250
-	help
-	  If you want to use the internal "DCA" serial ports on an HP300
-	  machine, say Y here.
-
-config HPAPCI
-	tristate "HP APCI serial support"
-	depends on HP300 && SERIAL_8250 && EXPERIMENTAL
-	help
-	  If you want to use the internal "APCI" serial ports on an HP400
-	  machine, say Y here.
-
-config MVME147_SCC
-	bool "SCC support for MVME147 serial ports"
-	depends on MVME147 && BROKEN
-	help
-	  This is the driver for the serial ports on the Motorola MVME147
-	  boards.  Everyone using one of these boards should say Y here.
-
-config MVME162_SCC
-	bool "SCC support for MVME162 serial ports"
-	depends on MVME16x && BROKEN
-	help
-	  This is the driver for the serial ports on the Motorola MVME162 and
-	  172 boards.  Everyone using one of these boards should say Y here.
-
-config BVME6000_SCC
-	bool "SCC support for BVME6000 serial ports"
-	depends on BVME6000 && BROKEN
-	help
-	  This is the driver for the serial ports on the BVME4000 and BVME6000
-	  boards from BVM Ltd.  Everyone using one of these boards should say
-	  Y here.
-
-config DN_SERIAL
-	bool "Support for DN serial port (dummy)"
-	depends on APOLLO
-
-config SERIAL_CONSOLE
-	bool "Support for serial port console"
-	depends on (AMIGA || ATARI || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_MIDI=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL)
-	---help---
-	  If you say Y here, it will be possible to use a serial port as the
-	  system console (the system console is the device which receives all
-	  kernel messages and warnings and which allows logins in single user
-	  mode). This could be useful if some terminal or printer is connected
-	  to that serial port.
-
-	  Even if you say Y here, the currently visible virtual console
-	  (/dev/tty0) will still be used as the system console by default, but
-	  you can alter that using a kernel command line option such as
-	  "console=ttyS1". (Try "man bootparam" or see the documentation of
-	  your boot loader (lilo or loadlin) about how to pass options to the
-	  kernel at boot time.)
-
-	  If you don't have a VGA card installed and you say Y here, the
-	  kernel will automatically use the first serial line, /dev/ttyS0, as
-	  system console.
-
-	  If unsure, say N.
-
-endmenu
-
-endif
+source "arch/m68k/Kconfig.devices"
 
 source "fs/Kconfig"
 
diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus
new file mode 100644
index 000000000000..8294f0c1785e
--- /dev/null
+++ b/arch/m68k/Kconfig.bus
@@ -0,0 +1,55 @@
+if MMU
+
+comment "Bus Support"
+
+config NUBUS
+	bool
+	depends on MAC
+	default y
+
+config ZORRO
+	bool "Amiga Zorro (AutoConfig) bus support"
+	depends on AMIGA
+	help
+	  This enables support for the Zorro bus in the Amiga. If you have
+	  expansion cards in your Amiga that conform to the Amiga
+	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
+	  expansion cards that do not fit in the Zorro slots but fit in e.g.
+	  the CPU slot may fall in this category, so you have to say Y to let
+	  Linux use these.
+
+config AMIGA_PCMCIA
+	bool "Amiga 1200/600 PCMCIA support (EXPERIMENTAL)"
+	depends on AMIGA && EXPERIMENTAL
+	help
+	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
+	  600. If you intend to use pcmcia cards say Y; otherwise say N.
+
+config ISA
+	bool
+	depends on Q40 || AMIGA_PCMCIA
+	default y
+	help
+	  Find out whether you have ISA slots on your motherboard.  ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
+	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
+	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
+
+config GENERIC_ISA_DMA
+	def_bool ISA
+
+source "drivers/pci/Kconfig"
+
+source "drivers/zorro/Kconfig"
+
+endif
+
+if !MMU
+
+config ISA_DMA_API
+        def_bool !M5272
+
+source "drivers/pcmcia/Kconfig"
+
+endif
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
new file mode 100644
index 000000000000..e632b2d12106
--- /dev/null
+++ b/arch/m68k/Kconfig.cpu
@@ -0,0 +1,429 @@
+comment "Processor Type"
+
+config M68000
+	bool
+	select CPU_HAS_NO_BITFIELDS
+	help
+	  The Freescale (was Motorola) 68000 CPU is the first generation of
+	  the well known M68K family of processors. The CPU core as well as
+	  being available as a stand alone CPU was also used in many
+	  System-On-Chip devices (eg 68328, 68302, etc). It does not contain
+	  a paging MMU.
+
+config MCPU32
+	bool
+	select CPU_HAS_NO_BITFIELDS
+	help
+	  The Freescale (was then Motorola) CPU32 is a CPU core that is
+	  based on the 68020 processor. For the most part it is used in
+	  System-On-Chip parts, and does not contain a paging MMU.
+
+config COLDFIRE
+	bool
+	select GENERIC_GPIO
+	select ARCH_REQUIRE_GPIOLIB
+	select CPU_HAS_NO_BITFIELDS
+	help
+	  The Freescale ColdFire family of processors is a modern derivitive
+	  of the 68000 processor family. They are mainly targeted at embedded
+	  applications, and are all System-On-Chip (SOC) devices, as opposed
+	  to stand alone CPUs. They implement a subset of the original 68000
+	  processor instruction set.
+
+config M68020
+	bool "68020 support"
+	depends on MMU
+	help
+	  If you anticipate running this kernel on a computer with a MC68020
+	  processor, say Y. Otherwise, say N. Note that the 68020 requires a
+	  68851 MMU (Memory Management Unit) to run Linux/m68k, except on the
+	  Sun 3, which provides its own version.
+
+config M68030
+	bool "68030 support"
+	depends on MMU && !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68030
+	  processor, say Y. Otherwise, say N. Note that a MC68EC030 will not
+	  work, as it does not include an MMU (Memory Management Unit).
+
+config M68040
+	bool "68040 support"
+	depends on MMU && !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68LC040
+	  or MC68040 processor, say Y. Otherwise, say N. Note that an
+	  MC68EC040 will not work, as it does not include an MMU (Memory
+	  Management Unit).
+
+config M68060
+	bool "68060 support"
+	depends on MMU && !MMU_SUN3
+	help
+	  If you anticipate running this kernel on a computer with a MC68060
+	  processor, say Y. Otherwise, say N.
+
+config M68328
+	bool "MC68328"
+	depends on !MMU
+	select M68000
+	help
+	  Motorola 68328 processor support.
+
+config M68EZ328
+	bool "MC68EZ328"
+	depends on !MMU
+	select M68000
+	help
+	  Motorola 68EX328 processor support.
+
+config M68VZ328
+	bool "MC68VZ328"
+	depends on !MMU
+	select M68000
+	help
+	  Motorola 68VZ328 processor support.
+
+config M68360
+	bool "MC68360"
+	depends on !MMU
+	select MCPU32
+	help
+	  Motorola 68360 processor support.
+
+config M5206
+	bool "MCF5206"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5206 processor support.
+
+config M5206e
+	bool "MCF5206e"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5206e processor support.
+
+config M520x
+	bool "MCF520x"
+	depends on !MMU
+	select COLDFIRE
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CACHE_SPLIT
+	help
+	   Freescale Coldfire 5207/5208 processor support.
+
+config M523x
+	bool "MCF523x"
+	depends on !MMU
+	select COLDFIRE
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CACHE_SPLIT
+	select HAVE_IPSBAR
+	help
+	  Freescale Coldfire 5230/1/2/4/5 processor support
+
+config M5249
+	bool "MCF5249"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5249 processor support.
+
+config M527x
+	bool
+
+config M5271
+	bool "MCF5271"
+	depends on !MMU
+	select COLDFIRE
+	select M527x
+	select HAVE_CACHE_SPLIT
+	select HAVE_IPSBAR
+	select GENERIC_CLOCKEVENTS
+	help
+	  Freescale (Motorola) ColdFire 5270/5271 processor support.
+
+config M5272
+	bool "MCF5272"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5272 processor support.
+
+config M5275
+	bool "MCF5275"
+	depends on !MMU
+	select COLDFIRE
+	select M527x
+	select HAVE_CACHE_SPLIT
+	select HAVE_IPSBAR
+	select GENERIC_CLOCKEVENTS
+	help
+	  Freescale (Motorola) ColdFire 5274/5275 processor support.
+
+config M528x
+	bool "MCF528x"
+	depends on !MMU
+	select COLDFIRE
+	select GENERIC_CLOCKEVENTS
+	select HAVE_CACHE_SPLIT
+	select HAVE_IPSBAR
+	help
+	  Motorola ColdFire 5280/5282 processor support.
+
+config M5307
+	bool "MCF5307"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_CACHE_CB
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5307 processor support.
+
+config M532x
+	bool "MCF532x"
+	depends on !MMU
+	select COLDFIRE
+	select HAVE_CACHE_CB
+	help
+	  Freescale (Motorola) ColdFire 532x processor support.
+
+config M5407
+	bool "MCF5407"
+	depends on !MMU
+	select COLDFIRE
+	select COLDFIRE_SW_A7
+	select HAVE_CACHE_CB
+	select HAVE_MBAR
+	help
+	  Motorola ColdFire 5407 processor support.
+
+config M54xx
+	bool
+
+config M547x
+	bool "MCF547x"
+	depends on !MMU
+	select COLDFIRE
+	select M54xx
+	select HAVE_CACHE_CB
+	select HAVE_MBAR
+	help
+	  Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support.
+
+config M548x
+	bool "MCF548x"
+	depends on !MMU
+	select COLDFIRE
+	select M54xx
+	select HAVE_CACHE_CB
+	select HAVE_MBAR
+	help
+	  Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support.
+
+
+comment "Processor Specific Options"
+
+config M68KFPU_EMU
+	bool "Math emulation support (EXPERIMENTAL)"
+	depends on MMU
+	depends on EXPERIMENTAL
+	help
+	  At some point in the future, this will cause floating-point math
+	  instructions to be emulated by the kernel on machines that lack a
+	  floating-point math coprocessor.  Thrill-seekers and chronically
+	  sleep-deprived psychotic hacker types can say Y now, everyone else
+	  should probably wait a while.
+
+config M68KFPU_EMU_EXTRAPREC
+	bool "Math emulation extra precision"
+	depends on M68KFPU_EMU
+	help
+	  The fpu uses normally a few bit more during calculations for
+	  correct rounding, the emulator can (often) do the same but this
+	  extra calculation can cost quite some time, so you can disable
+	  it here. The emulator will then "only" calculate with a 64 bit
+	  mantissa and round slightly incorrect, what is more than enough
+	  for normal usage.
+
+config M68KFPU_EMU_ONLY
+	bool "Math emulation only kernel"
+	depends on M68KFPU_EMU
+	help
+	  This option prevents any floating-point instructions from being
+	  compiled into the kernel, thereby the kernel doesn't save any
+	  floating point context anymore during task switches, so this
+	  kernel will only be usable on machines without a floating-point
+	  math coprocessor. This makes the kernel a bit faster as no tests
+	  needs to be executed whether a floating-point instruction in the
+	  kernel should be executed or not.
+
+config ADVANCED
+	bool "Advanced configuration options"
+	depends on MMU
+	---help---
+	  This gives you access to some advanced options for the CPU. The
+	  defaults should be fine for most users, but these options may make
+	  it possible for you to improve performance somewhat if you know what
+	  you are doing.
+
+	  Note that the answer to this question won't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about these options.
+
+	  Most users should say N to this question.
+
+config RMW_INSNS
+	bool "Use read-modify-write instructions"
+	depends on ADVANCED
+	---help---
+	  This allows to use certain instructions that work with indivisible
+	  read-modify-write bus cycles. While this is faster than the
+	  workaround of disabling interrupts, it can conflict with DMA
+	  ( = direct memory access) on many Amiga systems, and it is also said
+	  to destabilize other machines. It is very likely that this will
+	  cause serious problems on any Amiga or Atari Medusa if set. The only
+	  configuration where it should work are 68030-based Ataris, where it
+	  apparently improves performance. But you've been warned! Unless you
+	  really know what you are doing, say N. Try Y only if you're quite
+	  adventurous.
+
+config SINGLE_MEMORY_CHUNK
+	bool "Use one physical chunk of memory only" if ADVANCED && !SUN3
+	depends on MMU
+	default y if SUN3
+	select NEED_MULTIPLE_NODES
+	help
+	  Ignore all but the first contiguous chunk of physical memory for VM
+	  purposes.  This will save a few bytes kernel size and may speed up
+	  some operations.  Say N if not sure.
+
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool MMU && !SINGLE_MEMORY_CHUNK
+
+config 060_WRITETHROUGH
+	bool "Use write-through caching for 68060 supervisor accesses"
+	depends on ADVANCED && M68060
+	---help---
+	  The 68060 generally uses copyback caching of recently accessed data.
+	  Copyback caching means that memory writes will be held in an on-chip
+	  cache and only written back to memory some time later.  Saying Y
+	  here will force supervisor (kernel) accesses to use writethrough
+	  caching.  Writethrough caching means that data is written to memory
+	  straight away, so that cache and memory data always agree.
+	  Writethrough caching is less efficient, but is needed for some
+	  drivers on 68060 based systems where the 68060 bus snooping signal
+	  is hardwired on.  The 53c710 SCSI driver is known to suffer from
+	  this problem.
+
+config M68K_L2_CACHE
+	bool
+	depends on MAC
+	default y
+
+config NODES_SHIFT
+	int
+	default "3"
+	depends on !SINGLE_MEMORY_CHUNK
+
+config FPU
+	bool
+
+config COLDFIRE_SW_A7
+	bool
+
+config HAVE_CACHE_SPLIT
+	bool
+
+config HAVE_CACHE_CB
+	bool
+
+config HAVE_MBAR
+	bool
+
+config HAVE_IPSBAR
+	bool
+
+config CLOCK_SET
+	bool "Enable setting the CPU clock frequency"
+	depends on COLDFIRE
+	default n
+	help
+	  On some CPU's you do not need to know what the core CPU clock
+	  frequency is. On these you can disable clock setting. On some
+	  traditional 68K parts, and on all ColdFire parts you need to set
+	  the appropriate CPU clock frequency. On these devices many of the
+	  onboard peripherals derive their timing from the master CPU clock
+	  frequency.
+
+config CLOCK_FREQ
+	int "Set the core clock frequency"
+	default "66666666"
+	depends on CLOCK_SET
+	help
+	  Define the CPU clock frequency in use. This is the core clock
+	  frequency, it may or may not be the same as the external clock
+	  crystal fitted to your board. Some processors have an internal
+	  PLL and can have their frequency programmed at run time, others
+	  use internal dividers. In general the kernel won't setup a PLL
+	  if it is fitted (there are some exceptions). This value will be
+	  specific to the exact CPU that you are using.
+
+config OLDMASK
+	bool "Old mask 5307 (1H55J) silicon"
+	depends on M5307
+	help
+	  Build support for the older revision ColdFire 5307 silicon.
+	  Specifically this is the 1H55J mask revision.
+
+if HAVE_CACHE_SPLIT
+choice
+	prompt "Split Cache Configuration"
+	default CACHE_I
+
+config CACHE_I
+	bool "Instruction"
+	help
+	  Use all of the ColdFire CPU cache memory as an instruction cache.
+
+config CACHE_D
+	bool "Data"
+	help
+	  Use all of the ColdFire CPU cache memory as a data cache.
+
+config CACHE_BOTH
+	bool "Both"
+	help
+	  Split the ColdFire CPU cache, and use half as an instruction cache
+	  and half as a data cache.
+endchoice
+endif
+
+if HAVE_CACHE_CB
+choice
+	prompt "Data cache mode"
+	default CACHE_WRITETHRU
+
+config CACHE_WRITETHRU
+	bool "Write-through"
+	help
+	  The ColdFire CPU cache is set into Write-through mode.
+
+config CACHE_COPYBACK
+	bool "Copy-back"
+	help
+	  The ColdFire CPU cache is set into Copy-back mode.
+endchoice
+endif
+
diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices
new file mode 100644
index 000000000000..d214034be6a6
--- /dev/null
+++ b/arch/m68k/Kconfig.devices
@@ -0,0 +1,123 @@
+if MMU
+
+config ARCH_MAY_HAVE_PC_FDC
+	bool
+	depends on BROKEN && (Q40 || SUN3X)
+	default y
+
+menu "Platform devices"
+
+config HEARTBEAT
+	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
+	default y if !AMIGA && !APOLLO && !ATARI && !MAC && !Q40 && HP300
+	help
+	  Use the power-on LED on your machine as a load meter.  The exact
+	  behavior is platform-dependent, but normally the flash frequency is
+	  a hyperbolic function of the 5-minute load average.
+
+# We have a dedicated heartbeat LED. :-)
+config PROC_HARDWARE
+	bool "/proc/hardware support"
+	help
+	  Say Y here to support the /proc/hardware file, which gives you
+	  access to information about the machine you're running on,
+	  including the model, CPU, MMU, clock speed, BogoMIPS rating,
+	  and memory size.
+
+endmenu
+
+menu "Character devices"
+
+config ATARI_MFPSER
+	tristate "Atari MFP serial support"
+	depends on ATARI
+	---help---
+	  If you like to use the MFP serial ports ("Modem1", "Serial1") under
+	  Linux, say Y. The driver equally supports all kinds of MFP serial
+	  ports and automatically detects whether Serial1 is available.
+
+	  To compile this driver as a module, choose M here.
+
+	  Note for Falcon users: You also have an MFP port, it's just not
+	  wired to the outside... But you could use the port under Linux.
+
+config ATARI_MIDI
+	tristate "Atari MIDI serial support"
+	depends on ATARI
+	help
+	  If you want to use your Atari's MIDI port in Linux, say Y.
+
+	  To compile this driver as a module, choose M here.
+
+config ATARI_DSP56K
+	tristate "Atari DSP56k support (EXPERIMENTAL)"
+	depends on ATARI && EXPERIMENTAL
+	help
+	  If you want to be able to use the DSP56001 in Falcons, say Y. This
+	  driver is still experimental, and if you don't know what it is, or
+	  if you don't have this processor, just say N.
+
+	  To compile this driver as a module, choose M here.
+
+config AMIGA_BUILTIN_SERIAL
+	tristate "Amiga builtin serial support"
+	depends on AMIGA
+	help
+	  If you want to use your Amiga's built-in serial port in Linux,
+	  answer Y.
+
+	  To compile this driver as a module, choose M here.
+
+config MULTIFACE_III_TTY
+	tristate "Multiface Card III serial support"
+	depends on AMIGA
+	help
+	  If you want to use a Multiface III card's serial port in Linux,
+	  answer Y.
+
+	  To compile this driver as a module, choose M here.
+
+config HPDCA
+	tristate "HP DCA serial support"
+	depends on DIO && SERIAL_8250
+	help
+	  If you want to use the internal "DCA" serial ports on an HP300
+	  machine, say Y here.
+
+config HPAPCI
+	tristate "HP APCI serial support"
+	depends on HP300 && SERIAL_8250 && EXPERIMENTAL
+	help
+	  If you want to use the internal "APCI" serial ports on an HP400
+	  machine, say Y here.
+
+config DN_SERIAL
+	bool "Support for DN serial port (dummy)"
+	depends on APOLLO
+
+config SERIAL_CONSOLE
+	bool "Support for serial port console"
+	depends on (AMIGA || ATARI || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_MIDI=y || AMIGA_BUILTIN_SERIAL=y || MULTIFACE_III_TTY=y || SERIAL=y || SERIAL167 || DN_SERIAL)
+	---help---
+	  If you say Y here, it will be possible to use a serial port as the
+	  system console (the system console is the device which receives all
+	  kernel messages and warnings and which allows logins in single user
+	  mode). This could be useful if some terminal or printer is connected
+	  to that serial port.
+
+	  Even if you say Y here, the currently visible virtual console
+	  (/dev/tty0) will still be used as the system console by default, but
+	  you can alter that using a kernel command line option such as
+	  "console=ttyS1". (Try "man bootparam" or see the documentation of
+	  your boot loader (lilo or loadlin) about how to pass options to the
+	  kernel at boot time.)
+
+	  If you don't have a VGA card installed and you say Y here, the
+	  kernel will automatically use the first serial line, /dev/ttyS0, as
+	  system console.
+
+	  If unsure, say N.
+
+endmenu
+
+endif
diff --git a/arch/m68k/Kconfig.nommu b/arch/m68k/Kconfig.machine
index ff46383112a4..ef4a26aff780 100644
--- a/arch/m68k/Kconfig.nommu
+++ b/arch/m68k/Kconfig.machine
@@ -1,297 +1,142 @@
-config FPU
-	bool
-	default n
-
-config GENERIC_GPIO
-	bool
-	default n
-
-config GENERIC_CMOS_UPDATE
-	bool
-	default y
-
-config GENERIC_CLOCKEVENTS
-	bool
-	default n
-
-config M68000
-	bool
-	select CPU_HAS_NO_BITFIELDS
-	help
-	  The Freescale (was Motorola) 68000 CPU is the first generation of
-	  the well known M68K family of processors. The CPU core as well as
-	  being available as a stand alone CPU was also used in many
-	  System-On-Chip devices (eg 68328, 68302, etc). It does not contain
-	  a paging MMU.
-
-config MCPU32
-	bool
-	select CPU_HAS_NO_BITFIELDS
-	help
-	  The Freescale (was then Motorola) CPU32 is a CPU core that is
-	  based on the 68020 processor. For the most part it is used in
-	  System-On-Chip parts, and does not contain a paging MMU.
-
-config COLDFIRE
-	bool
-	select GENERIC_GPIO
-	select ARCH_REQUIRE_GPIOLIB
-	select CPU_HAS_NO_BITFIELDS
-	help
-	  The Freescale ColdFire family of processors is a modern derivitive
-	  of the 68000 processor family. They are mainly targeted at embedded
-	  applications, and are all System-On-Chip (SOC) devices, as opposed
-	  to stand alone CPUs. They implement a subset of the original 68000
-	  processor instruction set.
-
-config COLDFIRE_SW_A7
-	bool
-	default n
-
-config HAVE_CACHE_SPLIT
-	bool
-
-config HAVE_CACHE_CB
-	bool
-
-config HAVE_MBAR
-	bool
-
-config HAVE_IPSBAR
-	bool
-
-choice
-	prompt "CPU"
-	default M68EZ328
-
-config M68328
-	bool "MC68328"
-	select M68000
-	help
-	  Motorola 68328 processor support.
-
-config M68EZ328
-	bool "MC68EZ328"
-	select M68000
-	help
-	  Motorola 68EX328 processor support.
-
-config M68VZ328
-	bool "MC68VZ328"
-	select M68000
-	help
-	  Motorola 68VZ328 processor support.
-
-config M68360
-	bool "MC68360"
-	select MCPU32
-	help
-	  Motorola 68360 processor support.
-
-config M5206
-	bool "MCF5206"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5206 processor support.
+comment "Machine Types"
+
+config AMIGA
+	bool "Amiga support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the Amiga series of computers. If
+	  you plan to use this kernel on an Amiga, say Y here and browse the
+	  material available in <file:Documentation/m68k>; otherwise say N.
+
+config ATARI
+	bool "Atari support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the 68000-based Atari series of
+	  computers (including the TT, Falcon and Medusa). If you plan to use
+	  this kernel on an Atari, say Y here and browse the material
+	  available in <file:Documentation/m68k>; otherwise say N.
+
+config MAC
+	bool "Macintosh support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the Apple Macintosh series of
+	  computers (yes, there is experimental support now, at least for part
+	  of the series).
+
+	  Say N unless you're willing to code the remaining necessary support.
+	  ;)
+
+config APOLLO
+	bool "Apollo support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  Say Y here if you want to run Linux on an MC680x0-based Apollo
+	  Domain workstation such as the DN3500.
+
+config VME
+	bool "VME (Motorola and BVM) support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  Say Y here if you want to build a kernel for a 680x0 based VME
+	  board.  Boards currently supported include Motorola boards MVME147,
+	  MVME162, MVME166, MVME167, MVME172, and MVME177.  BVME4000 and
+	  BVME6000 boards from BVM Ltd are also supported.
+
+config MVME147
+	bool "MVME147 support"
+	depends on MMU
+	depends on VME
+	help
+	  Say Y to include support for early Motorola VME boards.  This will
+	  build a kernel which can run on MVME147 single-board computers.  If
+	  you select this option you will have to select the appropriate
+	  drivers for SCSI, Ethernet and serial ports later on.
+
+config MVME16x
+	bool "MVME162, 166 and 167 support"
+	depends on MMU
+	depends on VME
+	help
+	  Say Y to include support for Motorola VME boards.  This will build a
+	  kernel which can run on MVME162, MVME166, MVME167, MVME172, and
+	  MVME177 boards.  If you select this option you will have to select
+	  the appropriate drivers for SCSI, Ethernet and serial ports later
+	  on.
+
+config BVME6000
+	bool "BVME4000 and BVME6000 support"
+	depends on MMU
+	depends on VME
+	help
+	  Say Y to include support for VME boards from BVM Ltd.  This will
+	  build a kernel which can run on BVME4000 and BVME6000 boards.  If
+	  you select this option you will have to select the appropriate
+	  drivers for SCSI, Ethernet and serial ports later on.
+
+config HP300
+	bool "HP9000/300 and HP9000/400 support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  This option enables support for the HP9000/300 and HP9000/400 series
+	  of workstations. Support for these machines is still somewhat
+	  experimental. If you plan to try to use the kernel on such a machine
+	  say Y here.
+	  Everybody else says N.
+
+config SUN3X
+	bool "Sun3x support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	select M68030
+	help
+	  This option enables support for the Sun 3x series of workstations.
+	  Be warned that this support is very experimental.
+	  Note that Sun 3x kernels are not compatible with Sun 3 hardware.
+	  General Linux information on the Sun 3x series (now discontinued)
+	  is at <http://www.angelfire.com/ca2/tech68k/sun3.html>.
+
+	  If you don't want to compile a kernel for a Sun 3x, say N.
+
+config Q40
+	bool "Q40/Q60 support"
+	depends on MMU
+	select MMU_MOTOROLA if MMU
+	help
+	  The Q40 is a Motorola 68040-based successor to the Sinclair QL
+	  manufactured in Germany.  There is an official Q40 home page at
+	  <http://www.q40.de/>.  This option enables support for the Q40 and
+	  Q60. Select your CPU below.  For 68LC060 don't forget to enable FPU
+	  emulation.
+
+config SUN3
+	bool "Sun3 support"
+	depends on MMU
+	depends on !MMU_MOTOROLA
+	select MMU_SUN3 if MMU
+	select M68020
+	help
+	  This option enables support for the Sun 3 series of workstations
+	  (3/50, 3/60, 3/1xx, 3/2xx systems). Enabling this option requires
+	  that all other hardware types must be disabled, as Sun 3 kernels
+	  are incompatible with all other m68k targets (including Sun 3x!).
+
+	  If you don't want to compile a kernel exclusively for a Sun 3, say N.
 
-config M5206e
-	bool "MCF5206e"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5206e processor support.
-
-config M520x
-	bool "MCF520x"
-	select COLDFIRE
-	select GENERIC_CLOCKEVENTS
-	select HAVE_CACHE_SPLIT
-	help
-	   Freescale Coldfire 5207/5208 processor support.
-
-config M523x
-	bool "MCF523x"
-	select COLDFIRE
-	select GENERIC_CLOCKEVENTS
-	select HAVE_CACHE_SPLIT
-	select HAVE_IPSBAR
-	help
-	  Freescale Coldfire 5230/1/2/4/5 processor support
-
-config M5249
-	bool "MCF5249"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5249 processor support.
-
-config M5271
-	bool "MCF5271"
-	select COLDFIRE
-	select HAVE_CACHE_SPLIT
-	select HAVE_IPSBAR
-	help
-	  Freescale (Motorola) ColdFire 5270/5271 processor support.
-
-config M5272
-	bool "MCF5272"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5272 processor support.
-
-config M5275
-	bool "MCF5275"
-	select COLDFIRE
-	select HAVE_CACHE_SPLIT
-	select HAVE_IPSBAR
-	help
-	  Freescale (Motorola) ColdFire 5274/5275 processor support.
-
-config M528x
-	bool "MCF528x"
-	select COLDFIRE
-	select GENERIC_CLOCKEVENTS
-	select HAVE_CACHE_SPLIT
-	select HAVE_IPSBAR
-	help
-	  Motorola ColdFire 5280/5282 processor support.
-
-config M5307
-	bool "MCF5307"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_CACHE_CB
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5307 processor support.
-
-config M532x
-	bool "MCF532x"
-	select COLDFIRE
-	select HAVE_CACHE_CB
-	help
-	  Freescale (Motorola) ColdFire 532x processor support.
-
-config M5407
-	bool "MCF5407"
-	select COLDFIRE
-	select COLDFIRE_SW_A7
-	select HAVE_CACHE_CB
-	select HAVE_MBAR
-	help
-	  Motorola ColdFire 5407 processor support.
-
-config M547x
-	bool "MCF547x"
-	select COLDFIRE
-	select HAVE_CACHE_CB
-	select HAVE_MBAR
-	help
-	  Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support.
-
-config M548x
-	bool "MCF548x"
-	select COLDFIRE
-	select HAVE_CACHE_CB
-	select HAVE_MBAR
-	help
-	  Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support.
-
-endchoice
-
-config M527x
-	bool
-	depends on (M5271 || M5275)
-	select GENERIC_CLOCKEVENTS
-	default y
-
-config M54xx
+config PILOT
 	bool
-	depends on (M548x || M547x)
-	default y
-
-config CLOCK_SET
-	bool "Enable setting the CPU clock frequency"
-	default n
-	help
-	  On some CPU's you do not need to know what the core CPU clock
-	  frequency is. On these you can disable clock setting. On some
-	  traditional 68K parts, and on all ColdFire parts you need to set
-	  the appropriate CPU clock frequency. On these devices many of the
-	  onboard peripherals derive their timing from the master CPU clock
-	  frequency.
-
-config CLOCK_FREQ
-	int "Set the core clock frequency"
-	default "66666666"
-	depends on CLOCK_SET
-	help
-	  Define the CPU clock frequency in use. This is the core clock
-	  frequency, it may or may not be the same as the external clock
-	  crystal fitted to your board. Some processors have an internal
-	  PLL and can have their frequency programmed at run time, others
-	  use internal dividers. In general the kernel won't setup a PLL
-	  if it is fitted (there are some exceptions). This value will be
-	  specific to the exact CPU that you are using.
-
-config OLDMASK
-	bool "Old mask 5307 (1H55J) silicon"
-	depends on M5307
-	help
-	  Build support for the older revision ColdFire 5307 silicon.
-	  Specifically this is the 1H55J mask revision.
-
-if HAVE_CACHE_SPLIT
-choice
-	prompt "Split Cache Configuration"
-	default CACHE_I
-
-config CACHE_I
-	bool "Instruction"
-	help
-	  Use all of the ColdFire CPU cache memory as an instruction cache.
-
-config CACHE_D
-	bool "Data"
-	help
-	  Use all of the ColdFire CPU cache memory as a data cache.
-
-config CACHE_BOTH
-	bool "Both"
-	help
-	  Split the ColdFire CPU cache, and use half as an instruction cache
-	  and half as a data cache.
-endchoice
-endif
-
-if HAVE_CACHE_CB
-choice
-	prompt "Data cache mode"
-	default CACHE_WRITETHRU
-
-config CACHE_WRITETHRU
-	bool "Write-through"
-	help
-	  The ColdFire CPU cache is set into Write-through mode.
-
-config CACHE_COPYBACK
-	bool "Copy-back"
-	help
-	  The ColdFire CPU cache is set into Copy-back mode.
-endchoice
-endif
-
-comment "Platform"
 
 config PILOT3
 	bool "Pilot 1000/5000, PalmPilot Personal/Pro, or PalmIII support"
 	depends on M68328
+	select PILOT
 	help
 	  Support for the Palm Pilot 1000/5000, Personal/Pro and PalmIII.
 
@@ -302,7 +147,7 @@ config XCOPILOT_BUGS
 	  Support the bugs of Xcopilot.
 
 config UC5272
-	bool 'Arcturus Networks uC5272 dimm board support'
+	bool "Arcturus Networks uC5272 dimm board support"
 	depends on M5272
 	help
 	  Support for the Arcturus Networks uC5272 dimm board.
@@ -356,15 +201,23 @@ config UCQUICC
 	help
 	  Support for the Lineo uCquicc board.
 
+config ARNEWSH
+	bool
+
 config ARN5206
 	bool "Arnewsh 5206 board support"
 	depends on M5206
+	select ARNEWSH
 	help
 	  Support for the Arnewsh 5206 board.
 
+config FREESCALE
+	bool
+
 config M5206eC3
 	bool "Motorola M5206eC3 board support"
 	depends on M5206e
+	select FREESCALE
 	help
 	  Support for the Motorola M5206eC3 board.
 
@@ -377,75 +230,92 @@ config ELITE
 config M5208EVB
 	bool "Freescale M5208EVB board support"
 	depends on M520x
+	select FREESCALE
 	help
 	  Support for the Freescale Coldfire M5208EVB.
 
 config M5235EVB
 	bool "Freescale M5235EVB support"
 	depends on M523x
+	select FREESCALE
 	help
 	  Support for the Freescale M5235EVB board.
 
 config M5249C3
 	bool "Motorola M5249C3 board support"
 	depends on M5249
+	select FREESCALE
 	help
 	  Support for the Motorola M5249C3 board.
 
 config M5271EVB
 	bool "Freescale (Motorola) M5271EVB board support"
 	depends on M5271
+	select FREESCALE
 	help
 	  Support for the Freescale (Motorola) M5271EVB board.
 
 config M5275EVB
 	bool "Freescale (Motorola) M5275EVB board support"
 	depends on M5275
+	select FREESCALE
 	help
 	  Support for the Freescale (Motorola) M5275EVB board.
 
 config M5272C3
 	bool "Motorola M5272C3 board support"
 	depends on M5272
+	select FREESCALE
 	help
 	  Support for the Motorola M5272C3 board.
 
+config senTec
+	bool
+
 config COBRA5272
 	bool "senTec COBRA5272 board support"
 	depends on M5272
+	select senTec
 	help
 	  Support for the senTec COBRA5272 board.
 
+config AVNET
+	bool
+
 config AVNET5282
 	bool "Avnet 5282 board support"
 	depends on M528x
+	select AVNET
 	help
-	  Support for the Avnet 5282 board.  
-	  
+	  Support for the Avnet 5282 board.
+
 config M5282EVB
 	bool "Motorola M5282EVB board support"
 	depends on M528x
+	select FREESCALE
 	help
 	  Support for the Motorola M5282EVB board.
 
 config COBRA5282
 	bool "senTec COBRA5282 board support"
 	depends on M528x
+	select senTec
 	help
 	  Support for the senTec COBRA5282 board.
-	  
+
 config SOM5282EM
 	bool "EMAC.Inc SOM5282EM board support"
 	depends on M528x
+	select EMAC_INC
 	help
-	  Support for the EMAC.Inc SOM5282EM module.  
-	  
+	  Support for the EMAC.Inc SOM5282EM module.
+
 config WILDFIRE
 	bool "Intec Automation Inc. WildFire board support"
 	depends on M528x
 	help
 	  Support for the Intec Automation Inc. WildFire.
-	  
+
 config WILDFIREMOD
 	bool "Intec Automation Inc. WildFire module support"
 	depends on M528x
@@ -455,12 +325,14 @@ config WILDFIREMOD
 config ARN5307
 	bool "Arnewsh 5307 board support"
 	depends on M5307
+	select ARNEWSH
 	help
 	  Support for the Arnewsh 5307 board.
 
 config M5307C3
 	bool "Motorola M5307C3 board support"
 	depends on M5307
+	select FREESCALE
 	help
 	  Support for the Motorola M5307C3 board.
 
@@ -473,6 +345,7 @@ config SECUREEDGEMP3
 config M5329EVB
 	bool "Freescale (Motorola) M5329EVB board support"
 	depends on M532x
+	select FREESCALE
 	help
 	  Support for the Freescale (Motorola) M5329EVB board.
 
@@ -485,6 +358,7 @@ config COBRA5329
 config M5407C3
 	bool "Motorola M5407C3 board support"
 	depends on M5407
+	select FREESCALE
 	help
 	  Support for the Motorola M5407C3 board.
 
@@ -524,9 +398,13 @@ config SNAPGEAR
 	help
 	  Special additional support for SnapGear router boards.
 
+config SNEHA
+	bool
+
 config CPU16B
 	bool "Sneha Technologies S.L. Sarasvati board support"
 	depends on M5272
+	select SNEHA
 	help
 	  Support for the SNEHA CPU16B board.
 
@@ -536,63 +414,20 @@ config MOD5272
 	help
 	  Support for the Netburner MOD-5272 board.
 
+config SAVANT
+	bool
+
 config SAVANTrosie1
 	bool "Savant Rosie1 board support"
 	depends on M523x
+	select SAVANT
 	help
 	  Support for the Savant Rosie1 board.
 
-config ROMFS_FROM_ROM
-	bool "ROMFS image not RAM resident"
-	depends on (NETtel || SNAPGEAR)
-	help
-	  The ROMfs filesystem will stay resident in the FLASH/ROM, not be
-	  moved into RAM.
-
-config PILOT
-	bool
-	default y
-	depends on (PILOT3 || PILOT5)
-
-config ARNEWSH
-	bool
-	default y
-	depends on (ARN5206 || ARN5307)
-
-config FREESCALE
-	bool
-	default y
-	depends on (M5206eC3 || M5208EVB || M5235EVB || M5249C3 || M5271EVB || M5272C3 || M5275EVB || M5282EVB || M5307C3 || M5329EVB || M5407C3)
-
-config HW_FEITH
-	bool
-	default y
-	depends on (CLEOPATRA || CANCam || SCALES)
-
-config senTec
-	bool
-	default y
-	depends on (COBRA5272 || COBRA5282)
-	
-config EMAC_INC
-	bool
-	default y
-	depends on (SOM5282EM)
 
-config SNEHA
-	bool
-	default y
-	depends on CPU16B
+if !MMU || COLDFIRE
 
-config SAVANT
-	bool
-	default y
-	depends on SAVANTrosie1
-
-config AVNET
-	bool
-	default y
-	depends on (AVNET5282)
+comment "Machine Options"
 
 config UBOOT
 	bool "Support for U-Boot command line parameters"
@@ -673,33 +508,6 @@ config KERNELBASE
 	  a system with the RAM based at address 0, and leaving enough room
 	  for the theoretical maximum number of 256 vectors.
 
-choice
-	prompt "RAM bus width"
-	default RAMAUTOBIT
-
-config RAMAUTOBIT
-	bool "AUTO"
-	help
-	  Select the physical RAM data bus size. Not needed on most platforms,
-	  so you can generally choose AUTO.
-
-config RAM8BIT
-	bool "8bit"
-	help
-	  Configure RAM bus to be 8 bits wide.
-
-config RAM16BIT
-	bool "16bit"
-	help
-	  Configure RAM bus to be 16 bits wide.
-
-config RAM32BIT
-	bool "32bit"
-	help
-	  Configure RAM bus to be 32 bits wide.
-
-endchoice
-
 comment "ROM configuration"
 
 config ROM
@@ -772,16 +580,4 @@ config ROMKERNEL
 
 endchoice
 
-if COLDFIRE
-source "kernel/Kconfig.preempt"
 endif
-
-source "kernel/time/Kconfig"
-
-config ISA_DMA_API
-	bool
-	depends on !M5272
-	default y
-
-source "drivers/pcmcia/Kconfig"
-
diff --git a/arch/m68k/Kconfig.mmu b/arch/m68k/Kconfig.mmu
deleted file mode 100644
index 13e20bbc4079..000000000000
--- a/arch/m68k/Kconfig.mmu
+++ /dev/null
@@ -1,411 +0,0 @@
-config GENERIC_IOMAP
-	bool
-	default y
-
-config ARCH_MAY_HAVE_PC_FDC
-	bool
-	depends on BROKEN && (Q40 || SUN3X)
-	default y
-
-config ARCH_USES_GETTIMEOFFSET
-	def_bool y
-
-config EISA
-	bool
-	---help---
-	  The Extended Industry Standard Architecture (EISA) bus was
-	  developed as an open alternative to the IBM MicroChannel bus.
-
-	  The EISA bus provided some of the features of the IBM MicroChannel
-	  bus while maintaining backward compatibility with cards made for
-	  the older ISA bus.  The EISA bus saw limited use between 1988 and
-	  1995 when it was made obsolete by the PCI bus.
-
-	  Say Y here if you are building a kernel for an EISA-based machine.
-
-	  Otherwise, say N.
-
-config MCA
-	bool
-	help
-	  MicroChannel Architecture is found in some IBM PS/2 machines and
-	  laptops.  It is a bus system similar to PCI or ISA. See
-	  <file:Documentation/mca.txt> (and especially the web page given
-	  there) before attempting to build an MCA bus kernel.
-
-config PCMCIA
-	tristate
-	---help---
-	  Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
-	  computer.  These are credit-card size devices such as network cards,
-	  modems or hard drives often used with laptops computers.  There are
-	  actually two varieties of these cards: the older 16 bit PCMCIA cards
-	  and the newer 32 bit CardBus cards.  If you want to use CardBus
-	  cards, you need to say Y here and also to "CardBus support" below.
-
-	  To use your PC-cards, you will need supporting software from David
-	  Hinds' pcmcia-cs package (see the file <file:Documentation/Changes>
-	  for location).  Please also read the PCMCIA-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.
-
-	  To compile this driver as modules, choose M here: the
-	  modules will be called pcmcia_core and ds.
-
-config AMIGA
-	bool "Amiga support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the Amiga series of computers. If
-	  you plan to use this kernel on an Amiga, say Y here and browse the
-	  material available in <file:Documentation/m68k>; otherwise say N.
-
-config ATARI
-	bool "Atari support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the 68000-based Atari series of
-	  computers (including the TT, Falcon and Medusa). If you plan to use
-	  this kernel on an Atari, say Y here and browse the material
-	  available in <file:Documentation/m68k>; otherwise say N.
-
-config MAC
-	bool "Macintosh support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the Apple Macintosh series of
-	  computers (yes, there is experimental support now, at least for part
-	  of the series).
-
-	  Say N unless you're willing to code the remaining necessary support.
-	  ;)
-
-config NUBUS
-	bool
-	depends on MAC
-	default y
-
-config M68K_L2_CACHE
-	bool
-	depends on MAC
-	default y
-
-config APOLLO
-	bool "Apollo support"
-	select MMU_MOTOROLA if MMU
-	help
-	  Say Y here if you want to run Linux on an MC680x0-based Apollo
-	  Domain workstation such as the DN3500.
-
-config VME
-	bool "VME (Motorola and BVM) support"
-	select MMU_MOTOROLA if MMU
-	help
-	  Say Y here if you want to build a kernel for a 680x0 based VME
-	  board.  Boards currently supported include Motorola boards MVME147,
-	  MVME162, MVME166, MVME167, MVME172, and MVME177.  BVME4000 and
-	  BVME6000 boards from BVM Ltd are also supported.
-
-config MVME147
-	bool "MVME147 support"
-	depends on VME
-	help
-	  Say Y to include support for early Motorola VME boards.  This will
-	  build a kernel which can run on MVME147 single-board computers.  If
-	  you select this option you will have to select the appropriate
-	  drivers for SCSI, Ethernet and serial ports later on.
-
-config MVME16x
-	bool "MVME162, 166 and 167 support"
-	depends on VME
-	help
-	  Say Y to include support for Motorola VME boards.  This will build a
-	  kernel which can run on MVME162, MVME166, MVME167, MVME172, and
-	  MVME177 boards.  If you select this option you will have to select
-	  the appropriate drivers for SCSI, Ethernet and serial ports later
-	  on.
-
-config BVME6000
-	bool "BVME4000 and BVME6000 support"
-	depends on VME
-	help
-	  Say Y to include support for VME boards from BVM Ltd.  This will
-	  build a kernel which can run on BVME4000 and BVME6000 boards.  If
-	  you select this option you will have to select the appropriate
-	  drivers for SCSI, Ethernet and serial ports later on.
-
-config HP300
-	bool "HP9000/300 and HP9000/400 support"
-	select MMU_MOTOROLA if MMU
-	help
-	  This option enables support for the HP9000/300 and HP9000/400 series
-	  of workstations. Support for these machines is still somewhat
-	  experimental. If you plan to try to use the kernel on such a machine
-	  say Y here.
-	  Everybody else says N.
-
-config DIO
-	bool "DIO bus support"
-	depends on HP300
-	default y
-	help
-	  Say Y here to enable support for the "DIO" expansion bus used in
-	  HP300 machines. If you are using such a system you almost certainly
-	  want this.
-
-config SUN3X
-	bool "Sun3x support"
-	select MMU_MOTOROLA if MMU
-	select M68030
-	help
-	  This option enables support for the Sun 3x series of workstations.
-	  Be warned that this support is very experimental.
-	  Note that Sun 3x kernels are not compatible with Sun 3 hardware.
-	  General Linux information on the Sun 3x series (now discontinued)
-	  is at <http://www.angelfire.com/ca2/tech68k/sun3.html>.
-
-	  If you don't want to compile a kernel for a Sun 3x, say N.
-
-config Q40
-	bool "Q40/Q60 support"
-	select MMU_MOTOROLA if MMU
-	help
-	  The Q40 is a Motorola 68040-based successor to the Sinclair QL
-	  manufactured in Germany.  There is an official Q40 home page at
-	  <http://www.q40.de/>.  This option enables support for the Q40 and
-	  Q60. Select your CPU below.  For 68LC060 don't forget to enable FPU
-	  emulation.
-
-config SUN3
-	bool "Sun3 support"
-	depends on !MMU_MOTOROLA
-	select MMU_SUN3 if MMU
-	select M68020
-	help
-	  This option enables support for the Sun 3 series of workstations
-	  (3/50, 3/60, 3/1xx, 3/2xx systems). Enabling this option requires
-	  that all other hardware types must be disabled, as Sun 3 kernels
-	  are incompatible with all other m68k targets (including Sun 3x!).
-
-	  If you don't want to compile a kernel exclusively for a Sun 3, say N.
-
-config NATFEAT
-	bool "ARAnyM emulator support"
-	depends on ATARI
-	help
-	  This option enables support for ARAnyM native features, such as
-	  access to a disk image as /dev/hda.
-
-config NFBLOCK
-	tristate "NatFeat block device support"
-	depends on BLOCK && NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat block device
-	  which allows direct access to the hard drives without using
-	  the hardware emulation.
-
-config NFCON
-	tristate "NatFeat console driver"
-	depends on NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat console driver
-	  which allows the console output to be redirected to the stderr
-	  output of ARAnyM.
-
-config NFETH
-	tristate "NatFeat Ethernet support"
-	depends on NET_ETHERNET && NATFEAT
-	help
-	  Say Y to include support for the ARAnyM NatFeat network device
-	  which will emulate a regular ethernet device while presenting an
-	  ethertap device to the host system.
-
-comment "Processor type"
-
-config M68020
-	bool "68020 support"
-	help
-	  If you anticipate running this kernel on a computer with a MC68020
-	  processor, say Y. Otherwise, say N. Note that the 68020 requires a
-	  68851 MMU (Memory Management Unit) to run Linux/m68k, except on the
-	  Sun 3, which provides its own version.
-
-config M68030
-	bool "68030 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68030
-	  processor, say Y. Otherwise, say N. Note that a MC68EC030 will not
-	  work, as it does not include an MMU (Memory Management Unit).
-
-config M68040
-	bool "68040 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68LC040
-	  or MC68040 processor, say Y. Otherwise, say N. Note that an
-	  MC68EC040 will not work, as it does not include an MMU (Memory
-	  Management Unit).
-
-config M68060
-	bool "68060 support"
-	depends on !MMU_SUN3
-	help
-	  If you anticipate running this kernel on a computer with a MC68060
-	  processor, say Y. Otherwise, say N.
-
-config MMU_MOTOROLA
-	bool
-
-config MMU_SUN3
-	bool
-	depends on MMU && !MMU_MOTOROLA
-
-config M68KFPU_EMU
-	bool "Math emulation support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	help
-	  At some point in the future, this will cause floating-point math
-	  instructions to be emulated by the kernel on machines that lack a
-	  floating-point math coprocessor.  Thrill-seekers and chronically
-	  sleep-deprived psychotic hacker types can say Y now, everyone else
-	  should probably wait a while.
-
-config M68KFPU_EMU_EXTRAPREC
-	bool "Math emulation extra precision"
-	depends on M68KFPU_EMU
-	help
-	  The fpu uses normally a few bit more during calculations for
-	  correct rounding, the emulator can (often) do the same but this
-	  extra calculation can cost quite some time, so you can disable
-	  it here. The emulator will then "only" calculate with a 64 bit
-	  mantissa and round slightly incorrect, what is more than enough
-	  for normal usage.
-
-config M68KFPU_EMU_ONLY
-	bool "Math emulation only kernel"
-	depends on M68KFPU_EMU
-	help
-	  This option prevents any floating-point instructions from being
-	  compiled into the kernel, thereby the kernel doesn't save any
-	  floating point context anymore during task switches, so this
-	  kernel will only be usable on machines without a floating-point
-	  math coprocessor. This makes the kernel a bit faster as no tests
-	  needs to be executed whether a floating-point instruction in the
-	  kernel should be executed or not.
-
-config ADVANCED
-	bool "Advanced configuration options"
-	---help---
-	  This gives you access to some advanced options for the CPU. The
-	  defaults should be fine for most users, but these options may make
-	  it possible for you to improve performance somewhat if you know what
-	  you are doing.
-
-	  Note that the answer to this question won't directly affect the
-	  kernel: saying N will just cause the configurator to skip all
-	  the questions about these options.
-
-	  Most users should say N to this question.
-
-config RMW_INSNS
-	bool "Use read-modify-write instructions"
-	depends on ADVANCED
-	---help---
-	  This allows to use certain instructions that work with indivisible
-	  read-modify-write bus cycles. While this is faster than the
-	  workaround of disabling interrupts, it can conflict with DMA
-	  ( = direct memory access) on many Amiga systems, and it is also said
-	  to destabilize other machines. It is very likely that this will
-	  cause serious problems on any Amiga or Atari Medusa if set. The only
-	  configuration where it should work are 68030-based Ataris, where it
-	  apparently improves performance. But you've been warned! Unless you
-	  really know what you are doing, say N. Try Y only if you're quite
-	  adventurous.
-
-config SINGLE_MEMORY_CHUNK
-	bool "Use one physical chunk of memory only" if ADVANCED && !SUN3
-	default y if SUN3
-	select NEED_MULTIPLE_NODES
-	help
-	  Ignore all but the first contiguous chunk of physical memory for VM
-	  purposes.  This will save a few bytes kernel size and may speed up
-	  some operations.  Say N if not sure.
-
-config 060_WRITETHROUGH
-	bool "Use write-through caching for 68060 supervisor accesses"
-	depends on ADVANCED && M68060
-	---help---
-	  The 68060 generally uses copyback caching of recently accessed data.
-	  Copyback caching means that memory writes will be held in an on-chip
-	  cache and only written back to memory some time later.  Saying Y
-	  here will force supervisor (kernel) accesses to use writethrough
-	  caching.  Writethrough caching means that data is written to memory
-	  straight away, so that cache and memory data always agree.
-	  Writethrough caching is less efficient, but is needed for some
-	  drivers on 68060 based systems where the 68060 bus snooping signal
-	  is hardwired on.  The 53c710 SCSI driver is known to suffer from
-	  this problem.
-
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool !SINGLE_MEMORY_CHUNK
-
-config NODES_SHIFT
-	int
-	default "3"
-	depends on !SINGLE_MEMORY_CHUNK
-
-config ZORRO
-	bool "Amiga Zorro (AutoConfig) bus support"
-	depends on AMIGA
-	help
-	  This enables support for the Zorro bus in the Amiga. If you have
-	  expansion cards in your Amiga that conform to the Amiga
-	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
-	  expansion cards that do not fit in the Zorro slots but fit in e.g.
-	  the CPU slot may fall in this category, so you have to say Y to let
-	  Linux use these.
-
-config AMIGA_PCMCIA
-	bool "Amiga 1200/600 PCMCIA support (EXPERIMENTAL)"
-	depends on AMIGA && EXPERIMENTAL
-	help
-	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
-	  600. If you intend to use pcmcia cards say Y; otherwise say N.
-
-config HEARTBEAT
-	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
-	default y if !AMIGA && !APOLLO && !ATARI && !MAC && !Q40 && HP300
-	help
-	  Use the power-on LED on your machine as a load meter.  The exact
-	  behavior is platform-dependent, but normally the flash frequency is
-	  a hyperbolic function of the 5-minute load average.
-
-# We have a dedicated heartbeat LED. :-)
-config PROC_HARDWARE
-	bool "/proc/hardware support"
-	help
-	  Say Y here to support the /proc/hardware file, which gives you
-	  access to information about the machine you're running on,
-	  including the model, CPU, MMU, clock speed, BogoMIPS rating,
-	  and memory size.
-
-config ISA
-	bool
-	depends on Q40 || AMIGA_PCMCIA
-	default y
-	help
-	  Find out whether you have ISA slots on your motherboard.  ISA is the
-	  name of a bus system, i.e. the way the CPU talks to the other stuff
-	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
-	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
-	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
-
-config GENERIC_ISA_DMA
-	bool
-	depends on Q40 || AMIGA_PCMCIA
-	default y
-
-source "drivers/pci/Kconfig"
-
-source "drivers/zorro/Kconfig"
-
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index be46cadd4017..cf318f20c64d 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -1,7 +1,171 @@
+#
+# m68k/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Hamish Macdonald
+# Copyright (C) 2002,2011 Greg Ungerer <gerg@snapgear.com>
+#
+
 KBUILD_DEFCONFIG := multi_defconfig
 
+#
+#	Enable processor type. Ordering of these is important - we want to
+#	use the minimum processor type of the range we support. The logic
+#	for 680x0 will only allow use of the -m68060 or -m68040 if no other
+#	680x0 type is specified - and no option is specified for 68030 or
+#	68020. The other m68k/ColdFire types always specify some type of
+#	compiler cpu type flag.
+#
+ifndef CONFIG_M68040
+cpuflags-$(CONFIG_M68060)	:= -m68060
+endif
+ifndef CONFIG_M68060
+cpuflags-$(CONFIG_M68040)	:= -m68040
+endif
+cpuflags-$(CONFIG_M68030)	:=
+cpuflags-$(CONFIG_M68020)	:=
+cpuflags-$(CONFIG_M68360)	:= -m68332
+cpuflags-$(CONFIG_M68000)	:= -m68000
+cpuflags-$(CONFIG_M54xx)	:= $(call cc-option,-mcpu=5475,-m5200)
+cpuflags-$(CONFIG_M5407)	:= $(call cc-option,-mcpu=5407,-m5200)
+cpuflags-$(CONFIG_M532x)	:= $(call cc-option,-mcpu=532x,-m5307)
+cpuflags-$(CONFIG_M5307)	:= $(call cc-option,-mcpu=5307,-m5200)
+cpuflags-$(CONFIG_M528x)	:= $(call cc-option,-mcpu=528x,-m5307)
+cpuflags-$(CONFIG_M5275)	:= $(call cc-option,-mcpu=5275,-m5307)
+cpuflags-$(CONFIG_M5272)	:= $(call cc-option,-mcpu=5272,-m5307)
+cpuflags-$(CONFIG_M5271)	:= $(call cc-option,-mcpu=5271,-m5307)
+cpuflags-$(CONFIG_M523x)	:= $(call cc-option,-mcpu=523x,-m5307)
+cpuflags-$(CONFIG_M5249)	:= $(call cc-option,-mcpu=5249,-m5200)
+cpuflags-$(CONFIG_M520x)	:= $(call cc-option,-mcpu=5208,-m5200)
+cpuflags-$(CONFIG_M5206e)	:= $(call cc-option,-mcpu=5206e,-m5200)
+cpuflags-$(CONFIG_M5206)	:= $(call cc-option,-mcpu=5206,-m5200)
+
+KBUILD_AFLAGS += $(cpuflags-y)
+KBUILD_CFLAGS += $(cpuflags-y) -pipe
 ifdef CONFIG_MMU
-include $(srctree)/arch/m68k/Makefile_mm
+# without -fno-strength-reduce the 53c7xx.c driver fails ;-(
+KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2
+else
+# we can use a m68k-linux-gcc toolchain with these in place
+KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
+KBUILD_CFLAGS += -D__uClinux__
+KBUILD_AFLAGS += -D__uClinux__
+endif
+
+LDFLAGS := -m m68kelf
+KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
+ifneq ($(SUBARCH),$(ARCH))
+	ifeq ($(CROSS_COMPILE),)
+		CROSS_COMPILE := $(call cc-cross-prefix, \
+			m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
+	endif
+endif
+
+ifdef CONFIG_SUN3
+LDFLAGS_vmlinux = -N
+endif
+
+CHECKFLAGS += -D__mc68000__
+
+
+ifdef CONFIG_KGDB
+# If configured for kgdb support, include debugging infos and keep the
+# frame pointer
+KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g
+endif
+
+#
+# Select the assembler head startup code. Order is important. The default
+# head code is first, processor specific selections can override it after.
+#
+head-y				:= arch/m68k/kernel/head.o
+head-$(CONFIG_SUN3)		:= arch/m68k/kernel/sun3-head.o
+head-$(CONFIG_M68360)		:= arch/m68k/platform/68360/head.o
+head-$(CONFIG_M68000)		:= arch/m68k/platform/68328/head.o
+head-$(CONFIG_COLDFIRE)		:= arch/m68k/platform/coldfire/head.o
+
+core-y				+= arch/m68k/kernel/	arch/m68k/mm/
+libs-y				+= arch/m68k/lib/
+
+core-$(CONFIG_Q40)		+= arch/m68k/q40/
+core-$(CONFIG_AMIGA)		+= arch/m68k/amiga/
+core-$(CONFIG_ATARI)		+= arch/m68k/atari/
+core-$(CONFIG_MAC)		+= arch/m68k/mac/
+core-$(CONFIG_HP300)		+= arch/m68k/hp300/
+core-$(CONFIG_APOLLO)		+= arch/m68k/apollo/
+core-$(CONFIG_MVME147)		+= arch/m68k/mvme147/
+core-$(CONFIG_MVME16x)		+= arch/m68k/mvme16x/
+core-$(CONFIG_BVME6000)		+= arch/m68k/bvme6000/
+core-$(CONFIG_SUN3X)		+= arch/m68k/sun3x/	arch/m68k/sun3/
+core-$(CONFIG_SUN3)		+= arch/m68k/sun3/	arch/m68k/sun3/prom/
+core-$(CONFIG_NATFEAT)		+= arch/m68k/emu/
+core-$(CONFIG_M68040)		+= arch/m68k/fpsp040/
+core-$(CONFIG_M68060)		+= arch/m68k/ifpsp060/
+core-$(CONFIG_M68KFPU_EMU)	+= arch/m68k/math-emu/
+core-$(CONFIG_M68360)		+= arch/m68k/platform/68360/
+core-$(CONFIG_M68000)		+= arch/m68k/platform/68328/
+core-$(CONFIG_M68EZ328)		+= arch/m68k/platform/68EZ328/
+core-$(CONFIG_M68VZ328)		+= arch/m68k/platform/68VZ328/
+core-$(CONFIG_COLDFIRE)		+= arch/m68k/platform/coldfire/
+core-$(CONFIG_M5206)		+= arch/m68k/platform/5206/
+core-$(CONFIG_M5206e)		+= arch/m68k/platform/5206/
+core-$(CONFIG_M520x)		+= arch/m68k/platform/520x/
+core-$(CONFIG_M523x)		+= arch/m68k/platform/523x/
+core-$(CONFIG_M5249)		+= arch/m68k/platform/5249/
+core-$(CONFIG_M527x)		+= arch/m68k/platform/527x/
+core-$(CONFIG_M5272)		+= arch/m68k/platform/5272/
+core-$(CONFIG_M528x)		+= arch/m68k/platform/528x/
+core-$(CONFIG_M5307)		+= arch/m68k/platform/5307/
+core-$(CONFIG_M532x)		+= arch/m68k/platform/532x/
+core-$(CONFIG_M5407)		+= arch/m68k/platform/5407/
+core-$(CONFIG_M54xx)		+= arch/m68k/platform/54xx/
+
+
+all:	zImage
+
+lilo:	vmlinux
+	if [ -f $(INSTALL_PATH)/vmlinux ]; then mv -f $(INSTALL_PATH)/vmlinux $(INSTALL_PATH)/vmlinux.old; fi
+	if [ -f $(INSTALL_PATH)/System.map ]; then mv -f $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
+	cat vmlinux > $(INSTALL_PATH)/vmlinux
+	cp System.map $(INSTALL_PATH)/System.map
+	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
+
+zImage compressed: vmlinux.gz
+
+vmlinux.gz: vmlinux
+
+ifndef CONFIG_KGDB
+	cp vmlinux vmlinux.tmp
+	$(STRIP) vmlinux.tmp
+	gzip -9c vmlinux.tmp >vmlinux.gz
+	rm vmlinux.tmp
 else
-include $(srctree)/arch/m68k/Makefile_no
+	gzip -9c vmlinux >vmlinux.gz
 endif
+
+bzImage: vmlinux.bz2
+
+vmlinux.bz2: vmlinux
+
+ifndef CONFIG_KGDB
+	cp vmlinux vmlinux.tmp
+	$(STRIP) vmlinux.tmp
+	bzip2 -1c vmlinux.tmp >vmlinux.bz2
+	rm vmlinux.tmp
+else
+	bzip2 -1c vmlinux >vmlinux.bz2
+endif
+
+archclean:
+	rm -f vmlinux.gz vmlinux.bz2
+
+install:
+	sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/m68k/Makefile_mm b/arch/m68k/Makefile_mm
deleted file mode 100644
index d449b6d5aecf..000000000000
--- a/arch/m68k/Makefile_mm
+++ /dev/null
@@ -1,121 +0,0 @@
-#
-# m68k/Makefile
-#
-# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# Copyright (C) 1994 by Hamish Macdonald
-#
-
-# override top level makefile
-AS += -m68020
-LDFLAGS := -m m68kelf
-KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/m68k/kernel/module.lds
-ifneq ($(SUBARCH),$(ARCH))
-	ifeq ($(CROSS_COMPILE),)
-		CROSS_COMPILE := $(call cc-cross-prefix, \
-			m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
-	endif
-endif
-
-ifdef CONFIG_SUN3
-LDFLAGS_vmlinux = -N
-endif
-
-CHECKFLAGS += -D__mc68000__
-
-# without -fno-strength-reduce the 53c7xx.c driver fails ;-(
-KBUILD_CFLAGS += -pipe -fno-strength-reduce -ffixed-a2
-
-# enable processor switch if compiled only for a single cpu
-ifndef CONFIG_M68020
-ifndef CONFIG_M68030
-
-ifndef CONFIG_M68060
-KBUILD_CFLAGS += -m68040
-endif
-
-ifndef CONFIG_M68040
-KBUILD_CFLAGS += -m68060
-endif
-
-endif
-endif
-
-ifdef CONFIG_KGDB
-# If configured for kgdb support, include debugging infos and keep the
-# frame pointer
-KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g
-endif
-
-ifndef CONFIG_SUN3
-head-y := arch/m68k/kernel/head.o
-else
-head-y := arch/m68k/kernel/sun3-head.o
-endif
-
-core-y				+= arch/m68k/kernel/	arch/m68k/mm/
-libs-y				+= arch/m68k/lib/
-
-core-$(CONFIG_Q40)		+= arch/m68k/q40/
-core-$(CONFIG_AMIGA)		+= arch/m68k/amiga/
-core-$(CONFIG_ATARI)		+= arch/m68k/atari/
-core-$(CONFIG_MAC)		+= arch/m68k/mac/
-core-$(CONFIG_HP300)		+= arch/m68k/hp300/
-core-$(CONFIG_APOLLO)		+= arch/m68k/apollo/
-core-$(CONFIG_MVME147)		+= arch/m68k/mvme147/
-core-$(CONFIG_MVME16x)		+= arch/m68k/mvme16x/
-core-$(CONFIG_BVME6000)		+= arch/m68k/bvme6000/
-core-$(CONFIG_SUN3X)		+= arch/m68k/sun3x/	arch/m68k/sun3/
-core-$(CONFIG_SUN3)		+= arch/m68k/sun3/	arch/m68k/sun3/prom/
-core-$(CONFIG_NATFEAT)		+= arch/m68k/emu/
-core-$(CONFIG_M68040)		+= arch/m68k/fpsp040/
-core-$(CONFIG_M68060)		+= arch/m68k/ifpsp060/
-core-$(CONFIG_M68KFPU_EMU)	+= arch/m68k/math-emu/
-
-all:	zImage
-
-lilo:	vmlinux
-	if [ -f $(INSTALL_PATH)/vmlinux ]; then mv -f $(INSTALL_PATH)/vmlinux $(INSTALL_PATH)/vmlinux.old; fi
-	if [ -f $(INSTALL_PATH)/System.map ]; then mv -f $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
-	cat vmlinux > $(INSTALL_PATH)/vmlinux
-	cp System.map $(INSTALL_PATH)/System.map
-	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
-
-zImage compressed: vmlinux.gz
-
-vmlinux.gz: vmlinux
-
-ifndef CONFIG_KGDB
-	cp vmlinux vmlinux.tmp
-	$(STRIP) vmlinux.tmp
-	gzip -9c vmlinux.tmp >vmlinux.gz
-	rm vmlinux.tmp
-else
-	gzip -9c vmlinux >vmlinux.gz
-endif
-
-bzImage: vmlinux.bz2
-
-vmlinux.bz2: vmlinux
-
-ifndef CONFIG_KGDB
-	cp vmlinux vmlinux.tmp
-	$(STRIP) vmlinux.tmp
-	bzip2 -1c vmlinux.tmp >vmlinux.bz2
-	rm vmlinux.tmp
-else
-	bzip2 -1c vmlinux >vmlinux.bz2
-endif
-
-archclean:
-	rm -f vmlinux.gz vmlinux.bz2
-
-install:
-	sh $(srctree)/arch/m68k/install.sh $(KERNELRELEASE) vmlinux.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/m68k/Makefile_no b/arch/m68k/Makefile_no
deleted file mode 100644
index 844d3f172264..000000000000
--- a/arch/m68k/Makefile_no
+++ /dev/null
@@ -1,124 +0,0 @@
-#
-# arch/m68k/Makefile
-#
-# This file is subject to the terms and conditions of the GNU General Public
-# License.  See the file "COPYING" in the main directory of this archive
-# for more details.
-#
-# (C) Copyright 2002, Greg Ungerer <gerg@snapgear.com>
-#
-
-platform-$(CONFIG_M68328)	:= 68328
-platform-$(CONFIG_M68EZ328)	:= 68EZ328
-platform-$(CONFIG_M68VZ328)	:= 68VZ328
-platform-$(CONFIG_M68360)	:= 68360
-platform-$(CONFIG_M5206)	:= 5206
-platform-$(CONFIG_M5206e)	:= 5206
-platform-$(CONFIG_M520x)	:= 520x
-platform-$(CONFIG_M523x)	:= 523x
-platform-$(CONFIG_M5249)	:= 5249
-platform-$(CONFIG_M527x)	:= 527x
-platform-$(CONFIG_M5272)	:= 5272
-platform-$(CONFIG_M528x)	:= 528x
-platform-$(CONFIG_M5307)	:= 5307
-platform-$(CONFIG_M532x)	:= 532x
-platform-$(CONFIG_M5407)	:= 5407
-platform-$(CONFIG_M54xx)	:= 54xx
-PLATFORM := $(platform-y)
-
-board-$(CONFIG_PILOT)		:= pilot
-board-$(CONFIG_UC5272)          := UC5272
-board-$(CONFIG_UC5282)          := UC5282
-board-$(CONFIG_UCSIMM)		:= ucsimm
-board-$(CONFIG_UCDIMM)		:= ucdimm
-board-$(CONFIG_UCQUICC)		:= uCquicc
-board-$(CONFIG_DRAGEN2)		:= de2
-board-$(CONFIG_ARNEWSH)		:= ARNEWSH
-board-$(CONFIG_FREESCALE)	:= FREESCALE
-board-$(CONFIG_M5235EVB)	:= M5235EVB
-board-$(CONFIG_M5271EVB)	:= M5271EVB
-board-$(CONFIG_M5275EVB)	:= M5275EVB
-board-$(CONFIG_M5282EVB)	:= M5282EVB
-board-$(CONFIG_ELITE)		:= eLITE
-board-$(CONFIG_NETtel)		:= NETtel
-board-$(CONFIG_SECUREEDGEMP3)	:= MP3
-board-$(CONFIG_CLEOPATRA)	:= CLEOPATRA
-board-$(CONFIG_senTec)		:= senTec
-board-$(CONFIG_SNEHA) 	        := SNEHA
-board-$(CONFIG_M5208EVB)	:= M5208EVB
-board-$(CONFIG_MOD5272)		:= MOD5272
-board-$(CONFIG_AVNET)           := AVNET
-board-$(CONFIG_SAVANT)		:= SAVANT
-BOARD := $(board-y)
-
-model-$(CONFIG_RAMKERNEL)	:= ram
-model-$(CONFIG_ROMKERNEL)	:= rom
-MODEL := $(model-y)
-
-#
-# Some code support is grouped together for a common cpu-subclass (for
-# example all ColdFire cpu's are very similar). Determine the sub-class
-# for the selected cpu. ONLY need to define this for the non-base member
-# of the family.
-#
-cpuclass-$(CONFIG_M5206)	:= coldfire
-cpuclass-$(CONFIG_M5206e)	:= coldfire
-cpuclass-$(CONFIG_M520x)	:= coldfire
-cpuclass-$(CONFIG_M523x)	:= coldfire
-cpuclass-$(CONFIG_M5249)	:= coldfire
-cpuclass-$(CONFIG_M527x)	:= coldfire
-cpuclass-$(CONFIG_M5272)	:= coldfire
-cpuclass-$(CONFIG_M528x)	:= coldfire
-cpuclass-$(CONFIG_M5307)	:= coldfire
-cpuclass-$(CONFIG_M532x)	:= coldfire
-cpuclass-$(CONFIG_M5407)	:= coldfire
-cpuclass-$(CONFIG_M54xx)	:= coldfire
-cpuclass-$(CONFIG_M68328)	:= 68328
-cpuclass-$(CONFIG_M68EZ328)	:= 68328
-cpuclass-$(CONFIG_M68VZ328)	:= 68328
-cpuclass-$(CONFIG_M68360)	:= 68360
-CPUCLASS := $(cpuclass-y)
-
-ifneq ($(CPUCLASS),$(PLATFORM))
-CLASSDIR := arch/m68k/platform/$(cpuclass-y)/
-endif
-
-export PLATFORM BOARD MODEL CPUCLASS
-
-#
-# Some CFLAG additions based on specific CPU type.
-#
-cflags-$(CONFIG_M5206)		:= $(call cc-option,-mcpu=5206,-m5200)
-cflags-$(CONFIG_M5206e)		:= $(call cc-option,-mcpu=5206e,-m5200)
-cflags-$(CONFIG_M520x)		:= $(call cc-option,-mcpu=5208,-m5200)
-cflags-$(CONFIG_M523x)		:= $(call cc-option,-mcpu=523x,-m5307)
-cflags-$(CONFIG_M5249)		:= $(call cc-option,-mcpu=5249,-m5200)
-cflags-$(CONFIG_M5271)		:= $(call cc-option,-mcpu=5271,-m5307)
-cflags-$(CONFIG_M5272)		:= $(call cc-option,-mcpu=5272,-m5307)
-cflags-$(CONFIG_M5275)		:= $(call cc-option,-mcpu=5275,-m5307)
-cflags-$(CONFIG_M528x)		:= $(call cc-option,-mcpu=528x,-m5307)
-cflags-$(CONFIG_M5307)		:= $(call cc-option,-mcpu=5307,-m5200)
-cflags-$(CONFIG_M532x)		:= $(call cc-option,-mcpu=532x,-m5307)
-cflags-$(CONFIG_M5407)		:= $(call cc-option,-mcpu=5407,-m5200)
-cflags-$(CONFIG_M54xx)		:= $(call cc-option,-mcpu=5475,-m5200)
-cflags-$(CONFIG_M68328)		:= -m68000
-cflags-$(CONFIG_M68EZ328)	:= -m68000
-cflags-$(CONFIG_M68VZ328)	:= -m68000
-cflags-$(CONFIG_M68360)		:= -m68332
-
-KBUILD_AFLAGS += $(cflags-y)
-
-KBUILD_CFLAGS += $(cflags-y)
-KBUILD_CFLAGS += -D__linux__
-KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
-
-head-y := arch/m68k/platform/$(cpuclass-y)/head.o
-
-core-y	+= arch/m68k/kernel/ \
-	   arch/m68k/mm/ \
-	   $(CLASSDIR) \
-	   arch/m68k/platform/$(PLATFORM)/
-libs-y	+= arch/m68k/lib/
-
-archclean:
-
diff --git a/arch/m68k/include/asm/entry.h b/arch/m68k/include/asm/entry.h
index 876eec6f2b52..c3c5a8643e15 100644
--- a/arch/m68k/include/asm/entry.h
+++ b/arch/m68k/include/asm/entry.h
@@ -1,5 +1,254 @@
-#ifdef __uClinux__
-#include "entry_no.h"
+#ifndef __M68K_ENTRY_H
+#define __M68K_ENTRY_H
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#ifdef __ASSEMBLY__
+#include <asm/thread_info.h>
+#endif
+
+/*
+ * Stack layout in 'ret_from_exception':
+ *
+ *	This allows access to the syscall arguments in registers d1-d5
+ *
+ *	 0(sp) - d1
+ *	 4(sp) - d2
+ *	 8(sp) - d3
+ *	 C(sp) - d4
+ *	10(sp) - d5
+ *	14(sp) - a0
+ *	18(sp) - a1
+ *	1C(sp) - a2
+ *	20(sp) - d0
+ *	24(sp) - orig_d0
+ *	28(sp) - stack adjustment
+ *	2C(sp) - [ sr              ] [ format & vector ]
+ *	2E(sp) - [ pc-hiword       ] [ sr              ]
+ *	30(sp) - [ pc-loword       ] [ pc-hiword       ]
+ *	32(sp) - [ format & vector ] [ pc-loword       ]
+ *		  ^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^
+ *			M68K		  COLDFIRE
+ */
+
+/* the following macro is used when enabling interrupts */
+#if defined(MACH_ATARI_ONLY)
+	/* block out HSYNC on the atari */
+#define ALLOWINT	(~0x400)
+#define	MAX_NOINT_IPL	3
 #else
-#include "entry_mm.h"
+	/* portable version */
+#define ALLOWINT	(~0x700)
+#define	MAX_NOINT_IPL	0
+#endif /* machine compilation types */
+
+#ifdef __ASSEMBLY__
+/*
+ * This defines the normal kernel pt-regs layout.
+ *
+ * regs a3-a6 and d6-d7 are preserved by C code
+ * the kernel doesn't mess with usp unless it needs to
+ */
+#define SWITCH_STACK_SIZE	(6*4+4)	/* includes return address */
+
+#ifdef CONFIG_COLDFIRE
+#ifdef CONFIG_COLDFIRE_SW_A7
+/*
+ * This is made a little more tricky on older ColdFires. There is no
+ * separate supervisor and user stack pointers. Need to artificially
+ * construct a usp in software... When doing this we need to disable
+ * interrupts, otherwise bad things will happen.
+ */
+.globl sw_usp
+.globl sw_ksp
+
+.macro SAVE_ALL_SYS
+	move	#0x2700,%sr		/* disable intrs */
+	btst	#5,%sp@(2)		/* from user? */
+	bnes	6f			/* no, skip */
+	movel	%sp,sw_usp		/* save user sp */
+	addql	#8,sw_usp		/* remove exception */
+	movel	sw_ksp,%sp		/* kernel sp */
+	subql	#8,%sp			/* room for exception */
+	clrl	%sp@-			/* stkadj */
+	movel	%d0,%sp@-		/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	lea	%sp@(-32),%sp		/* space for 8 regs */
+	moveml	%d1-%d5/%a0-%a2,%sp@
+	movel	sw_usp,%a0		/* get usp */
+	movel	%a0@-,%sp@(PT_OFF_PC)	/* copy exception program counter */
+	movel	%a0@-,%sp@(PT_OFF_FORMATVEC)/*copy exception format/vector/sr */
+	bra	7f
+	6:
+	clrl	%sp@-			/* stkadj */
+	movel	%d0,%sp@-		/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	lea	%sp@(-32),%sp		/* space for 8 regs */
+	moveml	%d1-%d5/%a0-%a2,%sp@
+	7:
+.endm
+
+.macro SAVE_ALL_INT
+	SAVE_ALL_SYS
+	moveq	#-1,%d0			/* not system call entry */
+	movel	%d0,%sp@(PT_OFF_ORIG_D0)
+.endm
+
+.macro RESTORE_USER
+	move	#0x2700,%sr		/* disable intrs */
+	movel	sw_usp,%a0		/* get usp */
+	movel	%sp@(PT_OFF_PC),%a0@-	/* copy exception program counter */
+	movel	%sp@(PT_OFF_FORMATVEC),%a0@-/*copy exception format/vector/sr */
+	moveml	%sp@,%d1-%d5/%a0-%a2
+	lea	%sp@(32),%sp		/* space for 8 regs */
+	movel	%sp@+,%d0
+	addql	#4,%sp			/* orig d0 */
+	addl	%sp@+,%sp		/* stkadj */
+	addql	#8,%sp			/* remove exception */
+	movel	%sp,sw_ksp		/* save ksp */
+	subql	#8,sw_usp		/* set exception */
+	movel	sw_usp,%sp		/* restore usp */
+	rte
+.endm
+
+.macro RDUSP
+	movel	sw_usp,%a3
+.endm
+
+.macro WRUSP
+	movel	%a3,sw_usp
+.endm
+
+#else /* !CONFIG_COLDFIRE_SW_A7 */
+/*
+ * Modern ColdFire parts have separate supervisor and user stack
+ * pointers. Simple load and restore macros for this case.
+ */
+.macro SAVE_ALL_SYS
+	move	#0x2700,%sr		/* disable intrs */
+	clrl	%sp@-			/* stkadj */
+	movel	%d0,%sp@-		/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	lea	%sp@(-32),%sp		/* space for 8 regs */
+	moveml	%d1-%d5/%a0-%a2,%sp@
+.endm
+
+.macro SAVE_ALL_INT
+	move	#0x2700,%sr		/* disable intrs */
+	clrl	%sp@-			/* stkadj */
+	pea	-1:w			/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	lea	%sp@(-32),%sp		/* space for 8 regs */
+	moveml	%d1-%d5/%a0-%a2,%sp@
+.endm
+
+.macro RESTORE_USER
+	moveml	%sp@,%d1-%d5/%a0-%a2
+	lea	%sp@(32),%sp		/* space for 8 regs */
+	movel	%sp@+,%d0
+	addql	#4,%sp			/* orig d0 */
+	addl	%sp@+,%sp		/* stkadj */
+	rte
+.endm
+
+.macro RDUSP
+	/*move	%usp,%a3*/
+	.word	0x4e6b
+.endm
+
+.macro WRUSP
+	/*move	%a3,%usp*/
+	.word	0x4e63
+.endm
+
+#endif /* !CONFIG_COLDFIRE_SW_A7 */
+
+.macro SAVE_SWITCH_STACK
+	lea	%sp@(-24),%sp		/* 6 regs */
+	moveml	%a3-%a6/%d6-%d7,%sp@
+.endm
+
+.macro RESTORE_SWITCH_STACK
+	moveml	%sp@,%a3-%a6/%d6-%d7
+	lea	%sp@(24),%sp		/* 6 regs */
+.endm
+
+#else /* !CONFIG_COLDFIRE */
+
+/*
+ * All other types of m68k parts (68000, 680x0, CPU32) have the same
+ * entry and exit code.
+ */
+
+/*
+ * a -1 in the orig_d0 field signifies
+ * that the stack frame is NOT for syscall
+ */
+.macro SAVE_ALL_INT
+	clrl	%sp@-			/* stk_adj */
+	pea	-1:w			/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	moveml	%d1-%d5/%a0-%a2,%sp@-
+.endm
+
+.macro SAVE_ALL_SYS
+	clrl	%sp@-			/* stk_adj */
+	movel	%d0,%sp@-		/* orig d0 */
+	movel	%d0,%sp@-		/* d0 */
+	moveml	%d1-%d5/%a0-%a2,%sp@-
+.endm
+
+.macro RESTORE_ALL
+	moveml	%sp@+,%a0-%a2/%d1-%d5
+	movel	%sp@+,%d0
+	addql	#4,%sp			/* orig d0 */
+	addl	%sp@+,%sp		/* stk adj */
+	rte
+.endm
+
+
+.macro SAVE_SWITCH_STACK
+	moveml	%a3-%a6/%d6-%d7,%sp@-
+.endm
+
+.macro RESTORE_SWITCH_STACK
+	moveml	%sp@+,%a3-%a6/%d6-%d7
+.endm
+
+#endif /* !CONFIG_COLDFIRE */
+
+/*
+ * Register %a2 is reserved and set to current task on MMU enabled systems.
+ * Non-MMU systems do not reserve %a2 in this way, and this definition is
+ * not used for them.
+ */
+#define curptr a2
+
+#define GET_CURRENT(tmp) get_current tmp
+.macro get_current reg=%d0
+	movel	%sp,\reg
+	andw	#-THREAD_SIZE,\reg
+	movel	\reg,%curptr
+	movel	%curptr@,%curptr
+.endm
+
+#else /* C source */
+
+#define STR(X) STR1(X)
+#define STR1(X) #X
+
+#define SAVE_ALL_INT				\
+	"clrl	%%sp@-;"    /* stk_adj */	\
+	"pea	-1:w;"	    /* orig d0 = -1 */	\
+	"movel	%%d0,%%sp@-;" /* d0 */		\
+	"moveml	%%d1-%%d5/%%a0-%%a2,%%sp@-"
+
+#define GET_CURRENT(tmp) \
+	"movel	%%sp,"#tmp"\n\t" \
+	"andw	#-"STR(THREAD_SIZE)","#tmp"\n\t" \
+	"movel	"#tmp",%%a2\n\t" \
+	"movel	%%a2@,%%a2"
+
 #endif
+
+#endif /* __M68K_ENTRY_H */
diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h
deleted file mode 100644
index 73b8c8fbed9c..000000000000
--- a/arch/m68k/include/asm/entry_mm.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef __M68K_ENTRY_H
-#define __M68K_ENTRY_H
-
-#include <asm/setup.h>
-#include <asm/page.h>
-#ifdef __ASSEMBLY__
-#include <asm/thread_info.h>
-#endif
-
-/*
- * Stack layout in 'ret_from_exception':
- *
- *	This allows access to the syscall arguments in registers d1-d5
- *
- *	 0(sp) - d1
- *	 4(sp) - d2
- *	 8(sp) - d3
- *	 C(sp) - d4
- *	10(sp) - d5
- *	14(sp) - a0
- *	18(sp) - a1
- *	1C(sp) - a2
- *	20(sp) - d0
- *	24(sp) - orig_d0
- *	28(sp) - stack adjustment
- *	2C(sp) - sr
- *	2E(sp) - pc
- *	32(sp) - format & vector
- */
-
-/*
- * 97/05/14 Andreas: Register %a2 is now set to the current task throughout
- *		     the whole kernel.
- */
-
-/* the following macro is used when enabling interrupts */
-#if defined(MACH_ATARI_ONLY)
-	/* block out HSYNC on the atari */
-#define ALLOWINT	(~0x400)
-#define	MAX_NOINT_IPL	3
-#else
-	/* portable version */
-#define ALLOWINT	(~0x700)
-#define	MAX_NOINT_IPL	0
-#endif /* machine compilation types */
-
-#ifdef __ASSEMBLY__
-
-#define curptr a2
-
-LFLUSH_I_AND_D = 0x00000808
-
-#define SAVE_ALL_INT save_all_int
-#define SAVE_ALL_SYS save_all_sys
-#define RESTORE_ALL restore_all
-/*
- * This defines the normal kernel pt-regs layout.
- *
- * regs a3-a6 and d6-d7 are preserved by C code
- * the kernel doesn't mess with usp unless it needs to
- */
-
-/*
- * a -1 in the orig_d0 field signifies
- * that the stack frame is NOT for syscall
- */
-.macro	save_all_int
-	clrl	%sp@-		| stk_adj
-	pea	-1:w		| orig d0
-	movel	%d0,%sp@-	| d0
-	moveml	%d1-%d5/%a0-%a1/%curptr,%sp@-
-.endm
-
-.macro	save_all_sys
-	clrl	%sp@-		| stk_adj
-	movel	%d0,%sp@-	| orig d0
-	movel	%d0,%sp@-	| d0
-	moveml	%d1-%d5/%a0-%a1/%curptr,%sp@-
-.endm
-
-.macro	restore_all
-	moveml	%sp@+,%a0-%a1/%curptr/%d1-%d5
-	movel	%sp@+,%d0
-	addql	#4,%sp		| orig d0
-	addl	%sp@+,%sp	| stk adj
-	rte
-.endm
-
-#define SWITCH_STACK_SIZE (6*4+4)	/* includes return address */
-
-#define SAVE_SWITCH_STACK save_switch_stack
-#define RESTORE_SWITCH_STACK restore_switch_stack
-#define GET_CURRENT(tmp) get_current tmp
-
-.macro	save_switch_stack
-	moveml	%a3-%a6/%d6-%d7,%sp@-
-.endm
-
-.macro	restore_switch_stack
-	moveml	%sp@+,%a3-%a6/%d6-%d7
-.endm
-
-.macro	get_current reg=%d0
-	movel	%sp,\reg
-	andw	#-THREAD_SIZE,\reg
-	movel	\reg,%curptr
-	movel	%curptr@,%curptr
-.endm
-
-#else /* C source */
-
-#define STR(X) STR1(X)
-#define STR1(X) #X
-
-#define SAVE_ALL_INT				\
-	"clrl	%%sp@-;"    /* stk_adj */	\
-	"pea	-1:w;"	    /* orig d0 = -1 */	\
-	"movel	%%d0,%%sp@-;" /* d0 */		\
-	"moveml	%%d1-%%d5/%%a0-%%a2,%%sp@-"
-#define GET_CURRENT(tmp) \
-	"movel	%%sp,"#tmp"\n\t" \
-	"andw	#-"STR(THREAD_SIZE)","#tmp"\n\t" \
-	"movel	"#tmp",%%a2\n\t" \
-	"movel	%%a2@,%%a2"
-
-#endif
-
-#endif /* __M68K_ENTRY_H */
diff --git a/arch/m68k/include/asm/entry_no.h b/arch/m68k/include/asm/entry_no.h
deleted file mode 100644
index 68611e3dbb1d..000000000000
--- a/arch/m68k/include/asm/entry_no.h
+++ /dev/null
@@ -1,181 +0,0 @@
-#ifndef __M68KNOMMU_ENTRY_H
-#define __M68KNOMMU_ENTRY_H
-
-#include <asm/setup.h>
-#include <asm/page.h>
-
-/*
- * Stack layout in 'ret_from_exception':
- *
- * This allows access to the syscall arguments in registers d1-d5
- *
- *	 0(sp) - d1
- *	 4(sp) - d2
- *	 8(sp) - d3
- *	 C(sp) - d4
- *	10(sp) - d5
- *	14(sp) - a0
- *	18(sp) - a1
- *	1C(sp) - a2
- *	20(sp) - d0
- *	24(sp) - orig_d0
- *	28(sp) - stack adjustment
- *	2C(sp) - [ sr              ] [ format & vector ]
- *	2E(sp) - [ pc-hiword       ] [ sr              ]
- *	30(sp) - [ pc-loword       ] [ pc-hiword       ]
- *	32(sp) - [ format & vector ] [ pc-loword       ]
- *		  ^^^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^
- *			M68K		  COLDFIRE
- */
-
-#define ALLOWINT (~0x700)
-
-#ifdef __ASSEMBLY__
-
-#define SWITCH_STACK_SIZE (6*4+4)	/* Includes return address */
-
-/*
- * This defines the normal kernel pt-regs layout.
- *
- * regs are a2-a6 and d6-d7 preserved by C code
- * the kernel doesn't mess with usp unless it needs to
- */
-
-#ifdef CONFIG_COLDFIRE
-#ifdef CONFIG_COLDFIRE_SW_A7
-/*
- * This is made a little more tricky on older ColdFires. There is no
- * separate supervisor and user stack pointers. Need to artificially
- * construct a usp in software... When doing this we need to disable
- * interrupts, otherwise bad things will happen.
- */
-.globl sw_usp
-.globl sw_ksp
-
-.macro SAVE_ALL
-	move	#0x2700,%sr		/* disable intrs */
-	btst	#5,%sp@(2)		/* from user? */
-	bnes	6f			/* no, skip */
-	movel	%sp,sw_usp		/* save user sp */
-	addql	#8,sw_usp		/* remove exception */
-	movel	sw_ksp,%sp		/* kernel sp */
-	subql	#8,%sp			/* room for exception */
-	clrl	%sp@-			/* stkadj */
-	movel	%d0,%sp@-		/* orig d0 */
-	movel	%d0,%sp@-		/* d0 */
-	lea	%sp@(-32),%sp		/* space for 8 regs */
-	moveml	%d1-%d5/%a0-%a2,%sp@
-	movel	sw_usp,%a0		/* get usp */
-	movel	%a0@-,%sp@(PT_OFF_PC)	/* copy exception program counter */
-	movel	%a0@-,%sp@(PT_OFF_FORMATVEC)/*copy exception format/vector/sr */
-	bra	7f
-	6:
-	clrl	%sp@-			/* stkadj */
-	movel	%d0,%sp@-		/* orig d0 */
-	movel	%d0,%sp@-		/* d0 */
-	lea	%sp@(-32),%sp		/* space for 8 regs */
-	moveml	%d1-%d5/%a0-%a2,%sp@
-	7:
-.endm
-
-.macro RESTORE_USER
-	move	#0x2700,%sr		/* disable intrs */
-	movel	sw_usp,%a0		/* get usp */
-	movel	%sp@(PT_OFF_PC),%a0@-	/* copy exception program counter */
-	movel	%sp@(PT_OFF_FORMATVEC),%a0@-/*copy exception format/vector/sr */
-	moveml	%sp@,%d1-%d5/%a0-%a2
-	lea	%sp@(32),%sp		/* space for 8 regs */
-	movel	%sp@+,%d0
-	addql	#4,%sp			/* orig d0 */
-	addl	%sp@+,%sp		/* stkadj */
-	addql	#8,%sp			/* remove exception */
-	movel	%sp,sw_ksp		/* save ksp */
-	subql	#8,sw_usp		/* set exception */
-	movel	sw_usp,%sp		/* restore usp */
-	rte
-.endm
-
-.macro RDUSP
-	movel	sw_usp,%a3
-.endm
-
-.macro WRUSP
-	movel	%a3,sw_usp
-.endm
-
-#else /* !CONFIG_COLDFIRE_SW_A7 */
-/*
- * Modern ColdFire parts have separate supervisor and user stack
- * pointers. Simple load and restore macros for this case.
- */
-.macro SAVE_ALL
-	move	#0x2700,%sr		/* disable intrs */
-	clrl	%sp@-			/* stkadj */
-	movel	%d0,%sp@-		/* orig d0 */
-	movel	%d0,%sp@-		/* d0 */
-	lea	%sp@(-32),%sp		/* space for 8 regs */
-	moveml	%d1-%d5/%a0-%a2,%sp@
-.endm
-
-.macro RESTORE_USER
-	moveml	%sp@,%d1-%d5/%a0-%a2
-	lea	%sp@(32),%sp		/* space for 8 regs */
-	movel	%sp@+,%d0
-	addql	#4,%sp			/* orig d0 */
-	addl	%sp@+,%sp		/* stkadj */
-	rte
-.endm
-
-.macro RDUSP
-	/*move	%usp,%a3*/
-	.word	0x4e6b
-.endm
-
-.macro WRUSP
-	/*move	%a3,%usp*/
-	.word	0x4e63
-.endm
-
-#endif /* !CONFIG_COLDFIRE_SW_A7 */
-
-.macro SAVE_SWITCH_STACK
-	lea	%sp@(-24),%sp		/* 6 regs */
-	moveml	%a3-%a6/%d6-%d7,%sp@
-.endm
-
-.macro RESTORE_SWITCH_STACK
-	moveml	%sp@,%a3-%a6/%d6-%d7
-	lea	%sp@(24),%sp		/* 6 regs */
-.endm
-
-#else /* !CONFIG_COLDFIRE */
-
-/*
- * Standard 68k interrupt entry and exit macros.
- */
-.macro SAVE_ALL
-	clrl	%sp@-			/* stkadj */
-	movel	%d0,%sp@-		/* orig d0 */
-	movel	%d0,%sp@-		/* d0 */
-	moveml	%d1-%d5/%a0-%a2,%sp@-
-.endm
-
-.macro RESTORE_ALL
-	moveml	%sp@+,%a0-%a2/%d1-%d5
-	movel	%sp@+,%d0
-	addql	#4,%sp			/* orig d0 */
-	addl	%sp@+,%sp		/* stkadj */
-	rte
-.endm
-
-.macro SAVE_SWITCH_STACK
-	moveml	%a3-%a6/%d6-%d7,%sp@-
-.endm
-
-.macro RESTORE_SWITCH_STACK
-	moveml	%sp@+,%a3-%a6/%d6-%d7
-.endm
-
-#endif /* !COLDFIRE_SW_A7 */
-#endif /* __ASSEMBLY__ */
-#endif /* __M68KNOMMU_ENTRY_H */
diff --git a/arch/m68k/include/asm/m520xsim.h b/arch/m68k/include/asm/m520xsim.h
index b6bf2c518bac..eda62de7e607 100644
--- a/arch/m68k/include/asm/m520xsim.h
+++ b/arch/m68k/include/asm/m520xsim.h
@@ -90,15 +90,13 @@
 #define MCFGPIO_PDDR_FECH		0xFC0A4013
 #define MCFGPIO_PDDR_FECL		0xFC0A4014
 
-#define MCFGPIO_PPDSDR_BUSCTL		0xFC0A401A
-#define MCFGPIO_PPDSDR_BE		0xFC0A401B
-#define MCFGPIO_PPDSDR_CS		0xFC0A401C
-#define MCFGPIO_PPDSDR_FECI2C		0xFC0A401D
-#define MCFGPIO_PPDSDR_QSPI		0xFC0A401E
-#define MCFGPIO_PPDSDR_TIMER		0xFC0A401F
-#define MCFGPIO_PPDSDR_UART		0xFC0A4021
-#define MCFGPIO_PPDSDR_FECH		0xFC0A4021
-#define MCFGPIO_PPDSDR_FECL		0xFC0A4022
+#define MCFGPIO_PPDSDR_CS		0xFC0A401A
+#define MCFGPIO_PPDSDR_FECI2C		0xFC0A401B
+#define MCFGPIO_PPDSDR_QSPI		0xFC0A401C
+#define MCFGPIO_PPDSDR_TIMER		0xFC0A401D
+#define MCFGPIO_PPDSDR_UART		0xFC0A401E
+#define MCFGPIO_PPDSDR_FECH		0xFC0A401F
+#define MCFGPIO_PPDSDR_FECL		0xFC0A4020
 
 #define MCFGPIO_PCLRR_BUSCTL		0xFC0A4024
 #define MCFGPIO_PCLRR_BE		0xFC0A4025
@@ -113,11 +111,11 @@
 /*
  * Generic GPIO support
  */
-#define MCFGPIO_PODR			MCFGPIO_PODR_BUSCTL
-#define MCFGPIO_PDDR			MCFGPIO_PDDR_BUSCTL
-#define MCFGPIO_PPDR			MCFGPIO_PPDSDR_BUSCTL
-#define MCFGPIO_SETR			MCFGPIO_PPDSDR_BUSCTL
-#define MCFGPIO_CLRR			MCFGPIO_PCLRR_BUSCTL
+#define MCFGPIO_PODR			MCFGPIO_PODR_CS
+#define MCFGPIO_PDDR			MCFGPIO_PDDR_CS
+#define MCFGPIO_PPDR			MCFGPIO_PPDSDR_CS
+#define MCFGPIO_SETR			MCFGPIO_PPDSDR_CS
+#define MCFGPIO_CLRR			MCFGPIO_PCLRR_CS
 
 #define MCFGPIO_PIN_MAX			80
 #define MCFGPIO_IRQ_MAX			8
diff --git a/arch/m68k/include/asm/mcfqspi.h b/arch/m68k/include/asm/mcfqspi.h
index 39d90d51111d..7fe631972f1f 100644
--- a/arch/m68k/include/asm/mcfqspi.h
+++ b/arch/m68k/include/asm/mcfqspi.h
@@ -24,9 +24,11 @@
 #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x)
 #define	MCFQSPI_IOBASE		(MCF_IPSBAR + 0x340)
 #elif defined(CONFIG_M5249)
-#define MCFQSPI_IOBASE		(MCF_MBAR + 0x300)
-#elif defined(CONFIG_M520x) || defined(CONFIG_M532x)
-#define MCFQSPI_IOBASE		0xFC058000
+#define MCFQSPI_IOBASE         (MCF_MBAR + 0x300)
+#elif defined(CONFIG_M520x)
+#define MCFQSPI_IOBASE         0xFC05C000
+#elif defined(CONFIG_M532x)
+#define MCFQSPI_IOBASE         0xFC058000
 #endif
 #define MCFQSPI_IOSIZE		0x40
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 90595721185f..a8d1c60eb9ce 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -5,6 +5,9 @@
  
 extern unsigned long memory_start;
 extern unsigned long memory_end;
+extern unsigned long _rambase;
+extern unsigned long _ramstart;
+extern unsigned long _ramend;
 
 #define get_user_page(vaddr)		__get_free_page(GFP_KERNEL)
 #define free_user_page(page, addr)	free_page(addr)
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index d8ef53ac03f9..568facf30276 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -135,6 +135,12 @@ do {                                                    \
 	wrusp(_usp);                                    \
 } while(0)
 
+static inline  int handle_kernel_fault(struct pt_regs *regs)
+{
+	/* Any fault in kernel is fatal on non-mmu */
+	return 0;
+}
+
 #endif
 
 /* Forward declaration, a strange C thing */
diff --git a/arch/m68k/include/asm/sections.h b/arch/m68k/include/asm/sections.h
index d64967ecfec6..5277e52715ec 100644
--- a/arch/m68k/include/asm/sections.h
+++ b/arch/m68k/include/asm/sections.h
@@ -3,4 +3,6 @@
 
 #include <asm-generic/sections.h>
 
+extern char _sbss[], _ebss[];
+
 #endif /* _ASM_M68K_SECTIONS_H */
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index c482ebc9dd54..e7f0f2e5ad44 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -1,5 +1,21 @@
-ifdef CONFIG_MMU
-include arch/m68k/kernel/Makefile_mm
-else
-include arch/m68k/kernel/Makefile_no
+#
+# Makefile for the linux kernel.
+#
+
+extra-$(CONFIG_MMU)	:= head.o
+extra-$(CONFIG_SUN3)	:= sun3-head.o
+extra-y			+= vmlinux.lds
+
+obj-y	:= entry.o m68k_ksyms.o module.o process.o ptrace.o setup.o signal.o \
+	   sys_m68k.o syscalltable.o time.o traps.o
+
+obj-$(CONFIG_MMU)	+= ints.o devres.o vectors.o
+devres-$(CONFIG_MMU)	= ../../../kernel/irq/devres.o
+
+ifndef CONFIG_MMU_SUN3
+obj-y			+= dma.o
 endif
+ifndef CONFIG_MMU
+obj-y			+= init_task.o irq.o
+endif
+
diff --git a/arch/m68k/kernel/Makefile_mm b/arch/m68k/kernel/Makefile_mm
deleted file mode 100644
index aced67804579..000000000000
--- a/arch/m68k/kernel/Makefile_mm
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-
-ifndef CONFIG_SUN3
-  extra-y := head.o
-else
-  extra-y := sun3-head.o
-endif
-extra-y	+= vmlinux.lds
-
-obj-y	:= entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
-	   sys_m68k.o time.o setup.o m68k_ksyms.o devres.o syscalltable.o
-
-devres-y = ../../../kernel/irq/devres.o
-
-obj-y$(CONFIG_MMU_SUN3) += dma.o	# no, it's not a typo
diff --git a/arch/m68k/kernel/Makefile_no b/arch/m68k/kernel/Makefile_no
deleted file mode 100644
index 37c3fc074c0a..000000000000
--- a/arch/m68k/kernel/Makefile_no
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Makefile for arch/m68knommu/kernel.
-#
-
-extra-y := vmlinux.lds
-
-obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
-	 setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
-
-obj-$(CONFIG_MODULES)	+= module.o
diff --git a/arch/m68k/kernel/entry_no.S b/arch/m68k/kernel/entry_no.S
index 5f0f6b598b5a..1b4289061a64 100644
--- a/arch/m68k/kernel/entry_no.S
+++ b/arch/m68k/kernel/entry_no.S
@@ -43,7 +43,7 @@
 .globl sys_vfork
 
 ENTRY(buserr)
-	SAVE_ALL
+	SAVE_ALL_INT
 	moveq	#-1,%d0
 	movel	%d0,%sp@(PT_OFF_ORIG_D0)
 	movel	%sp,%sp@- 		/* stack frame pointer argument */
@@ -52,7 +52,7 @@ ENTRY(buserr)
 	jra	ret_from_exception
 
 ENTRY(trap)
-	SAVE_ALL
+	SAVE_ALL_INT
 	moveq	#-1,%d0
 	movel	%d0,%sp@(PT_OFF_ORIG_D0)
 	movel	%sp,%sp@- 		/* stack frame pointer argument */
@@ -64,7 +64,7 @@ ENTRY(trap)
 
 .globl dbginterrupt
 ENTRY(dbginterrupt)
-	SAVE_ALL
+	SAVE_ALL_INT
 	moveq	#-1,%d0
 	movel	%d0,%sp@(PT_OFF_ORIG_D0)
 	movel	%sp,%sp@- 		/* stack frame pointer argument */
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 16b2de7f5101..2ed8c0fb1517 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -36,6 +36,7 @@
 #include <asm/irq.h>
 #include <asm/machdep.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 
 unsigned long memory_start;
 unsigned long memory_end;
@@ -80,9 +81,6 @@ void (*mach_power_off)(void);
 #define	CPU_INSTR_PER_JIFFY	16
 #endif
 
-extern int _stext, _etext, _sdata, _edata, _sbss, _ebss, _end;
-extern int _ramstart, _ramend;
-
 #if defined(CONFIG_UBOOT)
 /*
  * parse_uboot_commandline
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index c98add3f5f0f..89362f2bb56a 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1,5 +1,1107 @@
-#ifdef CONFIG_MMU
-#include "traps_mm.c"
+/*
+ *  linux/arch/m68k/kernel/traps.c
+ *
+ *  Copyright (C) 1993, 1994 by Hamish Macdonald
+ *
+ *  68040 fixes by Michael Rausch
+ *  68040 fixes by Martin Apel
+ *  68040 fixes and writeback by Richard Zidlicky
+ *  68060 fixes by Roman Hodek
+ *  68060 fixes by Jesper Skov
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+/*
+ * Sets up all exception vectors
+ */
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/user.h>
+#include <linux/string.h>
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/kallsyms.h>
+
+#include <asm/setup.h>
+#include <asm/fpu.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/traps.h>
+#include <asm/pgalloc.h>
+#include <asm/machdep.h>
+#include <asm/siginfo.h>
+
+
+static const char *vec_names[] = {
+	[VEC_RESETSP]	= "RESET SP",
+	[VEC_RESETPC]	= "RESET PC",
+	[VEC_BUSERR]	= "BUS ERROR",
+	[VEC_ADDRERR]	= "ADDRESS ERROR",
+	[VEC_ILLEGAL]	= "ILLEGAL INSTRUCTION",
+	[VEC_ZERODIV]	= "ZERO DIVIDE",
+	[VEC_CHK]	= "CHK",
+	[VEC_TRAP]	= "TRAPcc",
+	[VEC_PRIV]	= "PRIVILEGE VIOLATION",
+	[VEC_TRACE]	= "TRACE",
+	[VEC_LINE10]	= "LINE 1010",
+	[VEC_LINE11]	= "LINE 1111",
+	[VEC_RESV12]	= "UNASSIGNED RESERVED 12",
+	[VEC_COPROC]	= "COPROCESSOR PROTOCOL VIOLATION",
+	[VEC_FORMAT]	= "FORMAT ERROR",
+	[VEC_UNINT]	= "UNINITIALIZED INTERRUPT",
+	[VEC_RESV16]	= "UNASSIGNED RESERVED 16",
+	[VEC_RESV17]	= "UNASSIGNED RESERVED 17",
+	[VEC_RESV18]	= "UNASSIGNED RESERVED 18",
+	[VEC_RESV19]	= "UNASSIGNED RESERVED 19",
+	[VEC_RESV20]	= "UNASSIGNED RESERVED 20",
+	[VEC_RESV21]	= "UNASSIGNED RESERVED 21",
+	[VEC_RESV22]	= "UNASSIGNED RESERVED 22",
+	[VEC_RESV23]	= "UNASSIGNED RESERVED 23",
+	[VEC_SPUR]	= "SPURIOUS INTERRUPT",
+	[VEC_INT1]	= "LEVEL 1 INT",
+	[VEC_INT2]	= "LEVEL 2 INT",
+	[VEC_INT3]	= "LEVEL 3 INT",
+	[VEC_INT4]	= "LEVEL 4 INT",
+	[VEC_INT5]	= "LEVEL 5 INT",
+	[VEC_INT6]	= "LEVEL 6 INT",
+	[VEC_INT7]	= "LEVEL 7 INT",
+	[VEC_SYS]	= "SYSCALL",
+	[VEC_TRAP1]	= "TRAP #1",
+	[VEC_TRAP2]	= "TRAP #2",
+	[VEC_TRAP3]	= "TRAP #3",
+	[VEC_TRAP4]	= "TRAP #4",
+	[VEC_TRAP5]	= "TRAP #5",
+	[VEC_TRAP6]	= "TRAP #6",
+	[VEC_TRAP7]	= "TRAP #7",
+	[VEC_TRAP8]	= "TRAP #8",
+	[VEC_TRAP9]	= "TRAP #9",
+	[VEC_TRAP10]	= "TRAP #10",
+	[VEC_TRAP11]	= "TRAP #11",
+	[VEC_TRAP12]	= "TRAP #12",
+	[VEC_TRAP13]	= "TRAP #13",
+	[VEC_TRAP14]	= "TRAP #14",
+	[VEC_TRAP15]	= "TRAP #15",
+	[VEC_FPBRUC]	= "FPCP BSUN",
+	[VEC_FPIR]	= "FPCP INEXACT",
+	[VEC_FPDIVZ]	= "FPCP DIV BY 0",
+	[VEC_FPUNDER]	= "FPCP UNDERFLOW",
+	[VEC_FPOE]	= "FPCP OPERAND ERROR",
+	[VEC_FPOVER]	= "FPCP OVERFLOW",
+	[VEC_FPNAN]	= "FPCP SNAN",
+	[VEC_FPUNSUP]	= "FPCP UNSUPPORTED OPERATION",
+	[VEC_MMUCFG]	= "MMU CONFIGURATION ERROR",
+	[VEC_MMUILL]	= "MMU ILLEGAL OPERATION ERROR",
+	[VEC_MMUACC]	= "MMU ACCESS LEVEL VIOLATION ERROR",
+	[VEC_RESV59]	= "UNASSIGNED RESERVED 59",
+	[VEC_UNIMPEA]	= "UNASSIGNED RESERVED 60",
+	[VEC_UNIMPII]	= "UNASSIGNED RESERVED 61",
+	[VEC_RESV62]	= "UNASSIGNED RESERVED 62",
+	[VEC_RESV63]	= "UNASSIGNED RESERVED 63",
+};
+
+static const char *space_names[] = {
+	[0]		= "Space 0",
+	[USER_DATA]	= "User Data",
+	[USER_PROGRAM]	= "User Program",
+#ifndef CONFIG_SUN3
+	[3]		= "Space 3",
 #else
-#include "traps_no.c"
+	[FC_CONTROL]	= "Control",
+#endif
+	[4]		= "Space 4",
+	[SUPER_DATA]	= "Super Data",
+	[SUPER_PROGRAM]	= "Super Program",
+	[CPU_SPACE]	= "CPU"
+};
+
+void die_if_kernel(char *,struct pt_regs *,int);
+asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
+                             unsigned long error_code);
+int send_fault_sig(struct pt_regs *regs);
+
+asmlinkage void trap_c(struct frame *fp);
+
+#if defined (CONFIG_M68060)
+static inline void access_error060 (struct frame *fp)
+{
+	unsigned long fslw = fp->un.fmt4.pc; /* is really FSLW for access error */
+
+#ifdef DEBUG
+	printk("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
+#endif
+
+	if (fslw & MMU060_BPE) {
+		/* branch prediction error -> clear branch cache */
+		__asm__ __volatile__ ("movec %/cacr,%/d0\n\t"
+				      "orl   #0x00400000,%/d0\n\t"
+				      "movec %/d0,%/cacr"
+				      : : : "d0" );
+		/* return if there's no other error */
+		if (!(fslw & MMU060_ERR_BITS) && !(fslw & MMU060_SEE))
+			return;
+	}
+
+	if (fslw & (MMU060_DESC_ERR | MMU060_WP | MMU060_SP)) {
+		unsigned long errorcode;
+		unsigned long addr = fp->un.fmt4.effaddr;
+
+		if (fslw & MMU060_MA)
+			addr = (addr + PAGE_SIZE - 1) & PAGE_MASK;
+
+		errorcode = 1;
+		if (fslw & MMU060_DESC_ERR) {
+			__flush_tlb040_one(addr);
+			errorcode = 0;
+		}
+		if (fslw & MMU060_W)
+			errorcode |= 2;
+#ifdef DEBUG
+		printk("errorcode = %d\n", errorcode );
+#endif
+		do_page_fault(&fp->ptregs, addr, errorcode);
+	} else if (fslw & (MMU060_SEE)){
+		/* Software Emulation Error.
+		 * fault during mem_read/mem_write in ifpsp060/os.S
+		 */
+		send_fault_sig(&fp->ptregs);
+	} else if (!(fslw & (MMU060_RE|MMU060_WE)) ||
+		   send_fault_sig(&fp->ptregs) > 0) {
+		printk("pc=%#lx, fa=%#lx\n", fp->ptregs.pc, fp->un.fmt4.effaddr);
+		printk( "68060 access error, fslw=%lx\n", fslw );
+		trap_c( fp );
+	}
+}
+#endif /* CONFIG_M68060 */
+
+#if defined (CONFIG_M68040)
+static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
+{
+	unsigned long mmusr;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(MAKE_MM_SEG(wbs));
+
+	if (iswrite)
+		asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
+	else
+		asm volatile (".chip 68040; ptestr (%0); .chip 68k" : : "a" (addr));
+
+	asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
+
+	set_fs(old_fs);
+
+	return mmusr;
+}
+
+static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
+				   unsigned long wbd)
+{
+	int res = 0;
+	mm_segment_t old_fs = get_fs();
+
+	/* set_fs can not be moved, otherwise put_user() may oops */
+	set_fs(MAKE_MM_SEG(wbs));
+
+	switch (wbs & WBSIZ_040) {
+	case BA_SIZE_BYTE:
+		res = put_user(wbd & 0xff, (char __user *)wba);
+		break;
+	case BA_SIZE_WORD:
+		res = put_user(wbd & 0xffff, (short __user *)wba);
+		break;
+	case BA_SIZE_LONG:
+		res = put_user(wbd, (int __user *)wba);
+		break;
+	}
+
+	/* set_fs can not be moved, otherwise put_user() may oops */
+	set_fs(old_fs);
+
+
+#ifdef DEBUG
+	printk("do_040writeback1, res=%d\n",res);
+#endif
+
+	return res;
+}
+
+/* after an exception in a writeback the stack frame corresponding
+ * to that exception is discarded, set a few bits in the old frame
+ * to simulate what it should look like
+ */
+static inline void fix_xframe040(struct frame *fp, unsigned long wba, unsigned short wbs)
+{
+	fp->un.fmt7.faddr = wba;
+	fp->un.fmt7.ssw = wbs & 0xff;
+	if (wba != current->thread.faddr)
+	    fp->un.fmt7.ssw |= MA_040;
+}
+
+static inline void do_040writebacks(struct frame *fp)
+{
+	int res = 0;
+#if 0
+	if (fp->un.fmt7.wb1s & WBV_040)
+		printk("access_error040: cannot handle 1st writeback. oops.\n");
+#endif
+
+	if ((fp->un.fmt7.wb2s & WBV_040) &&
+	    !(fp->un.fmt7.wb2s & WBTT_040)) {
+		res = do_040writeback1(fp->un.fmt7.wb2s, fp->un.fmt7.wb2a,
+				       fp->un.fmt7.wb2d);
+		if (res)
+			fix_xframe040(fp, fp->un.fmt7.wb2a, fp->un.fmt7.wb2s);
+		else
+			fp->un.fmt7.wb2s = 0;
+	}
+
+	/* do the 2nd wb only if the first one was successful (except for a kernel wb) */
+	if (fp->un.fmt7.wb3s & WBV_040 && (!res || fp->un.fmt7.wb3s & 4)) {
+		res = do_040writeback1(fp->un.fmt7.wb3s, fp->un.fmt7.wb3a,
+				       fp->un.fmt7.wb3d);
+		if (res)
+		    {
+			fix_xframe040(fp, fp->un.fmt7.wb3a, fp->un.fmt7.wb3s);
+
+			fp->un.fmt7.wb2s = fp->un.fmt7.wb3s;
+			fp->un.fmt7.wb3s &= (~WBV_040);
+			fp->un.fmt7.wb2a = fp->un.fmt7.wb3a;
+			fp->un.fmt7.wb2d = fp->un.fmt7.wb3d;
+		    }
+		else
+			fp->un.fmt7.wb3s = 0;
+	}
+
+	if (res)
+		send_fault_sig(&fp->ptregs);
+}
+
+/*
+ * called from sigreturn(), must ensure userspace code didn't
+ * manipulate exception frame to circumvent protection, then complete
+ * pending writebacks
+ * we just clear TM2 to turn it into a userspace access
+ */
+asmlinkage void berr_040cleanup(struct frame *fp)
+{
+	fp->un.fmt7.wb2s &= ~4;
+	fp->un.fmt7.wb3s &= ~4;
+
+	do_040writebacks(fp);
+}
+
+static inline void access_error040(struct frame *fp)
+{
+	unsigned short ssw = fp->un.fmt7.ssw;
+	unsigned long mmusr;
+
+#ifdef DEBUG
+	printk("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
+        printk("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
+		fp->un.fmt7.wb2s, fp->un.fmt7.wb3s);
+	printk ("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
+		fp->un.fmt7.wb2a, fp->un.fmt7.wb3a,
+		fp->un.fmt7.wb2d, fp->un.fmt7.wb3d);
+#endif
+
+	if (ssw & ATC_040) {
+		unsigned long addr = fp->un.fmt7.faddr;
+		unsigned long errorcode;
+
+		/*
+		 * The MMU status has to be determined AFTER the address
+		 * has been corrected if there was a misaligned access (MA).
+		 */
+		if (ssw & MA_040)
+			addr = (addr + 7) & -8;
+
+		/* MMU error, get the MMUSR info for this access */
+		mmusr = probe040(!(ssw & RW_040), addr, ssw);
+#ifdef DEBUG
+		printk("mmusr = %lx\n", mmusr);
+#endif
+		errorcode = 1;
+		if (!(mmusr & MMU_R_040)) {
+			/* clear the invalid atc entry */
+			__flush_tlb040_one(addr);
+			errorcode = 0;
+		}
+
+		/* despite what documentation seems to say, RMW
+		 * accesses have always both the LK and RW bits set */
+		if (!(ssw & RW_040) || (ssw & LK_040))
+			errorcode |= 2;
+
+		if (do_page_fault(&fp->ptregs, addr, errorcode)) {
+#ifdef DEBUG
+			printk("do_page_fault() !=0\n");
+#endif
+			if (user_mode(&fp->ptregs)){
+				/* delay writebacks after signal delivery */
+#ifdef DEBUG
+			        printk(".. was usermode - return\n");
+#endif
+				return;
+			}
+			/* disable writeback into user space from kernel
+			 * (if do_page_fault didn't fix the mapping,
+                         * the writeback won't do good)
+			 */
+disable_wb:
+#ifdef DEBUG
+			printk(".. disabling wb2\n");
+#endif
+			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
+				fp->un.fmt7.wb2s &= ~WBV_040;
+			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
+				fp->un.fmt7.wb3s &= ~WBV_040;
+		}
+	} else {
+		/* In case of a bus error we either kill the process or expect
+		 * the kernel to catch the fault, which then is also responsible
+		 * for cleaning up the mess.
+		 */
+		current->thread.signo = SIGBUS;
+		current->thread.faddr = fp->un.fmt7.faddr;
+		if (send_fault_sig(&fp->ptregs) >= 0)
+			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
+			       fp->un.fmt7.faddr);
+		goto disable_wb;
+	}
+
+	do_040writebacks(fp);
+}
+#endif /* CONFIG_M68040 */
+
+#if defined(CONFIG_SUN3)
+#include <asm/sun3mmu.h>
+
+extern int mmu_emu_handle_fault (unsigned long, int, int);
+
+/* sun3 version of bus_error030 */
+
+static inline void bus_error030 (struct frame *fp)
+{
+	unsigned char buserr_type = sun3_get_buserr ();
+	unsigned long addr, errorcode;
+	unsigned short ssw = fp->un.fmtb.ssw;
+	extern unsigned long _sun3_map_test_start, _sun3_map_test_end;
+
+#ifdef DEBUG
+	if (ssw & (FC | FB))
+		printk ("Instruction fault at %#010lx\n",
+			ssw & FC ?
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
+			:
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+	if (ssw & DF)
+		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+			ssw & RW ? "read" : "write",
+			fp->un.fmtb.daddr,
+			space_names[ssw & DFC], fp->ptregs.pc);
+#endif
+
+	/*
+	 * Check if this page should be demand-mapped. This needs to go before
+	 * the testing for a bad kernel-space access (demand-mapping applies
+	 * to kernel accesses too).
+	 */
+
+	if ((ssw & DF)
+	    && (buserr_type & (SUN3_BUSERR_PROTERR | SUN3_BUSERR_INVALID))) {
+		if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 0))
+			return;
+	}
+
+	/* Check for kernel-space pagefault (BAD). */
+	if (fp->ptregs.sr & PS_S) {
+		/* kernel fault must be a data fault to user space */
+		if (! ((ssw & DF) && ((ssw & DFC) == USER_DATA))) {
+		     // try checking the kernel mappings before surrender
+		     if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 1))
+			  return;
+			/* instruction fault or kernel data fault! */
+			if (ssw & (FC | FB))
+				printk ("Instruction fault at %#010lx\n",
+					fp->ptregs.pc);
+			if (ssw & DF) {
+				/* was this fault incurred testing bus mappings? */
+				if((fp->ptregs.pc >= (unsigned long)&_sun3_map_test_start) &&
+				   (fp->ptregs.pc <= (unsigned long)&_sun3_map_test_end)) {
+					send_fault_sig(&fp->ptregs);
+					return;
+				}
+
+				printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+					ssw & RW ? "read" : "write",
+					fp->un.fmtb.daddr,
+					space_names[ssw & DFC], fp->ptregs.pc);
+			}
+			printk ("BAD KERNEL BUSERR\n");
+
+			die_if_kernel("Oops", &fp->ptregs,0);
+			force_sig(SIGKILL, current);
+			return;
+		}
+	} else {
+		/* user fault */
+		if (!(ssw & (FC | FB)) && !(ssw & DF))
+			/* not an instruction fault or data fault! BAD */
+			panic ("USER BUSERR w/o instruction or data fault");
+	}
+
+
+	/* First handle the data fault, if any.  */
+	if (ssw & DF) {
+		addr = fp->un.fmtb.daddr;
+
+// errorcode bit 0:	0 -> no page		1 -> protection fault
+// errorcode bit 1:	0 -> read fault		1 -> write fault
+
+// (buserr_type & SUN3_BUSERR_PROTERR)	-> protection fault
+// (buserr_type & SUN3_BUSERR_INVALID)	-> invalid page fault
+
+		if (buserr_type & SUN3_BUSERR_PROTERR)
+			errorcode = 0x01;
+		else if (buserr_type & SUN3_BUSERR_INVALID)
+			errorcode = 0x00;
+		else {
+#ifdef DEBUG
+			printk ("*** unexpected busfault type=%#04x\n", buserr_type);
+			printk ("invalid %s access at %#lx from pc %#lx\n",
+				!(ssw & RW) ? "write" : "read", addr,
+				fp->ptregs.pc);
+#endif
+			die_if_kernel ("Oops", &fp->ptregs, buserr_type);
+			force_sig (SIGBUS, current);
+			return;
+		}
+
+//todo: wtf is RM bit? --m
+		if (!(ssw & RW) || ssw & RM)
+			errorcode |= 0x02;
+
+		/* Handle page fault. */
+		do_page_fault (&fp->ptregs, addr, errorcode);
+
+		/* Retry the data fault now. */
+		return;
+	}
+
+	/* Now handle the instruction fault. */
+
+	/* Get the fault address. */
+	if (fp->ptregs.format == 0xA)
+		addr = fp->ptregs.pc + 4;
+	else
+		addr = fp->un.fmtb.baddr;
+	if (ssw & FC)
+		addr -= 2;
+
+	if (buserr_type & SUN3_BUSERR_INVALID) {
+		if (!mmu_emu_handle_fault (fp->un.fmtb.daddr, 1, 0))
+			do_page_fault (&fp->ptregs, addr, 0);
+       } else {
+#ifdef DEBUG
+		printk ("protection fault on insn access (segv).\n");
+#endif
+		force_sig (SIGSEGV, current);
+       }
+}
+#else
+#if defined(CPU_M68020_OR_M68030)
+static inline void bus_error030 (struct frame *fp)
+{
+	volatile unsigned short temp;
+	unsigned short mmusr;
+	unsigned long addr, errorcode;
+	unsigned short ssw = fp->un.fmtb.ssw;
+#ifdef DEBUG
+	unsigned long desc;
+
+	printk ("pid = %x  ", current->pid);
+	printk ("SSW=%#06x  ", ssw);
+
+	if (ssw & (FC | FB))
+		printk ("Instruction fault at %#010lx\n",
+			ssw & FC ?
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
+			:
+			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+	if (ssw & DF)
+		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+			ssw & RW ? "read" : "write",
+			fp->un.fmtb.daddr,
+			space_names[ssw & DFC], fp->ptregs.pc);
+#endif
+
+	/* ++andreas: If a data fault and an instruction fault happen
+	   at the same time map in both pages.  */
+
+	/* First handle the data fault, if any.  */
+	if (ssw & DF) {
+		addr = fp->un.fmtb.daddr;
+
+#ifdef DEBUG
+		asm volatile ("ptestr %3,%2@,#7,%0\n\t"
+			      "pmove %%psr,%1@"
+			      : "=a&" (desc)
+			      : "a" (&temp), "a" (addr), "d" (ssw));
+#else
+		asm volatile ("ptestr %2,%1@,#7\n\t"
+			      "pmove %%psr,%0@"
+			      : : "a" (&temp), "a" (addr), "d" (ssw));
+#endif
+		mmusr = temp;
+
+#ifdef DEBUG
+		printk("mmusr is %#x for addr %#lx in task %p\n",
+		       mmusr, addr, current);
+		printk("descriptor address is %#lx, contents %#lx\n",
+		       __va(desc), *(unsigned long *)__va(desc));
+#endif
+
+		errorcode = (mmusr & MMU_I) ? 0 : 1;
+		if (!(ssw & RW) || (ssw & RM))
+			errorcode |= 2;
+
+		if (mmusr & (MMU_I | MMU_WP)) {
+			if (ssw & 4) {
+				printk("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				       ssw & RW ? "read" : "write",
+				       fp->un.fmtb.daddr,
+				       space_names[ssw & DFC], fp->ptregs.pc);
+				goto buserr;
+			}
+			/* Don't try to do anything further if an exception was
+			   handled. */
+			if (do_page_fault (&fp->ptregs, addr, errorcode) < 0)
+				return;
+		} else if (!(mmusr & MMU_I)) {
+			/* probably a 020 cas fault */
+			if (!(ssw & RM) && send_fault_sig(&fp->ptregs) > 0)
+				printk("unexpected bus error (%#x,%#x)\n", ssw, mmusr);
+		} else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
+			printk("invalid %s access at %#lx from pc %#lx\n",
+			       !(ssw & RW) ? "write" : "read", addr,
+			       fp->ptregs.pc);
+			die_if_kernel("Oops",&fp->ptregs,mmusr);
+			force_sig(SIGSEGV, current);
+			return;
+		} else {
+#if 0
+			static volatile long tlong;
+#endif
+
+			printk("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
+			       !(ssw & RW) ? "write" : "read", addr,
+			       fp->ptregs.pc, ssw);
+			asm volatile ("ptestr #1,%1@,#0\n\t"
+				      "pmove %%psr,%0@"
+				      : /* no outputs */
+				      : "a" (&temp), "a" (addr));
+			mmusr = temp;
+
+			printk ("level 0 mmusr is %#x\n", mmusr);
+#if 0
+			asm volatile ("pmove %%tt0,%0@"
+				      : /* no outputs */
+				      : "a" (&tlong));
+			printk("tt0 is %#lx, ", tlong);
+			asm volatile ("pmove %%tt1,%0@"
+				      : /* no outputs */
+				      : "a" (&tlong));
+			printk("tt1 is %#lx\n", tlong);
+#endif
+#ifdef DEBUG
+			printk("Unknown SIGSEGV - 1\n");
+#endif
+			die_if_kernel("Oops",&fp->ptregs,mmusr);
+			force_sig(SIGSEGV, current);
+			return;
+		}
+
+		/* setup an ATC entry for the access about to be retried */
+		if (!(ssw & RW) || (ssw & RM))
+			asm volatile ("ploadw %1,%0@" : /* no outputs */
+				      : "a" (addr), "d" (ssw));
+		else
+			asm volatile ("ploadr %1,%0@" : /* no outputs */
+				      : "a" (addr), "d" (ssw));
+	}
+
+	/* Now handle the instruction fault. */
+
+	if (!(ssw & (FC|FB)))
+		return;
+
+	if (fp->ptregs.sr & PS_S) {
+		printk("Instruction fault at %#010lx\n",
+			fp->ptregs.pc);
+	buserr:
+		printk ("BAD KERNEL BUSERR\n");
+		die_if_kernel("Oops",&fp->ptregs,0);
+		force_sig(SIGKILL, current);
+		return;
+	}
+
+	/* get the fault address */
+	if (fp->ptregs.format == 10)
+		addr = fp->ptregs.pc + 4;
+	else
+		addr = fp->un.fmtb.baddr;
+	if (ssw & FC)
+		addr -= 2;
+
+	if ((ssw & DF) && ((addr ^ fp->un.fmtb.daddr) & PAGE_MASK) == 0)
+		/* Insn fault on same page as data fault.  But we
+		   should still create the ATC entry.  */
+		goto create_atc_entry;
+
+#ifdef DEBUG
+	asm volatile ("ptestr #1,%2@,#7,%0\n\t"
+		      "pmove %%psr,%1@"
+		      : "=a&" (desc)
+		      : "a" (&temp), "a" (addr));
+#else
+	asm volatile ("ptestr #1,%1@,#7\n\t"
+		      "pmove %%psr,%0@"
+		      : : "a" (&temp), "a" (addr));
+#endif
+	mmusr = temp;
+
+#ifdef DEBUG
+	printk ("mmusr is %#x for addr %#lx in task %p\n",
+		mmusr, addr, current);
+	printk ("descriptor address is %#lx, contents %#lx\n",
+		__va(desc), *(unsigned long *)__va(desc));
+#endif
+
+	if (mmusr & MMU_I)
+		do_page_fault (&fp->ptregs, addr, 0);
+	else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
+		printk ("invalid insn access at %#lx from pc %#lx\n",
+			addr, fp->ptregs.pc);
+#ifdef DEBUG
+		printk("Unknown SIGSEGV - 2\n");
+#endif
+		die_if_kernel("Oops",&fp->ptregs,mmusr);
+		force_sig(SIGSEGV, current);
+		return;
+	}
+
+create_atc_entry:
+	/* setup an ATC entry for the access about to be retried */
+	asm volatile ("ploadr #2,%0@" : /* no outputs */
+		      : "a" (addr));
+}
+#endif /* CPU_M68020_OR_M68030 */
+#endif /* !CONFIG_SUN3 */
+
+asmlinkage void buserr_c(struct frame *fp)
+{
+	/* Only set esp0 if coming from user mode */
+	if (user_mode(&fp->ptregs))
+		current->thread.esp0 = (unsigned long) fp;
+
+#ifdef DEBUG
+	printk ("*** Bus Error *** Format is %x\n", fp->ptregs.format);
+#endif
+
+	switch (fp->ptregs.format) {
+#if defined (CONFIG_M68060)
+	case 4:				/* 68060 access error */
+	  access_error060 (fp);
+	  break;
+#endif
+#if defined (CONFIG_M68040)
+	case 0x7:			/* 68040 access error */
+	  access_error040 (fp);
+	  break;
+#endif
+#if defined (CPU_M68020_OR_M68030)
+	case 0xa:
+	case 0xb:
+	  bus_error030 (fp);
+	  break;
+#endif
+	default:
+	  die_if_kernel("bad frame format",&fp->ptregs,0);
+#ifdef DEBUG
+	  printk("Unknown SIGSEGV - 4\n");
+#endif
+	  force_sig(SIGSEGV, current);
+	}
+}
+
+
+static int kstack_depth_to_print = 48;
+
+void show_trace(unsigned long *stack)
+{
+	unsigned long *endstack;
+	unsigned long addr;
+	int i;
+
+	printk("Call Trace:");
+	addr = (unsigned long)stack + THREAD_SIZE - 1;
+	endstack = (unsigned long *)(addr & -THREAD_SIZE);
+	i = 0;
+	while (stack + 1 <= endstack) {
+		addr = *stack++;
+		/*
+		 * If the address is either in the text segment of the
+		 * kernel, or in the region which contains vmalloc'ed
+		 * memory, it *may* be the address of a calling
+		 * routine; if so, print it so that someone tracing
+		 * down the cause of the crash will be able to figure
+		 * out the call path that was taken.
+		 */
+		if (__kernel_text_address(addr)) {
+#ifndef CONFIG_KALLSYMS
+			if (i % 5 == 0)
+				printk("\n       ");
+#endif
+			printk(" [<%08lx>] %pS\n", addr, (void *)addr);
+			i++;
+		}
+	}
+	printk("\n");
+}
+
+void show_registers(struct pt_regs *regs)
+{
+	struct frame *fp = (struct frame *)regs;
+	mm_segment_t old_fs = get_fs();
+	u16 c, *cp;
+	unsigned long addr;
+	int i;
+
+	print_modules();
+	printk("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
+	printk("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
+	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
+	       regs->d0, regs->d1, regs->d2, regs->d3);
+	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
+	       regs->d4, regs->d5, regs->a0, regs->a1);
+
+	printk("Process %s (pid: %d, task=%p)\n",
+		current->comm, task_pid_nr(current), current);
+	addr = (unsigned long)&fp->un;
+	printk("Frame format=%X ", regs->format);
+	switch (regs->format) {
+	case 0x2:
+		printk("instr addr=%08lx\n", fp->un.fmt2.iaddr);
+		addr += sizeof(fp->un.fmt2);
+		break;
+	case 0x3:
+		printk("eff addr=%08lx\n", fp->un.fmt3.effaddr);
+		addr += sizeof(fp->un.fmt3);
+		break;
+	case 0x4:
+		printk((CPU_IS_060 ? "fault addr=%08lx fslw=%08lx\n"
+			: "eff addr=%08lx pc=%08lx\n"),
+			fp->un.fmt4.effaddr, fp->un.fmt4.pc);
+		addr += sizeof(fp->un.fmt4);
+		break;
+	case 0x7:
+		printk("eff addr=%08lx ssw=%04x faddr=%08lx\n",
+			fp->un.fmt7.effaddr, fp->un.fmt7.ssw, fp->un.fmt7.faddr);
+		printk("wb 1 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb1s, fp->un.fmt7.wb1a, fp->un.fmt7.wb1dpd0);
+		printk("wb 2 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb2s, fp->un.fmt7.wb2a, fp->un.fmt7.wb2d);
+		printk("wb 3 stat/addr/data: %04x %08lx %08lx\n",
+			fp->un.fmt7.wb3s, fp->un.fmt7.wb3a, fp->un.fmt7.wb3d);
+		printk("push data: %08lx %08lx %08lx %08lx\n",
+			fp->un.fmt7.wb1dpd0, fp->un.fmt7.pd1, fp->un.fmt7.pd2,
+			fp->un.fmt7.pd3);
+		addr += sizeof(fp->un.fmt7);
+		break;
+	case 0x9:
+		printk("instr addr=%08lx\n", fp->un.fmt9.iaddr);
+		addr += sizeof(fp->un.fmt9);
+		break;
+	case 0xa:
+		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+			fp->un.fmta.ssw, fp->un.fmta.isc, fp->un.fmta.isb,
+			fp->un.fmta.daddr, fp->un.fmta.dobuf);
+		addr += sizeof(fp->un.fmta);
+		break;
+	case 0xb:
+		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
+			fp->un.fmtb.ssw, fp->un.fmtb.isc, fp->un.fmtb.isb,
+			fp->un.fmtb.daddr, fp->un.fmtb.dobuf);
+		printk("baddr=%08lx dibuf=%08lx ver=%x\n",
+			fp->un.fmtb.baddr, fp->un.fmtb.dibuf, fp->un.fmtb.ver);
+		addr += sizeof(fp->un.fmtb);
+		break;
+	default:
+		printk("\n");
+	}
+	show_stack(NULL, (unsigned long *)addr);
+
+	printk("Code:");
+	set_fs(KERNEL_DS);
+	cp = (u16 *)regs->pc;
+	for (i = -8; i < 16; i++) {
+		if (get_user(c, cp + i) && i >= 0) {
+			printk(" Bad PC value.");
+			break;
+		}
+		printk(i ? " %04x" : " <%04x>", c);
+	}
+	set_fs(old_fs);
+	printk ("\n");
+}
+
+void show_stack(struct task_struct *task, unsigned long *stack)
+{
+	unsigned long *p;
+	unsigned long *endstack;
+	int i;
+
+	if (!stack) {
+		if (task)
+			stack = (unsigned long *)task->thread.esp0;
+		else
+			stack = (unsigned long *)&stack;
+	}
+	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE);
+
+	printk("Stack from %08lx:", (unsigned long)stack);
+	p = stack;
+	for (i = 0; i < kstack_depth_to_print; i++) {
+		if (p + 1 > endstack)
+			break;
+		if (i % 8 == 0)
+			printk("\n       ");
+		printk(" %08lx", *p++);
+	}
+	printk("\n");
+	show_trace(stack);
+}
+
+/*
+ * The architecture-independent backtrace generator
+ */
+void dump_stack(void)
+{
+	unsigned long stack;
+
+	show_trace(&stack);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+/*
+ * The vector number returned in the frame pointer may also contain
+ * the "fs" (Fault Status) bits on ColdFire. These are in the bottom
+ * 2 bits, and upper 2 bits. So we need to mask out the real vector
+ * number before using it in comparisons. You don't need to do this on
+ * real 68k parts, but it won't hurt either.
+ */
+
+void bad_super_trap (struct frame *fp)
+{
+	int vector = (fp->ptregs.vector >> 2) & 0xff;
+
+	console_verbose();
+	if (vector < ARRAY_SIZE(vec_names))
+		printk ("*** %s ***   FORMAT=%X\n",
+			vec_names[vector],
+			fp->ptregs.format);
+	else
+		printk ("*** Exception %d ***   FORMAT=%X\n",
+			vector, fp->ptregs.format);
+	if (vector == VEC_ADDRERR && CPU_IS_020_OR_030) {
+		unsigned short ssw = fp->un.fmtb.ssw;
+
+		printk ("SSW=%#06x  ", ssw);
+
+		if (ssw & RC)
+			printk ("Pipe stage C instruction fault at %#010lx\n",
+				(fp->ptregs.format) == 0xA ?
+				fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2);
+		if (ssw & RB)
+			printk ("Pipe stage B instruction fault at %#010lx\n",
+				(fp->ptregs.format) == 0xA ?
+				fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
+		if (ssw & DF)
+			printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
+				ssw & RW ? "read" : "write",
+				fp->un.fmtb.daddr, space_names[ssw & DFC],
+				fp->ptregs.pc);
+	}
+	printk ("Current process id is %d\n", task_pid_nr(current));
+	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
+}
+
+asmlinkage void trap_c(struct frame *fp)
+{
+	int sig;
+	int vector = (fp->ptregs.vector >> 2) & 0xff;
+	siginfo_t info;
+
+	if (fp->ptregs.sr & PS_S) {
+		if (vector == VEC_TRACE) {
+			/* traced a trapping instruction on a 68020/30,
+			 * real exception will be executed afterwards.
+			 */
+		} else if (!handle_kernel_fault(&fp->ptregs))
+			bad_super_trap(fp);
+		return;
+	}
+
+	/* send the appropriate signal to the user program */
+	switch (vector) {
+	    case VEC_ADDRERR:
+		info.si_code = BUS_ADRALN;
+		sig = SIGBUS;
+		break;
+	    case VEC_ILLEGAL:
+	    case VEC_LINE10:
+	    case VEC_LINE11:
+		info.si_code = ILL_ILLOPC;
+		sig = SIGILL;
+		break;
+	    case VEC_PRIV:
+		info.si_code = ILL_PRVOPC;
+		sig = SIGILL;
+		break;
+	    case VEC_COPROC:
+		info.si_code = ILL_COPROC;
+		sig = SIGILL;
+		break;
+	    case VEC_TRAP1:
+	    case VEC_TRAP2:
+	    case VEC_TRAP3:
+	    case VEC_TRAP4:
+	    case VEC_TRAP5:
+	    case VEC_TRAP6:
+	    case VEC_TRAP7:
+	    case VEC_TRAP8:
+	    case VEC_TRAP9:
+	    case VEC_TRAP10:
+	    case VEC_TRAP11:
+	    case VEC_TRAP12:
+	    case VEC_TRAP13:
+	    case VEC_TRAP14:
+		info.si_code = ILL_ILLTRP;
+		sig = SIGILL;
+		break;
+	    case VEC_FPBRUC:
+	    case VEC_FPOE:
+	    case VEC_FPNAN:
+		info.si_code = FPE_FLTINV;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPIR:
+		info.si_code = FPE_FLTRES;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPDIVZ:
+		info.si_code = FPE_FLTDIV;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPUNDER:
+		info.si_code = FPE_FLTUND;
+		sig = SIGFPE;
+		break;
+	    case VEC_FPOVER:
+		info.si_code = FPE_FLTOVF;
+		sig = SIGFPE;
+		break;
+	    case VEC_ZERODIV:
+		info.si_code = FPE_INTDIV;
+		sig = SIGFPE;
+		break;
+	    case VEC_CHK:
+	    case VEC_TRAP:
+		info.si_code = FPE_INTOVF;
+		sig = SIGFPE;
+		break;
+	    case VEC_TRACE:		/* ptrace single step */
+		info.si_code = TRAP_TRACE;
+		sig = SIGTRAP;
+		break;
+	    case VEC_TRAP15:		/* breakpoint */
+		info.si_code = TRAP_BRKPT;
+		sig = SIGTRAP;
+		break;
+	    default:
+		info.si_code = ILL_ILLOPC;
+		sig = SIGILL;
+		break;
+	}
+	info.si_signo = sig;
+	info.si_errno = 0;
+	switch (fp->ptregs.format) {
+	    default:
+		info.si_addr = (void *) fp->ptregs.pc;
+		break;
+	    case 2:
+		info.si_addr = (void *) fp->un.fmt2.iaddr;
+		break;
+	    case 7:
+		info.si_addr = (void *) fp->un.fmt7.effaddr;
+		break;
+	    case 9:
+		info.si_addr = (void *) fp->un.fmt9.iaddr;
+		break;
+	    case 10:
+		info.si_addr = (void *) fp->un.fmta.daddr;
+		break;
+	    case 11:
+		info.si_addr = (void *) fp->un.fmtb.daddr;
+		break;
+	}
+	force_sig_info (sig, &info, current);
+}
+
+void die_if_kernel (char *str, struct pt_regs *fp, int nr)
+{
+	if (!(fp->sr & PS_S))
+		return;
+
+	console_verbose();
+	printk("%s: %08x\n",str,nr);
+	show_registers(fp);
+	add_taint(TAINT_DIE);
+	do_exit(SIGSEGV);
+}
+
+asmlinkage void set_esp0(unsigned long ssp)
+{
+	current->thread.esp0 = ssp;
+}
+
+/*
+ * This function is called if an error occur while accessing
+ * user-space from the fpsp040 code.
+ */
+asmlinkage void fpsp040_die(void)
+{
+	do_exit(SIGSEGV);
+}
+
+#ifdef CONFIG_M68KFPU_EMU
+asmlinkage void fpemu_signal(int signal, int code, void *addr)
+{
+	siginfo_t info;
+
+	info.si_signo = signal;
+	info.si_errno = 0;
+	info.si_code = code;
+	info.si_addr = addr;
+	force_sig_info(signal, &info, current);
+}
 #endif
diff --git a/arch/m68k/kernel/traps_mm.c b/arch/m68k/kernel/traps_mm.c
deleted file mode 100644
index 4022bbc28878..000000000000
--- a/arch/m68k/kernel/traps_mm.c
+++ /dev/null
@@ -1,1207 +0,0 @@
-/*
- *  linux/arch/m68k/kernel/traps.c
- *
- *  Copyright (C) 1993, 1994 by Hamish Macdonald
- *
- *  68040 fixes by Michael Rausch
- *  68040 fixes by Martin Apel
- *  68040 fixes and writeback by Richard Zidlicky
- *  68060 fixes by Roman Hodek
- *  68060 fixes by Jesper Skov
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/*
- * Sets up all exception vectors
- */
-
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/user.h>
-#include <linux/string.h>
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <linux/ptrace.h>
-#include <linux/kallsyms.h>
-
-#include <asm/setup.h>
-#include <asm/fpu.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
-#include <asm/pgalloc.h>
-#include <asm/machdep.h>
-#include <asm/siginfo.h>
-
-/* assembler routines */
-asmlinkage void system_call(void);
-asmlinkage void buserr(void);
-asmlinkage void trap(void);
-asmlinkage void nmihandler(void);
-#ifdef CONFIG_M68KFPU_EMU
-asmlinkage void fpu_emu(void);
-#endif
-
-e_vector vectors[256];
-
-/* nmi handler for the Amiga */
-asm(".text\n"
-    __ALIGN_STR "\n"
-    "nmihandler: rte");
-
-/*
- * this must be called very early as the kernel might
- * use some instruction that are emulated on the 060
- * and so we're prepared for early probe attempts (e.g. nf_init).
- */
-void __init base_trap_init(void)
-{
-	if (MACH_IS_SUN3X) {
-		extern e_vector *sun3x_prom_vbr;
-
-		__asm__ volatile ("movec %%vbr, %0" : "=r" (sun3x_prom_vbr));
-	}
-
-	/* setup the exception vector table */
-	__asm__ volatile ("movec %0,%%vbr" : : "r" ((void*)vectors));
-
-	if (CPU_IS_060) {
-		/* set up ISP entry points */
-		asmlinkage void unimp_vec(void) asm ("_060_isp_unimp");
-
-		vectors[VEC_UNIMPII] = unimp_vec;
-	}
-
-	vectors[VEC_BUSERR] = buserr;
-	vectors[VEC_ILLEGAL] = trap;
-	vectors[VEC_SYS] = system_call;
-}
-
-void __init trap_init (void)
-{
-	int i;
-
-	for (i = VEC_SPUR; i <= VEC_INT7; i++)
-		vectors[i] = bad_inthandler;
-
-	for (i = 0; i < VEC_USER; i++)
-		if (!vectors[i])
-			vectors[i] = trap;
-
-	for (i = VEC_USER; i < 256; i++)
-		vectors[i] = bad_inthandler;
-
-#ifdef CONFIG_M68KFPU_EMU
-	if (FPU_IS_EMU)
-		vectors[VEC_LINE11] = fpu_emu;
-#endif
-
-	if (CPU_IS_040 && !FPU_IS_EMU) {
-		/* set up FPSP entry points */
-		asmlinkage void dz_vec(void) asm ("dz");
-		asmlinkage void inex_vec(void) asm ("inex");
-		asmlinkage void ovfl_vec(void) asm ("ovfl");
-		asmlinkage void unfl_vec(void) asm ("unfl");
-		asmlinkage void snan_vec(void) asm ("snan");
-		asmlinkage void operr_vec(void) asm ("operr");
-		asmlinkage void bsun_vec(void) asm ("bsun");
-		asmlinkage void fline_vec(void) asm ("fline");
-		asmlinkage void unsupp_vec(void) asm ("unsupp");
-
-		vectors[VEC_FPDIVZ] = dz_vec;
-		vectors[VEC_FPIR] = inex_vec;
-		vectors[VEC_FPOVER] = ovfl_vec;
-		vectors[VEC_FPUNDER] = unfl_vec;
-		vectors[VEC_FPNAN] = snan_vec;
-		vectors[VEC_FPOE] = operr_vec;
-		vectors[VEC_FPBRUC] = bsun_vec;
-		vectors[VEC_LINE11] = fline_vec;
-		vectors[VEC_FPUNSUP] = unsupp_vec;
-	}
-
-	if (CPU_IS_060 && !FPU_IS_EMU) {
-		/* set up IFPSP entry points */
-		asmlinkage void snan_vec6(void) asm ("_060_fpsp_snan");
-		asmlinkage void operr_vec6(void) asm ("_060_fpsp_operr");
-		asmlinkage void ovfl_vec6(void) asm ("_060_fpsp_ovfl");
-		asmlinkage void unfl_vec6(void) asm ("_060_fpsp_unfl");
-		asmlinkage void dz_vec6(void) asm ("_060_fpsp_dz");
-		asmlinkage void inex_vec6(void) asm ("_060_fpsp_inex");
-		asmlinkage void fline_vec6(void) asm ("_060_fpsp_fline");
-		asmlinkage void unsupp_vec6(void) asm ("_060_fpsp_unsupp");
-		asmlinkage void effadd_vec6(void) asm ("_060_fpsp_effadd");
-
-		vectors[VEC_FPNAN] = snan_vec6;
-		vectors[VEC_FPOE] = operr_vec6;
-		vectors[VEC_FPOVER] = ovfl_vec6;
-		vectors[VEC_FPUNDER] = unfl_vec6;
-		vectors[VEC_FPDIVZ] = dz_vec6;
-		vectors[VEC_FPIR] = inex_vec6;
-		vectors[VEC_LINE11] = fline_vec6;
-		vectors[VEC_FPUNSUP] = unsupp_vec6;
-		vectors[VEC_UNIMPEA] = effadd_vec6;
-	}
-
-        /* if running on an amiga, make the NMI interrupt do nothing */
-	if (MACH_IS_AMIGA) {
-		vectors[VEC_INT7] = nmihandler;
-	}
-}
-
-
-static const char *vec_names[] = {
-	[VEC_RESETSP]	= "RESET SP",
-	[VEC_RESETPC]	= "RESET PC",
-	[VEC_BUSERR]	= "BUS ERROR",
-	[VEC_ADDRERR]	= "ADDRESS ERROR",
-	[VEC_ILLEGAL]	= "ILLEGAL INSTRUCTION",
-	[VEC_ZERODIV]	= "ZERO DIVIDE",
-	[VEC_CHK]	= "CHK",
-	[VEC_TRAP]	= "TRAPcc",
-	[VEC_PRIV]	= "PRIVILEGE VIOLATION",
-	[VEC_TRACE]	= "TRACE",
-	[VEC_LINE10]	= "LINE 1010",
-	[VEC_LINE11]	= "LINE 1111",
-	[VEC_RESV12]	= "UNASSIGNED RESERVED 12",
-	[VEC_COPROC]	= "COPROCESSOR PROTOCOL VIOLATION",
-	[VEC_FORMAT]	= "FORMAT ERROR",
-	[VEC_UNINT]	= "UNINITIALIZED INTERRUPT",
-	[VEC_RESV16]	= "UNASSIGNED RESERVED 16",
-	[VEC_RESV17]	= "UNASSIGNED RESERVED 17",
-	[VEC_RESV18]	= "UNASSIGNED RESERVED 18",
-	[VEC_RESV19]	= "UNASSIGNED RESERVED 19",
-	[VEC_RESV20]	= "UNASSIGNED RESERVED 20",
-	[VEC_RESV21]	= "UNASSIGNED RESERVED 21",
-	[VEC_RESV22]	= "UNASSIGNED RESERVED 22",
-	[VEC_RESV23]	= "UNASSIGNED RESERVED 23",
-	[VEC_SPUR]	= "SPURIOUS INTERRUPT",
-	[VEC_INT1]	= "LEVEL 1 INT",
-	[VEC_INT2]	= "LEVEL 2 INT",
-	[VEC_INT3]	= "LEVEL 3 INT",
-	[VEC_INT4]	= "LEVEL 4 INT",
-	[VEC_INT5]	= "LEVEL 5 INT",
-	[VEC_INT6]	= "LEVEL 6 INT",
-	[VEC_INT7]	= "LEVEL 7 INT",
-	[VEC_SYS]	= "SYSCALL",
-	[VEC_TRAP1]	= "TRAP #1",
-	[VEC_TRAP2]	= "TRAP #2",
-	[VEC_TRAP3]	= "TRAP #3",
-	[VEC_TRAP4]	= "TRAP #4",
-	[VEC_TRAP5]	= "TRAP #5",
-	[VEC_TRAP6]	= "TRAP #6",
-	[VEC_TRAP7]	= "TRAP #7",
-	[VEC_TRAP8]	= "TRAP #8",
-	[VEC_TRAP9]	= "TRAP #9",
-	[VEC_TRAP10]	= "TRAP #10",
-	[VEC_TRAP11]	= "TRAP #11",
-	[VEC_TRAP12]	= "TRAP #12",
-	[VEC_TRAP13]	= "TRAP #13",
-	[VEC_TRAP14]	= "TRAP #14",
-	[VEC_TRAP15]	= "TRAP #15",
-	[VEC_FPBRUC]	= "FPCP BSUN",
-	[VEC_FPIR]	= "FPCP INEXACT",
-	[VEC_FPDIVZ]	= "FPCP DIV BY 0",
-	[VEC_FPUNDER]	= "FPCP UNDERFLOW",
-	[VEC_FPOE]	= "FPCP OPERAND ERROR",
-	[VEC_FPOVER]	= "FPCP OVERFLOW",
-	[VEC_FPNAN]	= "FPCP SNAN",
-	[VEC_FPUNSUP]	= "FPCP UNSUPPORTED OPERATION",
-	[VEC_MMUCFG]	= "MMU CONFIGURATION ERROR",
-	[VEC_MMUILL]	= "MMU ILLEGAL OPERATION ERROR",
-	[VEC_MMUACC]	= "MMU ACCESS LEVEL VIOLATION ERROR",
-	[VEC_RESV59]	= "UNASSIGNED RESERVED 59",
-	[VEC_UNIMPEA]	= "UNASSIGNED RESERVED 60",
-	[VEC_UNIMPII]	= "UNASSIGNED RESERVED 61",
-	[VEC_RESV62]	= "UNASSIGNED RESERVED 62",
-	[VEC_RESV63]	= "UNASSIGNED RESERVED 63",
-};
-
-static const char *space_names[] = {
-	[0]		= "Space 0",
-	[USER_DATA]	= "User Data",
-	[USER_PROGRAM]	= "User Program",
-#ifndef CONFIG_SUN3
-	[3]		= "Space 3",
-#else
-	[FC_CONTROL]	= "Control",
-#endif
-	[4]		= "Space 4",
-	[SUPER_DATA]	= "Super Data",
-	[SUPER_PROGRAM]	= "Super Program",
-	[CPU_SPACE]	= "CPU"
-};
-
-void die_if_kernel(char *,struct pt_regs *,int);
-asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
-                             unsigned long error_code);
-int send_fault_sig(struct pt_regs *regs);
-
-asmlinkage void trap_c(struct frame *fp);
-
-#if defined (CONFIG_M68060)
-static inline void access_error060 (struct frame *fp)
-{
-	unsigned long fslw = fp->un.fmt4.pc; /* is really FSLW for access error */
-
-#ifdef DEBUG
-	printk("fslw=%#lx, fa=%#lx\n", fslw, fp->un.fmt4.effaddr);
-#endif
-
-	if (fslw & MMU060_BPE) {
-		/* branch prediction error -> clear branch cache */
-		__asm__ __volatile__ ("movec %/cacr,%/d0\n\t"
-				      "orl   #0x00400000,%/d0\n\t"
-				      "movec %/d0,%/cacr"
-				      : : : "d0" );
-		/* return if there's no other error */
-		if (!(fslw & MMU060_ERR_BITS) && !(fslw & MMU060_SEE))
-			return;
-	}
-
-	if (fslw & (MMU060_DESC_ERR | MMU060_WP | MMU060_SP)) {
-		unsigned long errorcode;
-		unsigned long addr = fp->un.fmt4.effaddr;
-
-		if (fslw & MMU060_MA)
-			addr = (addr + PAGE_SIZE - 1) & PAGE_MASK;
-
-		errorcode = 1;
-		if (fslw & MMU060_DESC_ERR) {
-			__flush_tlb040_one(addr);
-			errorcode = 0;
-		}
-		if (fslw & MMU060_W)
-			errorcode |= 2;
-#ifdef DEBUG
-		printk("errorcode = %d\n", errorcode );
-#endif
-		do_page_fault(&fp->ptregs, addr, errorcode);
-	} else if (fslw & (MMU060_SEE)){
-		/* Software Emulation Error.
-		 * fault during mem_read/mem_write in ifpsp060/os.S
-		 */
-		send_fault_sig(&fp->ptregs);
-	} else if (!(fslw & (MMU060_RE|MMU060_WE)) ||
-		   send_fault_sig(&fp->ptregs) > 0) {
-		printk("pc=%#lx, fa=%#lx\n", fp->ptregs.pc, fp->un.fmt4.effaddr);
-		printk( "68060 access error, fslw=%lx\n", fslw );
-		trap_c( fp );
-	}
-}
-#endif /* CONFIG_M68060 */
-
-#if defined (CONFIG_M68040)
-static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
-{
-	unsigned long mmusr;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(MAKE_MM_SEG(wbs));
-
-	if (iswrite)
-		asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
-	else
-		asm volatile (".chip 68040; ptestr (%0); .chip 68k" : : "a" (addr));
-
-	asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
-
-	set_fs(old_fs);
-
-	return mmusr;
-}
-
-static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
-				   unsigned long wbd)
-{
-	int res = 0;
-	mm_segment_t old_fs = get_fs();
-
-	/* set_fs can not be moved, otherwise put_user() may oops */
-	set_fs(MAKE_MM_SEG(wbs));
-
-	switch (wbs & WBSIZ_040) {
-	case BA_SIZE_BYTE:
-		res = put_user(wbd & 0xff, (char __user *)wba);
-		break;
-	case BA_SIZE_WORD:
-		res = put_user(wbd & 0xffff, (short __user *)wba);
-		break;
-	case BA_SIZE_LONG:
-		res = put_user(wbd, (int __user *)wba);
-		break;
-	}
-
-	/* set_fs can not be moved, otherwise put_user() may oops */
-	set_fs(old_fs);
-
-
-#ifdef DEBUG
-	printk("do_040writeback1, res=%d\n",res);
-#endif
-
-	return res;
-}
-
-/* after an exception in a writeback the stack frame corresponding
- * to that exception is discarded, set a few bits in the old frame
- * to simulate what it should look like
- */
-static inline void fix_xframe040(struct frame *fp, unsigned long wba, unsigned short wbs)
-{
-	fp->un.fmt7.faddr = wba;
-	fp->un.fmt7.ssw = wbs & 0xff;
-	if (wba != current->thread.faddr)
-	    fp->un.fmt7.ssw |= MA_040;
-}
-
-static inline void do_040writebacks(struct frame *fp)
-{
-	int res = 0;
-#if 0
-	if (fp->un.fmt7.wb1s & WBV_040)
-		printk("access_error040: cannot handle 1st writeback. oops.\n");
-#endif
-
-	if ((fp->un.fmt7.wb2s & WBV_040) &&
-	    !(fp->un.fmt7.wb2s & WBTT_040)) {
-		res = do_040writeback1(fp->un.fmt7.wb2s, fp->un.fmt7.wb2a,
-				       fp->un.fmt7.wb2d);
-		if (res)
-			fix_xframe040(fp, fp->un.fmt7.wb2a, fp->un.fmt7.wb2s);
-		else
-			fp->un.fmt7.wb2s = 0;
-	}
-
-	/* do the 2nd wb only if the first one was successful (except for a kernel wb) */
-	if (fp->un.fmt7.wb3s & WBV_040 && (!res || fp->un.fmt7.wb3s & 4)) {
-		res = do_040writeback1(fp->un.fmt7.wb3s, fp->un.fmt7.wb3a,
-				       fp->un.fmt7.wb3d);
-		if (res)
-		    {
-			fix_xframe040(fp, fp->un.fmt7.wb3a, fp->un.fmt7.wb3s);
-
-			fp->un.fmt7.wb2s = fp->un.fmt7.wb3s;
-			fp->un.fmt7.wb3s &= (~WBV_040);
-			fp->un.fmt7.wb2a = fp->un.fmt7.wb3a;
-			fp->un.fmt7.wb2d = fp->un.fmt7.wb3d;
-		    }
-		else
-			fp->un.fmt7.wb3s = 0;
-	}
-
-	if (res)
-		send_fault_sig(&fp->ptregs);
-}
-
-/*
- * called from sigreturn(), must ensure userspace code didn't
- * manipulate exception frame to circumvent protection, then complete
- * pending writebacks
- * we just clear TM2 to turn it into a userspace access
- */
-asmlinkage void berr_040cleanup(struct frame *fp)
-{
-	fp->un.fmt7.wb2s &= ~4;
-	fp->un.fmt7.wb3s &= ~4;
-
-	do_040writebacks(fp);
-}
-
-static inline void access_error040(struct frame *fp)
-{
-	unsigned short ssw = fp->un.fmt7.ssw;
-	unsigned long mmusr;
-
-#ifdef DEBUG
-	printk("ssw=%#x, fa=%#lx\n", ssw, fp->un.fmt7.faddr);
-        printk("wb1s=%#x, wb2s=%#x, wb3s=%#x\n", fp->un.fmt7.wb1s,
-		fp->un.fmt7.wb2s, fp->un.fmt7.wb3s);
-	printk ("wb2a=%lx, wb3a=%lx, wb2d=%lx, wb3d=%lx\n",
-		fp->un.fmt7.wb2a, fp->un.fmt7.wb3a,
-		fp->un.fmt7.wb2d, fp->un.fmt7.wb3d);
-#endif
-
-	if (ssw & ATC_040) {
-		unsigned long addr = fp->un.fmt7.faddr;
-		unsigned long errorcode;
-
-		/*
-		 * The MMU status has to be determined AFTER the address
-		 * has been corrected if there was a misaligned access (MA).
-		 */
-		if (ssw & MA_040)
-			addr = (addr + 7) & -8;
-
-		/* MMU error, get the MMUSR info for this access */
-		mmusr = probe040(!(ssw & RW_040), addr, ssw);
-#ifdef DEBUG
-		printk("mmusr = %lx\n", mmusr);
-#endif
-		errorcode = 1;
-		if (!(mmusr & MMU_R_040)) {
-			/* clear the invalid atc entry */
-			__flush_tlb040_one(addr);
-			errorcode = 0;
-		}
-
-		/* despite what documentation seems to say, RMW
-		 * accesses have always both the LK and RW bits set */
-		if (!(ssw & RW_040) || (ssw & LK_040))
-			errorcode |= 2;
-
-		if (do_page_fault(&fp->ptregs, addr, errorcode)) {
-#ifdef DEBUG
-			printk("do_page_fault() !=0\n");
-#endif
-			if (user_mode(&fp->ptregs)){
-				/* delay writebacks after signal delivery */
-#ifdef DEBUG
-			        printk(".. was usermode - return\n");
-#endif
-				return;
-			}
-			/* disable writeback into user space from kernel
-			 * (if do_page_fault didn't fix the mapping,
-                         * the writeback won't do good)
-			 */
-disable_wb:
-#ifdef DEBUG
-			printk(".. disabling wb2\n");
-#endif
-			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
-				fp->un.fmt7.wb2s &= ~WBV_040;
-			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
-				fp->un.fmt7.wb3s &= ~WBV_040;
-		}
-	} else {
-		/* In case of a bus error we either kill the process or expect
-		 * the kernel to catch the fault, which then is also responsible
-		 * for cleaning up the mess.
-		 */
-		current->thread.signo = SIGBUS;
-		current->thread.faddr = fp->un.fmt7.faddr;
-		if (send_fault_sig(&fp->ptregs) >= 0)
-			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
-			       fp->un.fmt7.faddr);
-		goto disable_wb;
-	}
-
-	do_040writebacks(fp);
-}
-#endif /* CONFIG_M68040 */
-
-#if defined(CONFIG_SUN3)
-#include <asm/sun3mmu.h>
-
-extern int mmu_emu_handle_fault (unsigned long, int, int);
-
-/* sun3 version of bus_error030 */
-
-static inline void bus_error030 (struct frame *fp)
-{
-	unsigned char buserr_type = sun3_get_buserr ();
-	unsigned long addr, errorcode;
-	unsigned short ssw = fp->un.fmtb.ssw;
-	extern unsigned long _sun3_map_test_start, _sun3_map_test_end;
-
-#ifdef DEBUG
-	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
-			ssw & FC ?
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
-			:
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-			ssw & RW ? "read" : "write",
-			fp->un.fmtb.daddr,
-			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
-
-	/*
-	 * Check if this page should be demand-mapped. This needs to go before
-	 * the testing for a bad kernel-space access (demand-mapping applies
-	 * to kernel accesses too).
-	 */
-
-	if ((ssw & DF)
-	    && (buserr_type & (SUN3_BUSERR_PROTERR | SUN3_BUSERR_INVALID))) {
-		if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 0))
-			return;
-	}
-
-	/* Check for kernel-space pagefault (BAD). */
-	if (fp->ptregs.sr & PS_S) {
-		/* kernel fault must be a data fault to user space */
-		if (! ((ssw & DF) && ((ssw & DFC) == USER_DATA))) {
-		     // try checking the kernel mappings before surrender
-		     if (mmu_emu_handle_fault (fp->un.fmtb.daddr, ssw & RW, 1))
-			  return;
-			/* instruction fault or kernel data fault! */
-			if (ssw & (FC | FB))
-				printk ("Instruction fault at %#010lx\n",
-					fp->ptregs.pc);
-			if (ssw & DF) {
-				/* was this fault incurred testing bus mappings? */
-				if((fp->ptregs.pc >= (unsigned long)&_sun3_map_test_start) &&
-				   (fp->ptregs.pc <= (unsigned long)&_sun3_map_test_end)) {
-					send_fault_sig(&fp->ptregs);
-					return;
-				}
-
-				printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-					ssw & RW ? "read" : "write",
-					fp->un.fmtb.daddr,
-					space_names[ssw & DFC], fp->ptregs.pc);
-			}
-			printk ("BAD KERNEL BUSERR\n");
-
-			die_if_kernel("Oops", &fp->ptregs,0);
-			force_sig(SIGKILL, current);
-			return;
-		}
-	} else {
-		/* user fault */
-		if (!(ssw & (FC | FB)) && !(ssw & DF))
-			/* not an instruction fault or data fault! BAD */
-			panic ("USER BUSERR w/o instruction or data fault");
-	}
-
-
-	/* First handle the data fault, if any.  */
-	if (ssw & DF) {
-		addr = fp->un.fmtb.daddr;
-
-// errorcode bit 0:	0 -> no page		1 -> protection fault
-// errorcode bit 1:	0 -> read fault		1 -> write fault
-
-// (buserr_type & SUN3_BUSERR_PROTERR)	-> protection fault
-// (buserr_type & SUN3_BUSERR_INVALID)	-> invalid page fault
-
-		if (buserr_type & SUN3_BUSERR_PROTERR)
-			errorcode = 0x01;
-		else if (buserr_type & SUN3_BUSERR_INVALID)
-			errorcode = 0x00;
-		else {
-#ifdef DEBUG
-			printk ("*** unexpected busfault type=%#04x\n", buserr_type);
-			printk ("invalid %s access at %#lx from pc %#lx\n",
-				!(ssw & RW) ? "write" : "read", addr,
-				fp->ptregs.pc);
-#endif
-			die_if_kernel ("Oops", &fp->ptregs, buserr_type);
-			force_sig (SIGBUS, current);
-			return;
-		}
-
-//todo: wtf is RM bit? --m
-		if (!(ssw & RW) || ssw & RM)
-			errorcode |= 0x02;
-
-		/* Handle page fault. */
-		do_page_fault (&fp->ptregs, addr, errorcode);
-
-		/* Retry the data fault now. */
-		return;
-	}
-
-	/* Now handle the instruction fault. */
-
-	/* Get the fault address. */
-	if (fp->ptregs.format == 0xA)
-		addr = fp->ptregs.pc + 4;
-	else
-		addr = fp->un.fmtb.baddr;
-	if (ssw & FC)
-		addr -= 2;
-
-	if (buserr_type & SUN3_BUSERR_INVALID) {
-		if (!mmu_emu_handle_fault (fp->un.fmtb.daddr, 1, 0))
-			do_page_fault (&fp->ptregs, addr, 0);
-       } else {
-#ifdef DEBUG
-		printk ("protection fault on insn access (segv).\n");
-#endif
-		force_sig (SIGSEGV, current);
-       }
-}
-#else
-#if defined(CPU_M68020_OR_M68030)
-static inline void bus_error030 (struct frame *fp)
-{
-	volatile unsigned short temp;
-	unsigned short mmusr;
-	unsigned long addr, errorcode;
-	unsigned short ssw = fp->un.fmtb.ssw;
-#ifdef DEBUG
-	unsigned long desc;
-
-	printk ("pid = %x  ", current->pid);
-	printk ("SSW=%#06x  ", ssw);
-
-	if (ssw & (FC | FB))
-		printk ("Instruction fault at %#010lx\n",
-			ssw & FC ?
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2
-			:
-			fp->ptregs.format == 0xa ? fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-	if (ssw & DF)
-		printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-			ssw & RW ? "read" : "write",
-			fp->un.fmtb.daddr,
-			space_names[ssw & DFC], fp->ptregs.pc);
-#endif
-
-	/* ++andreas: If a data fault and an instruction fault happen
-	   at the same time map in both pages.  */
-
-	/* First handle the data fault, if any.  */
-	if (ssw & DF) {
-		addr = fp->un.fmtb.daddr;
-
-#ifdef DEBUG
-		asm volatile ("ptestr %3,%2@,#7,%0\n\t"
-			      "pmove %%psr,%1@"
-			      : "=a&" (desc)
-			      : "a" (&temp), "a" (addr), "d" (ssw));
-#else
-		asm volatile ("ptestr %2,%1@,#7\n\t"
-			      "pmove %%psr,%0@"
-			      : : "a" (&temp), "a" (addr), "d" (ssw));
-#endif
-		mmusr = temp;
-
-#ifdef DEBUG
-		printk("mmusr is %#x for addr %#lx in task %p\n",
-		       mmusr, addr, current);
-		printk("descriptor address is %#lx, contents %#lx\n",
-		       __va(desc), *(unsigned long *)__va(desc));
-#endif
-
-		errorcode = (mmusr & MMU_I) ? 0 : 1;
-		if (!(ssw & RW) || (ssw & RM))
-			errorcode |= 2;
-
-		if (mmusr & (MMU_I | MMU_WP)) {
-			if (ssw & 4) {
-				printk("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-				       ssw & RW ? "read" : "write",
-				       fp->un.fmtb.daddr,
-				       space_names[ssw & DFC], fp->ptregs.pc);
-				goto buserr;
-			}
-			/* Don't try to do anything further if an exception was
-			   handled. */
-			if (do_page_fault (&fp->ptregs, addr, errorcode) < 0)
-				return;
-		} else if (!(mmusr & MMU_I)) {
-			/* probably a 020 cas fault */
-			if (!(ssw & RM) && send_fault_sig(&fp->ptregs) > 0)
-				printk("unexpected bus error (%#x,%#x)\n", ssw, mmusr);
-		} else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-			printk("invalid %s access at %#lx from pc %#lx\n",
-			       !(ssw & RW) ? "write" : "read", addr,
-			       fp->ptregs.pc);
-			die_if_kernel("Oops",&fp->ptregs,mmusr);
-			force_sig(SIGSEGV, current);
-			return;
-		} else {
-#if 0
-			static volatile long tlong;
-#endif
-
-			printk("weird %s access at %#lx from pc %#lx (ssw is %#x)\n",
-			       !(ssw & RW) ? "write" : "read", addr,
-			       fp->ptregs.pc, ssw);
-			asm volatile ("ptestr #1,%1@,#0\n\t"
-				      "pmove %%psr,%0@"
-				      : /* no outputs */
-				      : "a" (&temp), "a" (addr));
-			mmusr = temp;
-
-			printk ("level 0 mmusr is %#x\n", mmusr);
-#if 0
-			asm volatile ("pmove %%tt0,%0@"
-				      : /* no outputs */
-				      : "a" (&tlong));
-			printk("tt0 is %#lx, ", tlong);
-			asm volatile ("pmove %%tt1,%0@"
-				      : /* no outputs */
-				      : "a" (&tlong));
-			printk("tt1 is %#lx\n", tlong);
-#endif
-#ifdef DEBUG
-			printk("Unknown SIGSEGV - 1\n");
-#endif
-			die_if_kernel("Oops",&fp->ptregs,mmusr);
-			force_sig(SIGSEGV, current);
-			return;
-		}
-
-		/* setup an ATC entry for the access about to be retried */
-		if (!(ssw & RW) || (ssw & RM))
-			asm volatile ("ploadw %1,%0@" : /* no outputs */
-				      : "a" (addr), "d" (ssw));
-		else
-			asm volatile ("ploadr %1,%0@" : /* no outputs */
-				      : "a" (addr), "d" (ssw));
-	}
-
-	/* Now handle the instruction fault. */
-
-	if (!(ssw & (FC|FB)))
-		return;
-
-	if (fp->ptregs.sr & PS_S) {
-		printk("Instruction fault at %#010lx\n",
-			fp->ptregs.pc);
-	buserr:
-		printk ("BAD KERNEL BUSERR\n");
-		die_if_kernel("Oops",&fp->ptregs,0);
-		force_sig(SIGKILL, current);
-		return;
-	}
-
-	/* get the fault address */
-	if (fp->ptregs.format == 10)
-		addr = fp->ptregs.pc + 4;
-	else
-		addr = fp->un.fmtb.baddr;
-	if (ssw & FC)
-		addr -= 2;
-
-	if ((ssw & DF) && ((addr ^ fp->un.fmtb.daddr) & PAGE_MASK) == 0)
-		/* Insn fault on same page as data fault.  But we
-		   should still create the ATC entry.  */
-		goto create_atc_entry;
-
-#ifdef DEBUG
-	asm volatile ("ptestr #1,%2@,#7,%0\n\t"
-		      "pmove %%psr,%1@"
-		      : "=a&" (desc)
-		      : "a" (&temp), "a" (addr));
-#else
-	asm volatile ("ptestr #1,%1@,#7\n\t"
-		      "pmove %%psr,%0@"
-		      : : "a" (&temp), "a" (addr));
-#endif
-	mmusr = temp;
-
-#ifdef DEBUG
-	printk ("mmusr is %#x for addr %#lx in task %p\n",
-		mmusr, addr, current);
-	printk ("descriptor address is %#lx, contents %#lx\n",
-		__va(desc), *(unsigned long *)__va(desc));
-#endif
-
-	if (mmusr & MMU_I)
-		do_page_fault (&fp->ptregs, addr, 0);
-	else if (mmusr & (MMU_B|MMU_L|MMU_S)) {
-		printk ("invalid insn access at %#lx from pc %#lx\n",
-			addr, fp->ptregs.pc);
-#ifdef DEBUG
-		printk("Unknown SIGSEGV - 2\n");
-#endif
-		die_if_kernel("Oops",&fp->ptregs,mmusr);
-		force_sig(SIGSEGV, current);
-		return;
-	}
-
-create_atc_entry:
-	/* setup an ATC entry for the access about to be retried */
-	asm volatile ("ploadr #2,%0@" : /* no outputs */
-		      : "a" (addr));
-}
-#endif /* CPU_M68020_OR_M68030 */
-#endif /* !CONFIG_SUN3 */
-
-asmlinkage void buserr_c(struct frame *fp)
-{
-	/* Only set esp0 if coming from user mode */
-	if (user_mode(&fp->ptregs))
-		current->thread.esp0 = (unsigned long) fp;
-
-#ifdef DEBUG
-	printk ("*** Bus Error *** Format is %x\n", fp->ptregs.format);
-#endif
-
-	switch (fp->ptregs.format) {
-#if defined (CONFIG_M68060)
-	case 4:				/* 68060 access error */
-	  access_error060 (fp);
-	  break;
-#endif
-#if defined (CONFIG_M68040)
-	case 0x7:			/* 68040 access error */
-	  access_error040 (fp);
-	  break;
-#endif
-#if defined (CPU_M68020_OR_M68030)
-	case 0xa:
-	case 0xb:
-	  bus_error030 (fp);
-	  break;
-#endif
-	default:
-	  die_if_kernel("bad frame format",&fp->ptregs,0);
-#ifdef DEBUG
-	  printk("Unknown SIGSEGV - 4\n");
-#endif
-	  force_sig(SIGSEGV, current);
-	}
-}
-
-
-static int kstack_depth_to_print = 48;
-
-void show_trace(unsigned long *stack)
-{
-	unsigned long *endstack;
-	unsigned long addr;
-	int i;
-
-	printk("Call Trace:");
-	addr = (unsigned long)stack + THREAD_SIZE - 1;
-	endstack = (unsigned long *)(addr & -THREAD_SIZE);
-	i = 0;
-	while (stack + 1 <= endstack) {
-		addr = *stack++;
-		/*
-		 * If the address is either in the text segment of the
-		 * kernel, or in the region which contains vmalloc'ed
-		 * memory, it *may* be the address of a calling
-		 * routine; if so, print it so that someone tracing
-		 * down the cause of the crash will be able to figure
-		 * out the call path that was taken.
-		 */
-		if (__kernel_text_address(addr)) {
-#ifndef CONFIG_KALLSYMS
-			if (i % 5 == 0)
-				printk("\n       ");
-#endif
-			printk(" [<%08lx>] %pS\n", addr, (void *)addr);
-			i++;
-		}
-	}
-	printk("\n");
-}
-
-void show_registers(struct pt_regs *regs)
-{
-	struct frame *fp = (struct frame *)regs;
-	mm_segment_t old_fs = get_fs();
-	u16 c, *cp;
-	unsigned long addr;
-	int i;
-
-	print_modules();
-	printk("PC: [<%08lx>] %pS\n", regs->pc, (void *)regs->pc);
-	printk("SR: %04x  SP: %p  a2: %08lx\n", regs->sr, regs, regs->a2);
-	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
-	       regs->d0, regs->d1, regs->d2, regs->d3);
-	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
-	       regs->d4, regs->d5, regs->a0, regs->a1);
-
-	printk("Process %s (pid: %d, task=%p)\n",
-		current->comm, task_pid_nr(current), current);
-	addr = (unsigned long)&fp->un;
-	printk("Frame format=%X ", regs->format);
-	switch (regs->format) {
-	case 0x2:
-		printk("instr addr=%08lx\n", fp->un.fmt2.iaddr);
-		addr += sizeof(fp->un.fmt2);
-		break;
-	case 0x3:
-		printk("eff addr=%08lx\n", fp->un.fmt3.effaddr);
-		addr += sizeof(fp->un.fmt3);
-		break;
-	case 0x4:
-		printk((CPU_IS_060 ? "fault addr=%08lx fslw=%08lx\n"
-			: "eff addr=%08lx pc=%08lx\n"),
-			fp->un.fmt4.effaddr, fp->un.fmt4.pc);
-		addr += sizeof(fp->un.fmt4);
-		break;
-	case 0x7:
-		printk("eff addr=%08lx ssw=%04x faddr=%08lx\n",
-			fp->un.fmt7.effaddr, fp->un.fmt7.ssw, fp->un.fmt7.faddr);
-		printk("wb 1 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb1s, fp->un.fmt7.wb1a, fp->un.fmt7.wb1dpd0);
-		printk("wb 2 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb2s, fp->un.fmt7.wb2a, fp->un.fmt7.wb2d);
-		printk("wb 3 stat/addr/data: %04x %08lx %08lx\n",
-			fp->un.fmt7.wb3s, fp->un.fmt7.wb3a, fp->un.fmt7.wb3d);
-		printk("push data: %08lx %08lx %08lx %08lx\n",
-			fp->un.fmt7.wb1dpd0, fp->un.fmt7.pd1, fp->un.fmt7.pd2,
-			fp->un.fmt7.pd3);
-		addr += sizeof(fp->un.fmt7);
-		break;
-	case 0x9:
-		printk("instr addr=%08lx\n", fp->un.fmt9.iaddr);
-		addr += sizeof(fp->un.fmt9);
-		break;
-	case 0xa:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
-			fp->un.fmta.ssw, fp->un.fmta.isc, fp->un.fmta.isb,
-			fp->un.fmta.daddr, fp->un.fmta.dobuf);
-		addr += sizeof(fp->un.fmta);
-		break;
-	case 0xb:
-		printk("ssw=%04x isc=%04x isb=%04x daddr=%08lx dobuf=%08lx\n",
-			fp->un.fmtb.ssw, fp->un.fmtb.isc, fp->un.fmtb.isb,
-			fp->un.fmtb.daddr, fp->un.fmtb.dobuf);
-		printk("baddr=%08lx dibuf=%08lx ver=%x\n",
-			fp->un.fmtb.baddr, fp->un.fmtb.dibuf, fp->un.fmtb.ver);
-		addr += sizeof(fp->un.fmtb);
-		break;
-	default:
-		printk("\n");
-	}
-	show_stack(NULL, (unsigned long *)addr);
-
-	printk("Code:");
-	set_fs(KERNEL_DS);
-	cp = (u16 *)regs->pc;
-	for (i = -8; i < 16; i++) {
-		if (get_user(c, cp + i) && i >= 0) {
-			printk(" Bad PC value.");
-			break;
-		}
-		printk(i ? " %04x" : " <%04x>", c);
-	}
-	set_fs(old_fs);
-	printk ("\n");
-}
-
-void show_stack(struct task_struct *task, unsigned long *stack)
-{
-	unsigned long *p;
-	unsigned long *endstack;
-	int i;
-
-	if (!stack) {
-		if (task)
-			stack = (unsigned long *)task->thread.esp0;
-		else
-			stack = (unsigned long *)&stack;
-	}
-	endstack = (unsigned long *)(((unsigned long)stack + THREAD_SIZE - 1) & -THREAD_SIZE);
-
-	printk("Stack from %08lx:", (unsigned long)stack);
-	p = stack;
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (p + 1 > endstack)
-			break;
-		if (i % 8 == 0)
-			printk("\n       ");
-		printk(" %08lx", *p++);
-	}
-	printk("\n");
-	show_trace(stack);
-}
-
-/*
- * The architecture-independent backtrace generator
- */
-void dump_stack(void)
-{
-	unsigned long stack;
-
-	show_trace(&stack);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void bad_super_trap (struct frame *fp)
-{
-	console_verbose();
-	if (fp->ptregs.vector < 4 * ARRAY_SIZE(vec_names))
-		printk ("*** %s ***   FORMAT=%X\n",
-			vec_names[(fp->ptregs.vector) >> 2],
-			fp->ptregs.format);
-	else
-		printk ("*** Exception %d ***   FORMAT=%X\n",
-			(fp->ptregs.vector) >> 2,
-			fp->ptregs.format);
-	if (fp->ptregs.vector >> 2 == VEC_ADDRERR && CPU_IS_020_OR_030) {
-		unsigned short ssw = fp->un.fmtb.ssw;
-
-		printk ("SSW=%#06x  ", ssw);
-
-		if (ssw & RC)
-			printk ("Pipe stage C instruction fault at %#010lx\n",
-				(fp->ptregs.format) == 0xA ?
-				fp->ptregs.pc + 2 : fp->un.fmtb.baddr - 2);
-		if (ssw & RB)
-			printk ("Pipe stage B instruction fault at %#010lx\n",
-				(fp->ptregs.format) == 0xA ?
-				fp->ptregs.pc + 4 : fp->un.fmtb.baddr);
-		if (ssw & DF)
-			printk ("Data %s fault at %#010lx in %s (pc=%#lx)\n",
-				ssw & RW ? "read" : "write",
-				fp->un.fmtb.daddr, space_names[ssw & DFC],
-				fp->ptregs.pc);
-	}
-	printk ("Current process id is %d\n", task_pid_nr(current));
-	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
-}
-
-asmlinkage void trap_c(struct frame *fp)
-{
-	int sig;
-	siginfo_t info;
-
-	if (fp->ptregs.sr & PS_S) {
-		if (fp->ptregs.vector == VEC_TRACE << 2) {
-			/* traced a trapping instruction on a 68020/30,
-			 * real exception will be executed afterwards.
-			 */
-		} else if (!handle_kernel_fault(&fp->ptregs))
-			bad_super_trap(fp);
-		return;
-	}
-
-	/* send the appropriate signal to the user program */
-	switch ((fp->ptregs.vector) >> 2) {
-	    case VEC_ADDRERR:
-		info.si_code = BUS_ADRALN;
-		sig = SIGBUS;
-		break;
-	    case VEC_ILLEGAL:
-	    case VEC_LINE10:
-	    case VEC_LINE11:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_PRIV:
-		info.si_code = ILL_PRVOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_COPROC:
-		info.si_code = ILL_COPROC;
-		sig = SIGILL;
-		break;
-	    case VEC_TRAP1:
-	    case VEC_TRAP2:
-	    case VEC_TRAP3:
-	    case VEC_TRAP4:
-	    case VEC_TRAP5:
-	    case VEC_TRAP6:
-	    case VEC_TRAP7:
-	    case VEC_TRAP8:
-	    case VEC_TRAP9:
-	    case VEC_TRAP10:
-	    case VEC_TRAP11:
-	    case VEC_TRAP12:
-	    case VEC_TRAP13:
-	    case VEC_TRAP14:
-		info.si_code = ILL_ILLTRP;
-		sig = SIGILL;
-		break;
-	    case VEC_FPBRUC:
-	    case VEC_FPOE:
-	    case VEC_FPNAN:
-		info.si_code = FPE_FLTINV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPIR:
-		info.si_code = FPE_FLTRES;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPDIVZ:
-		info.si_code = FPE_FLTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPUNDER:
-		info.si_code = FPE_FLTUND;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPOVER:
-		info.si_code = FPE_FLTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_ZERODIV:
-		info.si_code = FPE_INTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_CHK:
-	    case VEC_TRAP:
-		info.si_code = FPE_INTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_TRACE:		/* ptrace single step */
-		info.si_code = TRAP_TRACE;
-		sig = SIGTRAP;
-		break;
-	    case VEC_TRAP15:		/* breakpoint */
-		info.si_code = TRAP_BRKPT;
-		sig = SIGTRAP;
-		break;
-	    default:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	}
-	info.si_signo = sig;
-	info.si_errno = 0;
-	switch (fp->ptregs.format) {
-	    default:
-		info.si_addr = (void *) fp->ptregs.pc;
-		break;
-	    case 2:
-		info.si_addr = (void *) fp->un.fmt2.iaddr;
-		break;
-	    case 7:
-		info.si_addr = (void *) fp->un.fmt7.effaddr;
-		break;
-	    case 9:
-		info.si_addr = (void *) fp->un.fmt9.iaddr;
-		break;
-	    case 10:
-		info.si_addr = (void *) fp->un.fmta.daddr;
-		break;
-	    case 11:
-		info.si_addr = (void *) fp->un.fmtb.daddr;
-		break;
-	}
-	force_sig_info (sig, &info, current);
-}
-
-void die_if_kernel (char *str, struct pt_regs *fp, int nr)
-{
-	if (!(fp->sr & PS_S))
-		return;
-
-	console_verbose();
-	printk("%s: %08x\n",str,nr);
-	show_registers(fp);
-	add_taint(TAINT_DIE);
-	do_exit(SIGSEGV);
-}
-
-/*
- * This function is called if an error occur while accessing
- * user-space from the fpsp040 code.
- */
-asmlinkage void fpsp040_die(void)
-{
-	do_exit(SIGSEGV);
-}
-
-#ifdef CONFIG_M68KFPU_EMU
-asmlinkage void fpemu_signal(int signal, int code, void *addr)
-{
-	siginfo_t info;
-
-	info.si_signo = signal;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = addr;
-	force_sig_info(signal, &info, current);
-}
-#endif
diff --git a/arch/m68k/kernel/traps_no.c b/arch/m68k/kernel/traps_no.c
deleted file mode 100644
index e67b8c806959..000000000000
--- a/arch/m68k/kernel/traps_no.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- *  linux/arch/m68knommu/kernel/traps.c
- *
- *  Copyright (C) 1993, 1994 by Hamish Macdonald
- *
- *  68040 fixes by Michael Rausch
- *  68040 fixes by Martin Apel
- *  68060 fixes by Roman Hodek
- *  68060 fixes by Jesper Skov
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/*
- * Sets up all exception vectors
- */
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/user.h>
-#include <linux/string.h>
-#include <linux/linkage.h>
-#include <linux/init.h>
-#include <linux/ptrace.h>
-#include <linux/kallsyms.h>
-
-#include <asm/setup.h>
-#include <asm/fpu.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/traps.h>
-#include <asm/pgtable.h>
-#include <asm/machdep.h>
-#include <asm/siginfo.h>
-
-static char const * const vec_names[] = {
-	"RESET SP", "RESET PC", "BUS ERROR", "ADDRESS ERROR",
-	"ILLEGAL INSTRUCTION", "ZERO DIVIDE", "CHK", "TRAPcc",
-	"PRIVILEGE VIOLATION", "TRACE", "LINE 1010", "LINE 1111",
-	"UNASSIGNED RESERVED 12", "COPROCESSOR PROTOCOL VIOLATION",
-	"FORMAT ERROR", "UNINITIALIZED INTERRUPT",
-	"UNASSIGNED RESERVED 16", "UNASSIGNED RESERVED 17",
-	"UNASSIGNED RESERVED 18", "UNASSIGNED RESERVED 19",
-	"UNASSIGNED RESERVED 20", "UNASSIGNED RESERVED 21",
-	"UNASSIGNED RESERVED 22", "UNASSIGNED RESERVED 23",
-	"SPURIOUS INTERRUPT", "LEVEL 1 INT", "LEVEL 2 INT", "LEVEL 3 INT",
-	"LEVEL 4 INT", "LEVEL 5 INT", "LEVEL 6 INT", "LEVEL 7 INT",
-	"SYSCALL", "TRAP #1", "TRAP #2", "TRAP #3",
-	"TRAP #4", "TRAP #5", "TRAP #6", "TRAP #7",
-	"TRAP #8", "TRAP #9", "TRAP #10", "TRAP #11",
-	"TRAP #12", "TRAP #13", "TRAP #14", "TRAP #15",
-	"FPCP BSUN", "FPCP INEXACT", "FPCP DIV BY 0", "FPCP UNDERFLOW",
-	"FPCP OPERAND ERROR", "FPCP OVERFLOW", "FPCP SNAN",
-	"FPCP UNSUPPORTED OPERATION",
-	"MMU CONFIGURATION ERROR"
-};
-
-void die_if_kernel(char *str, struct pt_regs *fp, int nr)
-{
-	if (!(fp->sr & PS_S))
-		return;
-
-	console_verbose();
-	printk(KERN_EMERG "%s: %08x\n",str,nr);
-	printk(KERN_EMERG "PC: [<%08lx>]\nSR: %04x  SP: %p  a2: %08lx\n",
-	       fp->pc, fp->sr, fp, fp->a2);
-	printk(KERN_EMERG "d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
-	       fp->d0, fp->d1, fp->d2, fp->d3);
-	printk(KERN_EMERG "d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
-	       fp->d4, fp->d5, fp->a0, fp->a1);
-
-	printk(KERN_EMERG "Process %s (pid: %d, stackpage=%08lx)\n",
-		current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
-	show_stack(NULL, (unsigned long *)(fp + 1));
-	add_taint(TAINT_DIE);
-	do_exit(SIGSEGV);
-}
-
-asmlinkage void buserr_c(struct frame *fp)
-{
-	/* Only set esp0 if coming from user mode */
-	if (user_mode(&fp->ptregs))
-		current->thread.esp0 = (unsigned long) fp;
-
-#if defined(DEBUG)
-	printk (KERN_DEBUG "*** Bus Error *** Format is %x\n", fp->ptregs.format);
-#endif
-
-	die_if_kernel("bad frame format",&fp->ptregs,0);
-#if defined(DEBUG)
-	printk(KERN_DEBUG "Unknown SIGSEGV - 4\n");
-#endif
-	force_sig(SIGSEGV, current);
-}
-
-static void print_this_address(unsigned long addr, int i)
-{
-#ifdef CONFIG_KALLSYMS
-	printk(KERN_EMERG " [%08lx] ", addr);
-	print_symbol(KERN_CONT "%s\n", addr);
-#else
-	if (i % 5)
-		printk(KERN_CONT " [%08lx] ", addr);
-	else
-		printk(KERN_EMERG " [%08lx] ", addr);
-	i++;
-#endif
-}
-
-int kstack_depth_to_print = 48;
-
-static void __show_stack(struct task_struct *task, unsigned long *stack)
-{
-	unsigned long *endstack, addr;
-#ifdef CONFIG_FRAME_POINTER
-	unsigned long *last_stack;
-#endif
-	int i;
-
-	if (!stack)
-		stack = (unsigned long *)task->thread.ksp;
-
-	addr = (unsigned long) stack;
-	endstack = (unsigned long *) PAGE_ALIGN(addr);
-
-	printk(KERN_EMERG "Stack from %08lx:", (unsigned long)stack);
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (stack + 1 + i > endstack)
-			break;
-		if (i % 8 == 0)
-			printk(KERN_EMERG "       ");
-		printk(KERN_CONT " %08lx", *(stack + i));
-	}
-	printk("\n");
-	i = 0;
-
-#ifdef CONFIG_FRAME_POINTER
-	printk(KERN_EMERG "Call Trace:\n");
-
-	last_stack = stack - 1;
-	while (stack <= endstack && stack > last_stack) {
-
-		addr = *(stack + 1);
-		print_this_address(addr, i);
-		i++;
-
-		last_stack = stack;
-		stack = (unsigned long *)*stack;
-	}
-	printk("\n");
-#else
-	printk(KERN_EMERG "Call Trace with CONFIG_FRAME_POINTER disabled:\n");
-	while (stack <= endstack) {
-		addr = *stack++;
-		/*
-		 * If the address is either in the text segment of the kernel,
-		 * or in a region which is occupied by a module then it *may*
-		 * be the address of a calling routine; if so, print it so that
-		 * someone tracing down the cause of the crash will be able to
-		 * figure out the call path that was taken.
-		 */
-		if (__kernel_text_address(addr)) {
-			print_this_address(addr, i);
-			i++;
-		}
-	}
-	printk(KERN_CONT "\n");
-#endif
-}
-
-void bad_super_trap(struct frame *fp)
-{
-	int vector = (fp->ptregs.vector >> 2) & 0xff;
-
-	console_verbose();
-	if (vector < ARRAY_SIZE(vec_names))
-		printk (KERN_WARNING "*** %s ***   FORMAT=%X\n",
-			vec_names[vector],
-			fp->ptregs.format);
-	else
-		printk (KERN_WARNING "*** Exception %d ***   FORMAT=%X\n",
-			vector,
-			fp->ptregs.format);
-	printk (KERN_WARNING "Current process id is %d\n", current->pid);
-	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
-}
-
-asmlinkage void trap_c(struct frame *fp)
-{
-	int sig;
-	int vector = (fp->ptregs.vector >> 2) & 0xff;
-	siginfo_t info;
-
-	if (fp->ptregs.sr & PS_S) {
-		if (vector == VEC_TRACE) {
-			/* traced a trapping instruction */
-		} else
-			bad_super_trap(fp);
-		return;
-	}
-
-	/* send the appropriate signal to the user program */
-	switch (vector) {
-	    case VEC_ADDRERR:
-		info.si_code = BUS_ADRALN;
-		sig = SIGBUS;
-		break;
-	    case VEC_ILLEGAL:
-	    case VEC_LINE10:
-	    case VEC_LINE11:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_PRIV:
-		info.si_code = ILL_PRVOPC;
-		sig = SIGILL;
-		break;
-	    case VEC_COPROC:
-		info.si_code = ILL_COPROC;
-		sig = SIGILL;
-		break;
-	    case VEC_TRAP1: /* gdbserver breakpoint */
-		fp->ptregs.pc -= 2;
-		info.si_code = TRAP_TRACE;
-		sig = SIGTRAP;
-		break;
-	    case VEC_TRAP2:
-	    case VEC_TRAP3:
-	    case VEC_TRAP4:
-	    case VEC_TRAP5:
-	    case VEC_TRAP6:
-	    case VEC_TRAP7:
-	    case VEC_TRAP8:
-	    case VEC_TRAP9:
-	    case VEC_TRAP10:
-	    case VEC_TRAP11:
-	    case VEC_TRAP12:
-	    case VEC_TRAP13:
-	    case VEC_TRAP14:
-		info.si_code = ILL_ILLTRP;
-		sig = SIGILL;
-		break;
-	    case VEC_FPBRUC:
-	    case VEC_FPOE:
-	    case VEC_FPNAN:
-		info.si_code = FPE_FLTINV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPIR:
-		info.si_code = FPE_FLTRES;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPDIVZ:
-		info.si_code = FPE_FLTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPUNDER:
-		info.si_code = FPE_FLTUND;
-		sig = SIGFPE;
-		break;
-	    case VEC_FPOVER:
-		info.si_code = FPE_FLTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_ZERODIV:
-		info.si_code = FPE_INTDIV;
-		sig = SIGFPE;
-		break;
-	    case VEC_CHK:
-	    case VEC_TRAP:
-		info.si_code = FPE_INTOVF;
-		sig = SIGFPE;
-		break;
-	    case VEC_TRACE:		/* ptrace single step */
-		info.si_code = TRAP_TRACE;
-		sig = SIGTRAP;
-		break;
-	    case VEC_TRAP15:		/* breakpoint */
-		info.si_code = TRAP_BRKPT;
-		sig = SIGTRAP;
-		break;
-	    default:
-		info.si_code = ILL_ILLOPC;
-		sig = SIGILL;
-		break;
-	}
-	info.si_signo = sig;
-	info.si_errno = 0;
-	switch (fp->ptregs.format) {
-	    default:
-		info.si_addr = (void *) fp->ptregs.pc;
-		break;
-	    case 2:
-		info.si_addr = (void *) fp->un.fmt2.iaddr;
-		break;
-	    case 7:
-		info.si_addr = (void *) fp->un.fmt7.effaddr;
-		break;
-	    case 9:
-		info.si_addr = (void *) fp->un.fmt9.iaddr;
-		break;
-	    case 10:
-		info.si_addr = (void *) fp->un.fmta.daddr;
-		break;
-	    case 11:
-		info.si_addr = (void *) fp->un.fmtb.daddr;
-		break;
-	}
-	force_sig_info (sig, &info, current);
-}
-
-asmlinkage void set_esp0(unsigned long ssp)
-{
-	current->thread.esp0 = ssp;
-}
-
-/*
- * The architecture-independent backtrace generator
- */
-void dump_stack(void)
-{
-	/*
-	 * We need frame pointers for this little trick, which works as follows:
-	 *
-	 * +------------+ 0x00
-	 * | Next SP	|	-> 0x0c
-	 * +------------+ 0x04
-	 * | Caller	|
-	 * +------------+ 0x08
-	 * | Local vars	|	-> our stack var
-	 * +------------+ 0x0c
-	 * | Next SP	|	-> 0x18, that is what we pass to show_stack()
-	 * +------------+ 0x10
-	 * | Caller	|
-	 * +------------+ 0x14
-	 * | Local vars	|
-	 * +------------+ 0x18
-	 * | ...	|
-	 * +------------+
-	 */
-
-	unsigned long *stack;
-
-	stack = (unsigned long *)&stack;
-	stack++;
-	__show_stack(current, stack);
-}
-EXPORT_SYMBOL(dump_stack);
-
-void show_stack(struct task_struct *task, unsigned long *stack)
-{
-	if (!stack && !task)
-		dump_stack();
-	else
-		__show_stack(task, stack);
-}
diff --git a/arch/m68k/kernel/vectors.c b/arch/m68k/kernel/vectors.c
new file mode 100644
index 000000000000..147b03fbc71e
--- /dev/null
+++ b/arch/m68k/kernel/vectors.c
@@ -0,0 +1,145 @@
+/*
+ *  vectors.c
+ *
+ *  Copyright (C) 1993, 1994 by Hamish Macdonald
+ *
+ *  68040 fixes by Michael Rausch
+ *  68040 fixes by Martin Apel
+ *  68040 fixes and writeback by Richard Zidlicky
+ *  68060 fixes by Roman Hodek
+ *  68060 fixes by Jesper Skov
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+/*
+ * Sets up all exception vectors
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+
+#include <asm/setup.h>
+#include <asm/fpu.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+
+/* assembler routines */
+asmlinkage void system_call(void);
+asmlinkage void buserr(void);
+asmlinkage void trap(void);
+asmlinkage void nmihandler(void);
+#ifdef CONFIG_M68KFPU_EMU
+asmlinkage void fpu_emu(void);
+#endif
+
+e_vector vectors[256];
+
+/* nmi handler for the Amiga */
+asm(".text\n"
+    __ALIGN_STR "\n"
+    "nmihandler: rte");
+
+/*
+ * this must be called very early as the kernel might
+ * use some instruction that are emulated on the 060
+ * and so we're prepared for early probe attempts (e.g. nf_init).
+ */
+void __init base_trap_init(void)
+{
+	if (MACH_IS_SUN3X) {
+		extern e_vector *sun3x_prom_vbr;
+
+		__asm__ volatile ("movec %%vbr, %0" : "=r" (sun3x_prom_vbr));
+	}
+
+	/* setup the exception vector table */
+	__asm__ volatile ("movec %0,%%vbr" : : "r" ((void*)vectors));
+
+	if (CPU_IS_060) {
+		/* set up ISP entry points */
+		asmlinkage void unimp_vec(void) asm ("_060_isp_unimp");
+
+		vectors[VEC_UNIMPII] = unimp_vec;
+	}
+
+	vectors[VEC_BUSERR] = buserr;
+	vectors[VEC_ILLEGAL] = trap;
+	vectors[VEC_SYS] = system_call;
+}
+
+void __init trap_init (void)
+{
+	int i;
+
+	for (i = VEC_SPUR; i <= VEC_INT7; i++)
+		vectors[i] = bad_inthandler;
+
+	for (i = 0; i < VEC_USER; i++)
+		if (!vectors[i])
+			vectors[i] = trap;
+
+	for (i = VEC_USER; i < 256; i++)
+		vectors[i] = bad_inthandler;
+
+#ifdef CONFIG_M68KFPU_EMU
+	if (FPU_IS_EMU)
+		vectors[VEC_LINE11] = fpu_emu;
+#endif
+
+	if (CPU_IS_040 && !FPU_IS_EMU) {
+		/* set up FPSP entry points */
+		asmlinkage void dz_vec(void) asm ("dz");
+		asmlinkage void inex_vec(void) asm ("inex");
+		asmlinkage void ovfl_vec(void) asm ("ovfl");
+		asmlinkage void unfl_vec(void) asm ("unfl");
+		asmlinkage void snan_vec(void) asm ("snan");
+		asmlinkage void operr_vec(void) asm ("operr");
+		asmlinkage void bsun_vec(void) asm ("bsun");
+		asmlinkage void fline_vec(void) asm ("fline");
+		asmlinkage void unsupp_vec(void) asm ("unsupp");
+
+		vectors[VEC_FPDIVZ] = dz_vec;
+		vectors[VEC_FPIR] = inex_vec;
+		vectors[VEC_FPOVER] = ovfl_vec;
+		vectors[VEC_FPUNDER] = unfl_vec;
+		vectors[VEC_FPNAN] = snan_vec;
+		vectors[VEC_FPOE] = operr_vec;
+		vectors[VEC_FPBRUC] = bsun_vec;
+		vectors[VEC_LINE11] = fline_vec;
+		vectors[VEC_FPUNSUP] = unsupp_vec;
+	}
+
+	if (CPU_IS_060 && !FPU_IS_EMU) {
+		/* set up IFPSP entry points */
+		asmlinkage void snan_vec6(void) asm ("_060_fpsp_snan");
+		asmlinkage void operr_vec6(void) asm ("_060_fpsp_operr");
+		asmlinkage void ovfl_vec6(void) asm ("_060_fpsp_ovfl");
+		asmlinkage void unfl_vec6(void) asm ("_060_fpsp_unfl");
+		asmlinkage void dz_vec6(void) asm ("_060_fpsp_dz");
+		asmlinkage void inex_vec6(void) asm ("_060_fpsp_inex");
+		asmlinkage void fline_vec6(void) asm ("_060_fpsp_fline");
+		asmlinkage void unsupp_vec6(void) asm ("_060_fpsp_unsupp");
+		asmlinkage void effadd_vec6(void) asm ("_060_fpsp_effadd");
+
+		vectors[VEC_FPNAN] = snan_vec6;
+		vectors[VEC_FPOE] = operr_vec6;
+		vectors[VEC_FPOVER] = ovfl_vec6;
+		vectors[VEC_FPUNDER] = unfl_vec6;
+		vectors[VEC_FPDIVZ] = dz_vec6;
+		vectors[VEC_FPIR] = inex_vec6;
+		vectors[VEC_LINE11] = fline_vec6;
+		vectors[VEC_FPUNSUP] = unsupp_vec6;
+		vectors[VEC_UNIMPEA] = effadd_vec6;
+	}
+
+        /* if running on an amiga, make the NMI interrupt do nothing */
+	if (MACH_IS_AMIGA) {
+		vectors[VEC_INT7] = nmihandler;
+	}
+}
+
diff --git a/arch/m68k/kernel/vmlinux.lds_no.S b/arch/m68k/kernel/vmlinux.lds_no.S
index 7dc4087a9545..4e2389340837 100644
--- a/arch/m68k/kernel/vmlinux.lds_no.S
+++ b/arch/m68k/kernel/vmlinux.lds_no.S
@@ -77,7 +77,6 @@ SECTIONS {
 
 		*(.rodata) *(.rodata.*)
 		*(__vermagic)		/* Kernel version magic */
-		*(__markers_strings)
 		*(.rodata1)
 		*(.rodata.str1.1)
 
diff --git a/arch/m68k/lib/memcpy.c b/arch/m68k/lib/memcpy.c
index 064889316974..10ca051d56b8 100644
--- a/arch/m68k/lib/memcpy.c
+++ b/arch/m68k/lib/memcpy.c
@@ -22,6 +22,15 @@ void *memcpy(void *to, const void *from, size_t n)
 		from = cfrom;
 		n--;
 	}
+#if defined(CONFIG_M68000)
+	if ((long)from & 1) {
+		char *cto = to;
+		const char *cfrom = from;
+		for (; n; n--)
+			*cto++ = *cfrom++;
+		return xto;
+	}
+#endif
 	if (n > 2 && (long)to & 2) {
 		short *sto = to;
 		const short *sfrom = from;
diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c
index 900d899f3323..f92190c159b4 100644
--- a/arch/m68k/mac/macints.c
+++ b/arch/m68k/mac/macints.c
@@ -370,7 +370,7 @@ int mac_irq_pending(unsigned int irq)
 		break;
 	case 4:
 		if (psc_present)
-			psc_irq_pending(irq);
+			return psc_irq_pending(irq);
 		break;
 	}
 	return 0;
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index e023fc6b37e5..eb915551de69 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -304,35 +304,41 @@ static void via_write_pram(int offset, __u8 data)
 static long via_read_time(void)
 {
 	union {
-		__u8  cdata[4];
-		long  idata;
+		__u8 cdata[4];
+		long idata;
 	} result, last_result;
-	int	ct;
+	int count = 1;
+
+	via_pram_command(0x81, &last_result.cdata[3]);
+	via_pram_command(0x85, &last_result.cdata[2]);
+	via_pram_command(0x89, &last_result.cdata[1]);
+	via_pram_command(0x8D, &last_result.cdata[0]);
 
 	/*
 	 * The NetBSD guys say to loop until you get the same reading
 	 * twice in a row.
 	 */
 
-	ct = 0;
-	do {
-		if (++ct > 10) {
-			printk("via_read_time: couldn't get valid time, "
-			       "last read = 0x%08lx and 0x%08lx\n",
-			       last_result.idata, result.idata);
-			break;
-		}
-
-		last_result.idata = result.idata;
-		result.idata = 0;
-
+	while (1) {
 		via_pram_command(0x81, &result.cdata[3]);
 		via_pram_command(0x85, &result.cdata[2]);
 		via_pram_command(0x89, &result.cdata[1]);
 		via_pram_command(0x8D, &result.cdata[0]);
-	} while (result.idata != last_result.idata);
 
-	return result.idata - RTC_OFFSET;
+		if (result.idata == last_result.idata)
+			return result.idata - RTC_OFFSET;
+
+		if (++count > 10)
+			break;
+
+		last_result.idata = result.idata;
+	}
+
+	pr_err("via_read_time: failed to read a stable value; "
+	       "got 0x%08lx then 0x%08lx\n",
+	       last_result.idata, result.idata);
+
+	return 0;
 }
 
 /*
diff --git a/arch/m68k/mm/init_no.c b/arch/m68k/mm/init_no.c
index 50cd12cf28d9..1e33d39ca9a0 100644
--- a/arch/m68k/mm/init_no.c
+++ b/arch/m68k/mm/init_no.c
@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
+#include <asm/sections.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -44,9 +45,6 @@
  */
 void *empty_zero_page;
 
-extern unsigned long memory_start;
-extern unsigned long memory_end;
-
 /*
  * paging_init() continues the virtual memory environment setup which
  * was begun by the code in arch/head.S.
@@ -78,8 +76,6 @@ void __init mem_init(void)
 {
 	int codek = 0, datak = 0, initk = 0;
 	unsigned long tmp;
-	extern char _etext, _stext, _sdata, _ebss, __init_begin, __init_end;
-	extern unsigned int _ramend, _rambase;
 	unsigned long len = _ramend - _rambase;
 	unsigned long start_mem = memory_start; /* DAVIDM - these must start at end of kernel */
 	unsigned long end_mem   = memory_end; /* DAVIDM - this must not include kernel stack at top */
@@ -95,9 +91,9 @@ void __init mem_init(void)
 	/* this will put all memory onto the freelists */
 	totalram_pages = free_all_bootmem();
 
-	codek = (&_etext - &_stext) >> 10;
-	datak = (&_ebss - &_sdata) >> 10;
-	initk = (&__init_begin - &__init_end) >> 10;
+	codek = (_etext - _stext) >> 10;
+	datak = (_ebss - _sdata) >> 10;
+	initk = (__init_begin - __init_end) >> 10;
 
 	tmp = nr_free_pages() << PAGE_SHIFT;
 	printk(KERN_INFO "Memory available: %luk/%luk RAM, (%dk kernel code, %dk data)\n",
@@ -129,22 +125,21 @@ void free_initmem(void)
 {
 #ifdef CONFIG_RAMKERNEL
 	unsigned long addr;
-	extern char __init_begin, __init_end;
 	/*
 	 * The following code should be cool even if these sections
 	 * are not page aligned.
 	 */
-	addr = PAGE_ALIGN((unsigned long)(&__init_begin));
+	addr = PAGE_ALIGN((unsigned long) __init_begin);
 	/* next to check that the page we free is not a partial page */
-	for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) {
+	for (; addr + PAGE_SIZE < ((unsigned long) __init_end); addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
 		free_page(addr);
 		totalram_pages++;
 	}
 	pr_notice("Freeing unused kernel memory: %luk freed (0x%x - 0x%x)\n",
-			(addr - PAGE_ALIGN((long) &__init_begin)) >> 10,
-			(int)(PAGE_ALIGN((unsigned long)(&__init_begin))),
+			(addr - PAGE_ALIGN((unsigned long) __init_begin)) >> 10,
+			(int)(PAGE_ALIGN((unsigned long) __init_begin)),
 			(int)(addr - PAGE_SIZE));
 #endif
 }
diff --git a/arch/m68k/platform/520x/config.c b/arch/m68k/platform/520x/config.c
index 621238f1a219..8a98683f1b15 100644
--- a/arch/m68k/platform/520x/config.c
+++ b/arch/m68k/platform/520x/config.c
@@ -91,9 +91,9 @@ static struct resource m520x_qspi_resources[] = {
 	},
 };
 
-#define MCFQSPI_CS0    62
-#define MCFQSPI_CS1    63
-#define MCFQSPI_CS2    44
+#define MCFQSPI_CS0    46
+#define MCFQSPI_CS1    47
+#define MCFQSPI_CS2    27
 
 static int m520x_cs_setup(struct mcfqspi_cs_control *cs_control)
 {
diff --git a/arch/m68k/platform/520x/gpio.c b/arch/m68k/platform/520x/gpio.c
index d757328563d1..9bcc3e4b60c5 100644
--- a/arch/m68k/platform/520x/gpio.c
+++ b/arch/m68k/platform/520x/gpio.c
@@ -38,42 +38,6 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 	},
 	{
 		.gpio_chip			= {
-			.label			= "BUSCTL",
-			.request		= mcf_gpio_request,
-			.free			= mcf_gpio_free,
-			.direction_input	= mcf_gpio_direction_input,
-			.direction_output	= mcf_gpio_direction_output,
-			.get			= mcf_gpio_get_value,
-			.set			= mcf_gpio_set_value_fast,
-			.base			= 8,
-			.ngpio			= 4,
-		},
-		.pddr				= (void __iomem *) MCFGPIO_PDDR_BUSCTL,
-		.podr				= (void __iomem *) MCFGPIO_PODR_BUSCTL,
-		.ppdr				= (void __iomem *) MCFGPIO_PPDSDR_BUSCTL,
-		.setr				= (void __iomem *) MCFGPIO_PPDSDR_BUSCTL,
-		.clrr				= (void __iomem *) MCFGPIO_PCLRR_BUSCTL,
-	},
-	{
-		.gpio_chip			= {
-			.label			= "BE",
-			.request		= mcf_gpio_request,
-			.free			= mcf_gpio_free,
-			.direction_input	= mcf_gpio_direction_input,
-			.direction_output	= mcf_gpio_direction_output,
-			.get			= mcf_gpio_get_value,
-			.set			= mcf_gpio_set_value_fast,
-			.base			= 16,
-			.ngpio			= 4,
-		},
-		.pddr				= (void __iomem *) MCFGPIO_PDDR_BE,
-		.podr				= (void __iomem *) MCFGPIO_PODR_BE,
-		.ppdr				= (void __iomem *) MCFGPIO_PPDSDR_BE,
-		.setr				= (void __iomem *) MCFGPIO_PPDSDR_BE,
-		.clrr				= (void __iomem *) MCFGPIO_PCLRR_BE,
-	},
-	{
-		.gpio_chip			= {
 			.label			= "CS",
 			.request		= mcf_gpio_request,
 			.free			= mcf_gpio_free,
@@ -81,7 +45,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 25,
+			.base			= 9,
 			.ngpio			= 3,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_CS,
@@ -99,7 +63,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 32,
+			.base			= 16,
 			.ngpio			= 4,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_FECI2C,
@@ -117,7 +81,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 40,
+			.base			= 24,
 			.ngpio			= 4,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_QSPI,
@@ -135,7 +99,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 48,
+			.base			= 32,
 			.ngpio			= 4,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_TIMER,
@@ -153,7 +117,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 56,
+			.base			= 40,
 			.ngpio			= 8,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_UART,
@@ -171,7 +135,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 64,
+			.base			= 48,
 			.ngpio			= 8,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_FECH,
@@ -189,7 +153,7 @@ static struct mcf_gpio_chip mcf_gpio_chips[] = {
 			.direction_output	= mcf_gpio_direction_output,
 			.get			= mcf_gpio_get_value,
 			.set			= mcf_gpio_set_value_fast,
-			.base			= 72,
+			.base			= 56,
 			.ngpio			= 8,
 		},
 		.pddr				= (void __iomem *) MCFGPIO_PDDR_FECL,
diff --git a/arch/m68k/platform/68328/Makefile b/arch/m68k/platform/68328/Makefile
index 5e5435552d56..e4dfd8fde068 100644
--- a/arch/m68k/platform/68328/Makefile
+++ b/arch/m68k/platform/68328/Makefile
@@ -2,7 +2,10 @@
 # Makefile for arch/m68knommu/platform/68328.
 #
 
-head-y			= head-$(MODEL).o
+model-y			  := ram
+model-$(CONFIG_ROMKERNEL) := rom
+
+head-y			= head-$(model-y).o
 head-$(CONFIG_PILOT)	= head-pilot.o
 head-$(CONFIG_DRAGEN2)	= head-de2.o
 
diff --git a/arch/m68k/platform/68328/entry.S b/arch/m68k/platform/68328/entry.S
index 293e1eba9acc..5c39b80ed7de 100644
--- a/arch/m68k/platform/68328/entry.S
+++ b/arch/m68k/platform/68328/entry.S
@@ -67,7 +67,7 @@ ret_from_signal:
 	jra	ret_from_exception
 
 ENTRY(system_call)
-	SAVE_ALL
+	SAVE_ALL_SYS
 
 	/* save top of frame*/
 	pea	%sp@
@@ -129,7 +129,7 @@ Lsignal_return:
  * This is the main interrupt handler, responsible for calling process_int()
  */
 inthandler1:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -140,7 +140,7 @@ inthandler1:
 	bra	ret_from_interrupt
 
 inthandler2:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -151,7 +151,7 @@ inthandler2:
 	bra	ret_from_interrupt
 
 inthandler3:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -162,7 +162,7 @@ inthandler3:
 	bra	ret_from_interrupt
 
 inthandler4:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -173,7 +173,7 @@ inthandler4:
 	bra	ret_from_interrupt
 
 inthandler5:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -184,7 +184,7 @@ inthandler5:
 	bra	ret_from_interrupt
 
 inthandler6:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -195,7 +195,7 @@ inthandler6:
 	bra	ret_from_interrupt
 
 inthandler7:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
@@ -206,7 +206,7 @@ inthandler7:
 	bra	ret_from_interrupt
 
 inthandler:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and	#0x3ff, %d0
 
diff --git a/arch/m68k/platform/68360/Makefile b/arch/m68k/platform/68360/Makefile
index cf5af73a5789..f6f434383049 100644
--- a/arch/m68k/platform/68360/Makefile
+++ b/arch/m68k/platform/68360/Makefile
@@ -1,10 +1,12 @@
 #
 # Makefile for arch/m68knommu/platform/68360.
 #
+model-y			  := ram
+model-$(CONFIG_ROMKERNEL) := rom
 
 obj-y := config.o commproc.o entry.o ints.o
 
 extra-y := head.o
 
-$(obj)/head.o: $(obj)/head-$(MODEL).o
-	ln -sf head-$(MODEL).o $(obj)/head.o
+$(obj)/head.o: $(obj)/head-$(model-y).o
+	ln -sf head-$(model-y).o $(obj)/head.o
diff --git a/arch/m68k/platform/68360/entry.S b/arch/m68k/platform/68360/entry.S
index abbb89672ea0..aa47d1d49929 100644
--- a/arch/m68k/platform/68360/entry.S
+++ b/arch/m68k/platform/68360/entry.S
@@ -63,7 +63,7 @@ ret_from_signal:
 	jra	ret_from_exception
 
 ENTRY(system_call)
-	SAVE_ALL
+	SAVE_ALL_SYS
 
 	/* save top of frame*/
 	pea	%sp@
@@ -125,7 +125,7 @@ Lsignal_return:
  * This is the main interrupt handler, responsible for calling do_IRQ()
  */
 inthandler:
-	SAVE_ALL
+	SAVE_ALL_INT
 	movew	%sp@(PT_OFF_FORMATVEC), %d0
 	and.l	#0x3ff, %d0
 	lsr.l   #0x02,  %d0
diff --git a/arch/m68k/platform/coldfire/entry.S b/arch/m68k/platform/coldfire/entry.S
index bd27242c2f43..3157461a8d1d 100644
--- a/arch/m68k/platform/coldfire/entry.S
+++ b/arch/m68k/platform/coldfire/entry.S
@@ -61,7 +61,7 @@ enosys:
 	bra	1f
 
 ENTRY(system_call)
-	SAVE_ALL
+	SAVE_ALL_SYS
 	move	#0x2000,%sr		/* enable intrs again */
 
 	cmpl	#NR_syscalls,%d0
@@ -165,9 +165,7 @@ Lsignal_return:
  * sources). Calls up to high level code to do all the work.
  */
 ENTRY(inthandler)
-	SAVE_ALL
-	moveq	#-1,%d0
-	movel	%d0,%sp@(PT_OFF_ORIG_D0)
+	SAVE_ALL_INT
 
 	movew	%sp@(PT_OFF_FORMATVEC),%d0 /* put exception # in d0 */
 	andl	#0x03fc,%d0		/* mask out vector only */
diff --git a/arch/m68k/q40/README b/arch/m68k/q40/README
index b26d5f55e91d..93f4c4cd3c45 100644
--- a/arch/m68k/q40/README
+++ b/arch/m68k/q40/README
@@ -31,7 +31,7 @@ drivers used by the Q40, apart from the very obvious (console etc.):
 		char/joystick/*		# most of this should work, not
 				        # in default config.in
 	        block/q40ide.c		# startup for ide
-		      ide*		# see Documentation/ide.txt
+		      ide*		# see Documentation/ide/ide.txt
 		      floppy.c		# normal PC driver, DMA emu in asm/floppy.h
 					# and arch/m68k/kernel/entry.S
 					# see drivers/block/README.fd
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index 8fbb0ec10233..3a3e5b886854 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -16,7 +16,7 @@
 #define _ASM_MICROBLAZE_DMA_MAPPING_H
 
 /*
- * See Documentation/PCI/PCI-DMA-mapping.txt and
+ * See Documentation/DMA-API-HOWTO.txt and
  * Documentation/DMA-API.txt for documentation.
  */
 
@@ -28,12 +28,12 @@
 #include <linux/dma-attrs.h>
 #include <asm/io.h>
 #include <asm-generic/dma-coherent.h>
+#include <asm/cacheflush.h>
 
 #define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
 
 #define __dma_alloc_coherent(dev, gfp, size, handle)	NULL
 #define __dma_free_coherent(size, addr)		((void)0)
-#define __dma_sync(addr, size, rw)		((void)0)
 
 static inline unsigned long device_to_mask(struct device *dev)
 {
@@ -95,6 +95,22 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask)
 
 #include <asm-generic/dma-mapping-common.h>
 
+static inline void __dma_sync(unsigned long paddr,
+			      size_t size, enum dma_data_direction direction)
+{
+	switch (direction) {
+	case DMA_TO_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		flush_dcache_range(paddr, paddr + size);
+		break;
+	case DMA_FROM_DEVICE:
+		invalidate_dcache_range(paddr, paddr + size);
+		break;
+	default:
+		BUG();
+	}
+}
+
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
@@ -135,7 +151,7 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
-	__dma_sync(vaddr, size, (int)direction);
+	__dma_sync(virt_to_phys(vaddr), size, (int)direction);
 }
 
 #endif	/* _ASM_MICROBLAZE_DMA_MAPPING_H */
diff --git a/arch/microblaze/include/asm/elf.h b/arch/microblaze/include/asm/elf.h
index 098dfdde4b06..834849f59ae8 100644
--- a/arch/microblaze/include/asm/elf.h
+++ b/arch/microblaze/include/asm/elf.h
@@ -16,13 +16,15 @@
  * I've snaffled the value from the microblaze binutils source code
  * /binutils/microblaze/include/elf/microblaze.h
  */
-#define EM_XILINX_MICROBLAZE	0xbaab
-#define ELF_ARCH		EM_XILINX_MICROBLAZE
+#define EM_MICROBLAZE		189
+#define EM_MICROBLAZE_OLD	0xbaab
+#define ELF_ARCH		EM_MICROBLAZE
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
  */
-#define elf_check_arch(x)	((x)->e_machine == EM_XILINX_MICROBLAZE)
+#define elf_check_arch(x)	((x)->e_machine == EM_MICROBLAZE \
+				 || (x)->e_machine == EM_MICROBLAZE_OLD)
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/arch/microblaze/include/asm/system.h b/arch/microblaze/include/asm/system.h
index e6a2284571dc..5a433cbaafb3 100644
--- a/arch/microblaze/include/asm/system.h
+++ b/arch/microblaze/include/asm/system.h
@@ -17,8 +17,6 @@
 #include <asm-generic/cmpxchg.h>
 #include <asm-generic/cmpxchg-local.h>
 
-#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
-
 struct task_struct;
 struct thread_info;
 
@@ -96,11 +94,4 @@ extern struct dentry *of_debugfs_root;
 
 #define arch_align_stack(x) (x)
 
-/*
- * MicroBlaze doesn't handle unaligned accesses in hardware.
- *
- * Based on this we force the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN	2
-
 #endif /* _ASM_MICROBLAZE_SYSTEM_H */
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 5bb95a11880d..072b0077abf9 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -95,7 +95,7 @@ static inline int ___range_ok(unsigned long addr, unsigned long size)
  *  - "addr", "addr + size" and "size" are all below the limit
  */
 #define access_ok(type, addr, size) \
-	(get_fs().seg > (((unsigned long)(addr)) | \
+	(get_fs().seg >= (((unsigned long)(addr)) | \
 		(size) | ((unsigned long)(addr) + (size))))
 
 /* || printk("access_ok failed for %s at 0x%08lx (size %d), seg 0x%08x\n",
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c
index 44394d80a683..54194b28574a 100644
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -34,6 +34,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
 	{"8.00.a", 0x12},
 	{"8.00.b", 0x13},
 	{"8.10.a", 0x14},
+	{"8.20.a", 0x15},
 	{NULL, 0},
 };
 
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 393e6b2db688..dc6416d265d6 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -11,7 +11,6 @@
 #include <linux/gfp.h>
 #include <linux/dma-debug.h>
 #include <asm/bug.h>
-#include <asm/cacheflush.h>
 
 /*
  * Generic direct DMA implementation
@@ -21,21 +20,6 @@
  * can set archdata.dma_data to an unsigned long holding the offset. By
  * default the offset is PCI_DRAM_OFFSET.
  */
-static inline void __dma_sync_page(unsigned long paddr, unsigned long offset,
-				size_t size, enum dma_data_direction direction)
-{
-	switch (direction) {
-	case DMA_TO_DEVICE:
-	case DMA_BIDIRECTIONAL:
-		flush_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	case DMA_FROM_DEVICE:
-		invalidate_dcache_range(paddr + offset, paddr + offset + size);
-		break;
-	default:
-		BUG();
-	}
-}
 
 static unsigned long get_dma_direct_offset(struct device *dev)
 {
@@ -91,7 +75,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 	/* FIXME this part of code is untested */
 	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
-		__dma_sync_page(page_to_phys(sg_page(sg)), sg->offset,
+		__dma_sync(page_to_phys(sg_page(sg)) + sg->offset,
 							sg->length, direction);
 	}
 
@@ -116,7 +100,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 					     enum dma_data_direction direction,
 					     struct dma_attrs *attrs)
 {
-	__dma_sync_page(page_to_phys(page), offset, size, direction);
+	__dma_sync(page_to_phys(page) + offset, size, direction);
 	return page_to_phys(page) + offset + get_dma_direct_offset(dev);
 }
 
@@ -131,7 +115,63 @@ static inline void dma_direct_unmap_page(struct device *dev,
  * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
  * dma_address is physical address
  */
-	__dma_sync_page(dma_address, 0 , size, direction);
+	__dma_sync(dma_address, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_cpu(struct device *dev,
+			       dma_addr_t dma_handle, size_t size,
+			       enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to flush the cache as the memory segment
+	 * is given to the CPU
+	 */
+
+	if (direction == DMA_FROM_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_single_for_device(struct device *dev,
+				  dma_addr_t dma_handle, size_t size,
+				  enum dma_data_direction direction)
+{
+	/*
+	 * It's pointless to invalidate the cache if the device isn't
+	 * supposed to write to the relevant region
+	 */
+
+	if (direction == DMA_TO_DEVICE)
+		__dma_sync(dma_handle, size, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_cpu(struct device *dev,
+			   struct scatterlist *sgl, int nents,
+			   enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_FROM_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
+}
+
+static inline void
+dma_direct_sync_sg_for_device(struct device *dev,
+			      struct scatterlist *sgl, int nents,
+			      enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* FIXME this part of code is untested */
+	if (direction == DMA_TO_DEVICE)
+		for_each_sg(sgl, sg, nents, i)
+			__dma_sync(sg->dma_address, sg->length, direction);
 }
 
 struct dma_map_ops dma_direct_ops = {
@@ -142,6 +182,10 @@ struct dma_map_ops dma_direct_ops = {
 	.dma_supported	= dma_direct_dma_supported,
 	.map_page	= dma_direct_map_page,
 	.unmap_page	= dma_direct_unmap_page,
+	.sync_single_for_cpu		= dma_direct_sync_single_for_cpu,
+	.sync_single_for_device		= dma_direct_sync_single_for_device,
+	.sync_sg_for_cpu		= dma_direct_sync_sg_for_cpu,
+	.sync_sg_for_device		= dma_direct_sync_sg_for_device,
 };
 EXPORT_SYMBOL(dma_direct_ops);
 
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index 66fad2301221..6348dc82f428 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -119,7 +119,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 	case MICROBLAZE_DIV_ZERO_EXCEPTION:
 		if (user_mode(regs)) {
 			pr_debug("Divide by zero exception in user mode\n");
-			_exception(SIGILL, regs, FPE_INTDIV, addr);
+			_exception(SIGFPE, regs, FPE_INTDIV, addr);
 			return;
 		}
 		printk(KERN_WARNING "Divide by zero exception " \
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index dbb812421d8a..95cc295976a7 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -179,6 +179,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
 	ti->cpu_context.msr = (childregs->msr|MSR_VM);
 	ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */
+	ti->cpu_context.msr &= ~MSR_IE;
 #endif
 	ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8;
 
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 6a8e0cc5c57d..043cb58f9c44 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -148,7 +148,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		ret = -1L;
 
 	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
 				    regs->r5, regs->r6,
 				    regs->r7, regs->r8);
 
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index e5550ce4e0eb..af74b1113aab 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -308,7 +308,8 @@ unsigned long long notrace sched_clock(void)
 {
 	if (timer_initialized) {
 		struct clocksource *cs = &clocksource_microblaze;
-		cycle_t cyc = cnt32_to_63(cs->read(NULL));
+
+		cycle_t cyc = cnt32_to_63(cs->read(NULL)) & LLONG_MAX;
 		return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
 	}
 	return 0;
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index 10c320aa908b..c13067b243c3 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -25,5 +25,6 @@ lib-y += lshrdi3.o
 lib-y += modsi3.o
 lib-y += muldi3.o
 lib-y += mulsi3.o
+lib-y += ucmpdi2.o
 lib-y += udivsi3.o
 lib-y += umodsi3.o
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index 5810cec54a7a..f037266cdaf3 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -10,6 +10,7 @@
 
 #include <linux/errno.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
 
 /*
  * int __strncpy_user(char *to, char *from, int len);
@@ -33,8 +34,8 @@ __strncpy_user:
 	 * r3 - temp count
 	 * r4 - temp val
 	 */
+	beqid	r7,3f
 	addik	r3,r7,0		/* temp_count = len */
-	beqi	r3,3f
 1:
 	lbu	r4,r6,r0
 	sb	r4,r5,r0
@@ -76,8 +77,8 @@ __strncpy_user:
 .type  __strnlen_user, @function
 .align 4;
 __strnlen_user:
+	beqid	r6,3f
 	addik	r3,r6,0
-	beqi	r3,3f
 1:
 	lbu	r4,r5,r0
 	beqid	r4,2f		/* break on NUL */
@@ -102,6 +103,49 @@ __strnlen_user:
 	.section	__ex_table,"a"
 	.word	1b,4b
 
+/* Loop unrolling for __copy_tofrom_user */
+#define COPY(offset)	\
+1:	lwi	r4 , r6, 0x0000 + offset;	\
+2:	lwi	r19, r6, 0x0004 + offset;	\
+3:	lwi	r20, r6, 0x0008 + offset;	\
+4:	lwi	r21, r6, 0x000C + offset;	\
+5:	lwi	r22, r6, 0x0010 + offset;	\
+6:	lwi	r23, r6, 0x0014 + offset;	\
+7:	lwi	r24, r6, 0x0018 + offset;	\
+8:	lwi	r25, r6, 0x001C + offset;	\
+9:	swi	r4 , r5, 0x0000 + offset;	\
+10:	swi	r19, r5, 0x0004 + offset;	\
+11:	swi	r20, r5, 0x0008 + offset;	\
+12:	swi	r21, r5, 0x000C + offset;	\
+13:	swi	r22, r5, 0x0010 + offset;	\
+14:	swi	r23, r5, 0x0014 + offset;	\
+15:	swi	r24, r5, 0x0018 + offset;	\
+16:	swi	r25, r5, 0x001C + offset;	\
+	.section __ex_table,"a";		\
+	.word	1b, 0f;				\
+	.word	2b, 0f;				\
+	.word	3b, 0f;				\
+	.word	4b, 0f;				\
+	.word	5b, 0f;				\
+	.word	6b, 0f;				\
+	.word	7b, 0f;				\
+	.word	8b, 0f;				\
+	.word	9b, 0f;				\
+	.word	10b, 0f;			\
+	.word	11b, 0f;			\
+	.word	12b, 0f;			\
+	.word	13b, 0f;			\
+	.word	14b, 0f;			\
+	.word	15b, 0f;			\
+	.word	16b, 0f;			\
+	.text
+
+#define COPY_80(offset)	\
+	COPY(0x00 + offset);\
+	COPY(0x20 + offset);\
+	COPY(0x40 + offset);\
+	COPY(0x60 + offset);
+
 /*
  * int __copy_tofrom_user(char *to, char *from, int len)
  * Return:
@@ -119,34 +163,79 @@ __copy_tofrom_user:
 	 * r7, r3 - count
 	 * r4 - tempval
 	 */
-	beqid	r7, 3f /* zero size is not likely */
-	andi	r3, r7, 0x3 /* filter add count */
-	bneid	r3, 4f /* if is odd value then byte copying */
+	beqid	r7, 0f /* zero size is not likely */
 	or	r3, r5, r6 /* find if is any to/from unaligned */
-	andi	r3, r3, 0x3 /* mask unaligned */
-	bneid	r3, 1f /* it is unaligned -> then jump */
+	or	r3, r3, r7 /* find if count is unaligned */
+	andi	r3, r3, 0x3 /* mask last 3 bits */
+	bneid	r3, bu1 /* if r3 is not zero then byte copying */
+	or	r3, r0, r0
+
+	rsubi	r3, r7, PAGE_SIZE /* detect PAGE_SIZE */
+	beqid	r3, page;
 	or	r3, r0, r0
 
-/* at least one 4 byte copy */
-5:	lw	r4, r6, r3
-6:	sw	r4, r5, r3
+w1:	lw	r4, r6, r3 /* at least one 4 byte copy */
+w2:	sw	r4, r5, r3
 	addik	r7, r7, -4
-	bneid	r7, 5b
+	bneid	r7, w1
 	addik	r3, r3, 4
 	addik	r3, r7, 0
 	rtsd	r15, 8
 	nop
-4:	or	r3, r0, r0
-1:	lbu	r4,r6,r3
-2:	sb	r4,r5,r3
+
+	.section	__ex_table,"a"
+	.word	w1, 0f;
+	.word	w2, 0f;
+	.text
+
+.align 4 /* Alignment is important to keep icache happy */
+page:	/* Create room on stack and save registers for storign values */
+	addik   r1, r1, -32
+	swi	r19, r1, 4
+	swi	r20, r1, 8
+	swi	r21, r1, 12
+	swi	r22, r1, 16
+	swi	r23, r1, 20
+	swi	r24, r1, 24
+	swi	r25, r1, 28
+loop:	/* r4, r19, r20, r21, r22, r23, r24, r25 are used for storing values */
+	/* Loop unrolling to get performance boost */
+	COPY_80(0x000);
+	COPY_80(0x080);
+	COPY_80(0x100);
+	COPY_80(0x180);
+	/* copy loop */
+	addik	r6, r6, 0x200
+	addik	r7, r7, -0x200
+	bneid	r7, loop
+	addik	r5, r5, 0x200
+	/* Restore register content */
+	lwi	r19, r1, 4
+	lwi	r20, r1, 8
+	lwi	r21, r1, 12
+	lwi	r22, r1, 16
+	lwi	r23, r1, 20
+	lwi	r24, r1, 24
+	lwi	r25, r1, 28
+	addik   r1, r1, 32
+	/* return back */
+	addik	r3, r7, 0
+	rtsd	r15, 8
+	nop
+
+.align 4 /* Alignment is important to keep icache happy */
+bu1:	lbu	r4,r6,r3
+bu2:	sb	r4,r5,r3
 	addik	r7,r7,-1
-	bneid	r7,1b
+	bneid	r7,bu1
 	addik	r3,r3,1		/* delay slot */
-3:
+0:
 	addik	r3,r7,0
 	rtsd	r15,8
 	nop
 	.size   __copy_tofrom_user, . - __copy_tofrom_user
 
 	.section	__ex_table,"a"
-	.word	1b,3b,2b,3b,5b,3b,6b,3b
+	.word	bu1, 0b;
+	.word	bu2, 0b;
+	.text
diff --git a/arch/microblaze/lib/ucmpdi2.c b/arch/microblaze/lib/ucmpdi2.c
new file mode 100644
index 000000000000..63ca105b6713
--- /dev/null
+++ b/arch/microblaze/lib/ucmpdi2.c
@@ -0,0 +1,20 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	const DWunion au = {.ll = a};
+	const DWunion bu = {.ll = b};
+
+	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+		return 0;
+	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+		return 2;
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+	return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 213f2d671669..36a133e5ee35 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -304,11 +304,11 @@ asmlinkage void __init mmu_init(void)
 	/* Map in all of RAM starting at CONFIG_KERNEL_START */
 	mapin_ram();
 
-#ifdef HIGHMEM_START_BOOL
-	ioremap_base = HIGHMEM_START;
+#ifdef CONFIG_HIGHMEM_START_BOOL
+	ioremap_base = CONFIG_HIGHMEM_START;
 #else
 	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
-#endif /* CONFIG_HIGHMEM */
+#endif /* CONFIG_HIGHMEM_START_BOOL */
 	ioremap_bot = ioremap_base;
 
 	/* Initialize the context management stuff */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 177cdaf83564..4cbc6d8de210 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -24,6 +24,7 @@ config MIPS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_JUMP_LABEL
+	select IRQ_FORCED_THREADING
 
 menu "Machine selection"
 
@@ -91,15 +92,8 @@ config BCM47XX
 	select DMA_NONCOHERENT
 	select HW_HAS_PCI
 	select IRQ_CPU
-	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
-	select SSB
-	select SSB_DRIVER_MIPS
-	select SSB_DRIVER_EXTIF
-	select SSB_EMBEDDED
-	select SSB_B43_PCI_BRIDGE if PCI
-	select SSB_PCICORE_HOSTMODE if PCI
 	select GENERIC_GPIO
 	select SYS_HAS_EARLY_PRINTK
 	select CFE
@@ -722,6 +716,7 @@ config CAVIUM_OCTEON_SIMULATOR
 	select SYS_SUPPORTS_HIGHMEM
 	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_HAS_CPU_CAVIUM_OCTEON
+	select HOLES_IN_ZONE
 	help
 	  The Octeon simulator is software performance model of the Cavium
 	  Octeon Processor. It supports simulating Octeon processors on x86
@@ -744,6 +739,7 @@ config CAVIUM_OCTEON_REFERENCE_BOARD
 	select ZONE_DMA32
 	select USB_ARCH_HAS_OHCI
 	select USB_ARCH_HAS_EHCI
+	select HOLES_IN_ZONE
 	help
 	  This option supports all of the Octeon reference boards from Cavium
 	  Networks. It builds a kernel that dynamically determines the Octeon
@@ -788,6 +784,7 @@ endchoice
 
 source "arch/mips/alchemy/Kconfig"
 source "arch/mips/ath79/Kconfig"
+source "arch/mips/bcm47xx/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
@@ -973,6 +970,9 @@ config ISA_DMA_API
 config GENERIC_GPIO
 	bool
 
+config HOLES_IN_ZONE
+	bool
+
 #
 # Endianess selection.  Sufficiently obscure so many users don't know what to
 # answer,so we try hard to limit the available choices.  Also the use of a
diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c
index 3b2c18b14341..f72c48d4804c 100644
--- a/arch/mips/alchemy/common/platform.c
+++ b/arch/mips/alchemy/common/platform.c
@@ -492,7 +492,7 @@ static void __init alchemy_setup_macs(int ctype)
 		memcpy(au1xxx_eth0_platform_data.mac, ethaddr, 6);
 
 	ret = platform_device_register(&au1xxx_eth0_device);
-	if (!ret)
+	if (ret)
 		printk(KERN_INFO "Alchemy: failed to register MAC0\n");
 
 
diff --git a/arch/mips/alchemy/common/power.c b/arch/mips/alchemy/common/power.c
index 647e518c90bc..b86324a42601 100644
--- a/arch/mips/alchemy/common/power.c
+++ b/arch/mips/alchemy/common/power.c
@@ -158,15 +158,21 @@ static void restore_core_regs(void)
 
 void au_sleep(void)
 {
-	int cpuid = alchemy_get_cputype();
-	if (cpuid != ALCHEMY_CPU_UNKNOWN) {
-		save_core_regs();
-		if (cpuid <= ALCHEMY_CPU_AU1500)
-			alchemy_sleep_au1000();
-		else if (cpuid <= ALCHEMY_CPU_AU1200)
-			alchemy_sleep_au1550();
-		restore_core_regs();
+	save_core_regs();
+
+	switch (alchemy_get_cputype()) {
+	case ALCHEMY_CPU_AU1000:
+	case ALCHEMY_CPU_AU1500:
+	case ALCHEMY_CPU_AU1100:
+		alchemy_sleep_au1000();
+		break;
+	case ALCHEMY_CPU_AU1550:
+	case ALCHEMY_CPU_AU1200:
+		alchemy_sleep_au1550();
+		break;
 	}
+
+	restore_core_regs();
 }
 
 #endif	/* CONFIG_PM */
diff --git a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c
index 596ad00e7f05..463d2c4d9441 100644
--- a/arch/mips/alchemy/devboards/bcsr.c
+++ b/arch/mips/alchemy/devboards/bcsr.c
@@ -89,8 +89,12 @@ static void bcsr_csc_handler(unsigned int irq, struct irq_desc *d)
 {
 	unsigned short bisr = __raw_readw(bcsr_virt + BCSR_REG_INTSTAT);
 
+	disable_irq_nosync(irq);
+
 	for ( ; bisr; bisr &= bisr - 1)
 		generic_handle_irq(bcsr_csc_base + __ffs(bisr));
+
+	enable_irq(irq);
 }
 
 /* NOTE: both the enable and mask bits must be cleared, otherwise the
diff --git a/arch/mips/alchemy/devboards/db1200/platform.c b/arch/mips/alchemy/devboards/db1200/platform.c
index fbb55935b99e..dda090bf74e6 100644
--- a/arch/mips/alchemy/devboards/db1200/platform.c
+++ b/arch/mips/alchemy/devboards/db1200/platform.c
@@ -422,6 +422,7 @@ static struct resource au1200_psc1_res[] = {
 	},
 };
 
+/* AC97 or I2S device */
 static struct platform_device db1200_audio_dev = {
 	/* name assigned later based on switch setting */
 	.id		= 1,	/* PSC ID */
@@ -429,19 +430,32 @@ static struct platform_device db1200_audio_dev = {
 	.resource	= au1200_psc1_res,
 };
 
+/* DB1200 ASoC card device */
+static struct platform_device db1200_sound_dev = {
+	/* name assigned later based on switch setting */
+	.id		= 1,	/* PSC ID */
+};
+
 static struct platform_device db1200_stac_dev = {
 	.name		= "ac97-codec",
 	.id		= 1,	/* on PSC1 */
 };
 
+static struct platform_device db1200_audiodma_dev = {
+	.name		= "au1xpsc-pcm",
+	.id		= 1,	/* PSC ID */
+};
+
 static struct platform_device *db1200_devs[] __initdata = {
 	NULL,		/* PSC0, selected by S6.8 */
 	&db1200_ide_dev,
 	&db1200_eth_dev,
 	&db1200_rtc_dev,
 	&db1200_nand_dev,
+	&db1200_audiodma_dev,
 	&db1200_audio_dev,
 	&db1200_stac_dev,
+	&db1200_sound_dev,
 };
 
 static int __init db1200_dev_init(void)
@@ -501,10 +515,12 @@ static int __init db1200_dev_init(void)
 	if (sw == BCSR_SWITCHES_DIP_8) {
 		bcsr_mod(BCSR_RESETS, 0, BCSR_RESETS_PSC1MUX);
 		db1200_audio_dev.name = "au1xpsc_i2s";
+		db1200_sound_dev.name = "db1200-i2s";
 		printk(KERN_INFO " S6.7 ON : PSC1 mode I2S\n");
 	} else {
 		bcsr_mod(BCSR_RESETS, BCSR_RESETS_PSC1MUX, 0);
 		db1200_audio_dev.name = "au1xpsc_ac97";
+		db1200_sound_dev.name = "db1200-ac97";
 		printk(KERN_INFO " S6.7 OFF: PSC1 mode AC97\n");
 	}
 
diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c
index 1dac4f27d334..4a8980027ecf 100644
--- a/arch/mips/alchemy/devboards/db1200/setup.c
+++ b/arch/mips/alchemy/devboards/db1200/setup.c
@@ -23,13 +23,6 @@ void __init board_setup(void)
 	unsigned long freq0, clksrc, div, pfc;
 	unsigned short whoami;
 
-	/* Set Config[OD] (disable overlapping bus transaction):
-	 * This gets rid of a _lot_ of spurious interrupts (especially
-	 * wrt. IDE); but incurs ~10% performance hit in some
-	 * cpu-bound applications.
-	 */
-	set_c0_config(1 << 19);
-
 	bcsr_init(DB1200_BCSR_PHYS_ADDR,
 		  DB1200_BCSR_PHYS_ADDR + DB1200_BCSR_HEXLED_OFS);
 
diff --git a/arch/mips/alchemy/devboards/db1x00/platform.c b/arch/mips/alchemy/devboards/db1x00/platform.c
index 978d5ab3d678..7057d28f7301 100644
--- a/arch/mips/alchemy/devboards/db1x00/platform.c
+++ b/arch/mips/alchemy/devboards/db1x00/platform.c
@@ -19,8 +19,11 @@
  */
 
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
+#include <asm/mach-au1x00/au1000.h>
+#include <asm/mach-au1x00/au1000_dma.h>
 #include <asm/mach-au1x00/au1xxx.h>
 #include <asm/mach-db1x00/bcsr.h>
 #include "../platform.h"
@@ -85,6 +88,45 @@
 #endif
 #endif
 
+static struct resource alchemy_ac97c_res[] = {
+	[0] = {
+		.start	= AU1000_AC97_PHYS_ADDR,
+		.end	= AU1000_AC97_PHYS_ADDR + 0xfff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= DMA_ID_AC97C_TX,
+		.end	= DMA_ID_AC97C_TX,
+		.flags	= IORESOURCE_DMA,
+	},
+	[2] = {
+		.start	= DMA_ID_AC97C_RX,
+		.end	= DMA_ID_AC97C_RX,
+		.flags	= IORESOURCE_DMA,
+	},
+};
+
+static struct platform_device alchemy_ac97c_dev = {
+	.name		= "alchemy-ac97c",
+	.id		= -1,
+	.resource	= alchemy_ac97c_res,
+	.num_resources	= ARRAY_SIZE(alchemy_ac97c_res),
+};
+
+static struct platform_device alchemy_ac97c_dma_dev = {
+	.name		= "alchemy-pcm-dma",
+	.id		= 0,
+};
+
+static struct platform_device db1x00_codec_dev = {
+	.name		= "ac97-codec",
+	.id		= -1,
+};
+
+static struct platform_device db1x00_audio_dev = {
+	.name		= "db1000-audio",
+};
+
 static int __init db1xxx_dev_init(void)
 {
 #ifdef DB1XXX_HAS_PCMCIA
@@ -113,6 +155,12 @@ static int __init db1xxx_dev_init(void)
 				    1);
 #endif
 	db1x_register_norflash(BOARD_FLASH_SIZE, BOARD_FLASH_WIDTH, F_SWAPPED);
+
+	platform_device_register(&db1x00_codec_dev);
+	platform_device_register(&alchemy_ac97c_dma_dev);
+	platform_device_register(&alchemy_ac97c_dev);
+	platform_device_register(&db1x00_audio_dev);
+
 	return 0;
 }
 device_initcall(db1xxx_dev_init);
diff --git a/arch/mips/ar7/irq.c b/arch/mips/ar7/irq.c
index 03db3daadbd8..88c4babfdb5d 100644
--- a/arch/mips/ar7/irq.c
+++ b/arch/mips/ar7/irq.c
@@ -98,7 +98,8 @@ static struct irq_chip ar7_sec_irq_type = {
 
 static struct irqaction ar7_cascade_action = {
 	.handler = no_action,
-	.name = "AR7 cascade interrupt"
+	.name = "AR7 cascade interrupt",
+	.flags = IRQF_NO_THREAD,
 };
 
 static void __init ar7_irq_init(int base)
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
new file mode 100644
index 000000000000..6210b8d84109
--- /dev/null
+++ b/arch/mips/bcm47xx/Kconfig
@@ -0,0 +1,31 @@
+if BCM47XX
+
+config BCM47XX_SSB
+	bool "SSB Support for Broadcom BCM47XX"
+	select SYS_HAS_CPU_MIPS32_R1
+	select SSB
+	select SSB_DRIVER_MIPS
+	select SSB_DRIVER_EXTIF
+	select SSB_EMBEDDED
+	select SSB_B43_PCI_BRIDGE if PCI
+	select SSB_PCICORE_HOSTMODE if PCI
+	default y
+	help
+	 Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support.
+
+	 This will generate an image with support for SSB and MIPS32 R1 instruction set.
+
+config BCM47XX_BCMA
+	bool "BCMA Support for Broadcom BCM47XX"
+	select SYS_HAS_CPU_MIPS32_R2
+	select BCMA
+	select BCMA_HOST_SOC
+	select BCMA_DRIVER_MIPS
+	select BCMA_DRIVER_PCI_HOSTMODE if PCI
+	default y
+	help
+	 Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus.
+
+	 This will generate an image with support for BCMA and MIPS32 R2 instruction set.
+
+endif
diff --git a/arch/mips/bcm47xx/Makefile b/arch/mips/bcm47xx/Makefile
index 7465e8a72d9a..4add17349ff9 100644
--- a/arch/mips/bcm47xx/Makefile
+++ b/arch/mips/bcm47xx/Makefile
@@ -3,4 +3,5 @@
 # under Linux.
 #
 
-obj-y := gpio.o irq.o nvram.o prom.o serial.o setup.o time.o wgt634u.o
+obj-y 				+= gpio.o irq.o nvram.o prom.o serial.o setup.o time.o
+obj-$(CONFIG_BCM47XX_SSB)	+= wgt634u.o
diff --git a/arch/mips/bcm47xx/gpio.c b/arch/mips/bcm47xx/gpio.c
index e4a5ee9c9721..57b425fd4d41 100644
--- a/arch/mips/bcm47xx/gpio.c
+++ b/arch/mips/bcm47xx/gpio.c
@@ -20,42 +20,82 @@ static DECLARE_BITMAP(gpio_in_use, BCM47XX_EXTIF_GPIO_LINES);
 
 int gpio_request(unsigned gpio, const char *tag)
 {
-	if (ssb_chipco_available(&ssb_bcm47xx.chipco) &&
-	    ((unsigned)gpio >= BCM47XX_CHIPCO_GPIO_LINES))
-		return -EINVAL;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		if (ssb_chipco_available(&bcm47xx_bus.ssb.chipco) &&
+		    ((unsigned)gpio >= BCM47XX_CHIPCO_GPIO_LINES))
+			return -EINVAL;
 
-	if (ssb_extif_available(&ssb_bcm47xx.extif) &&
-	    ((unsigned)gpio >= BCM47XX_EXTIF_GPIO_LINES))
-		return -EINVAL;
+		if (ssb_extif_available(&bcm47xx_bus.ssb.extif) &&
+		    ((unsigned)gpio >= BCM47XX_EXTIF_GPIO_LINES))
+			return -EINVAL;
 
-	if (test_and_set_bit(gpio, gpio_in_use))
-		return -EBUSY;
+		if (test_and_set_bit(gpio, gpio_in_use))
+			return -EBUSY;
 
-	return 0;
+		return 0;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		if (gpio >= BCM47XX_CHIPCO_GPIO_LINES)
+			return -EINVAL;
+
+		if (test_and_set_bit(gpio, gpio_in_use))
+			return -EBUSY;
+
+		return 0;
+#endif
+	}
+	return -EINVAL;
 }
 EXPORT_SYMBOL(gpio_request);
 
 void gpio_free(unsigned gpio)
 {
-	if (ssb_chipco_available(&ssb_bcm47xx.chipco) &&
-	    ((unsigned)gpio >= BCM47XX_CHIPCO_GPIO_LINES))
-		return;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		if (ssb_chipco_available(&bcm47xx_bus.ssb.chipco) &&
+		    ((unsigned)gpio >= BCM47XX_CHIPCO_GPIO_LINES))
+			return;
+
+		if (ssb_extif_available(&bcm47xx_bus.ssb.extif) &&
+		    ((unsigned)gpio >= BCM47XX_EXTIF_GPIO_LINES))
+			return;
 
-	if (ssb_extif_available(&ssb_bcm47xx.extif) &&
-	    ((unsigned)gpio >= BCM47XX_EXTIF_GPIO_LINES))
+		clear_bit(gpio, gpio_in_use);
 		return;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		if (gpio >= BCM47XX_CHIPCO_GPIO_LINES)
+			return;
 
-	clear_bit(gpio, gpio_in_use);
+		clear_bit(gpio, gpio_in_use);
+		return;
+#endif
+	}
 }
 EXPORT_SYMBOL(gpio_free);
 
 int gpio_to_irq(unsigned gpio)
 {
-	if (ssb_chipco_available(&ssb_bcm47xx.chipco))
-		return ssb_mips_irq(ssb_bcm47xx.chipco.dev) + 2;
-	else if (ssb_extif_available(&ssb_bcm47xx.extif))
-		return ssb_mips_irq(ssb_bcm47xx.extif.dev) + 2;
-	else
-		return -EINVAL;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		if (ssb_chipco_available(&bcm47xx_bus.ssb.chipco))
+			return ssb_mips_irq(bcm47xx_bus.ssb.chipco.dev) + 2;
+		else if (ssb_extif_available(&bcm47xx_bus.ssb.extif))
+			return ssb_mips_irq(bcm47xx_bus.ssb.extif.dev) + 2;
+		else
+			return -EINVAL;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		return bcma_core_mips_irq(bcm47xx_bus.bcma.bus.drv_cc.core) + 2;
+#endif
+	}
+	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(gpio_to_irq);
diff --git a/arch/mips/bcm47xx/irq.c b/arch/mips/bcm47xx/irq.c
index 325757acd020..8cf3833b2d29 100644
--- a/arch/mips/bcm47xx/irq.c
+++ b/arch/mips/bcm47xx/irq.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <asm/irq_cpu.h>
+#include <bcm47xx.h>
 
 void plat_irq_dispatch(void)
 {
@@ -51,5 +52,16 @@ void plat_irq_dispatch(void)
 
 void __init arch_init_irq(void)
 {
+#ifdef CONFIG_BCM47XX_BCMA
+	if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) {
+		bcma_write32(bcm47xx_bus.bcma.bus.drv_mips.core,
+			     BCMA_MIPS_MIPS74K_INTMASK(5), 1 << 31);
+		/*
+		 * the kernel reads the timer irq from some register and thinks
+		 * it's #5, but we offset it by 2 and route to #7
+		 */
+		cp0_compare_irq = 7;
+	}
+#endif
 	mips_cpu_irq_init();
 }
diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index 54db815bc86c..a84e3bb7387f 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -26,14 +26,35 @@ static char nvram_buf[NVRAM_SPACE];
 /* Probe for NVRAM header */
 static void early_nvram_init(void)
 {
-	struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore;
+#ifdef CONFIG_BCM47XX_SSB
+	struct ssb_mipscore *mcore_ssb;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	struct bcma_drv_cc *bcma_cc;
+#endif
 	struct nvram_header *header;
 	int i;
-	u32 base, lim, off;
+	u32 base = 0;
+	u32 lim = 0;
+	u32 off;
 	u32 *src, *dst;
 
-	base = mcore->flash_window;
-	lim = mcore->flash_window_size;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		mcore_ssb = &bcm47xx_bus.ssb.mipscore;
+		base = mcore_ssb->flash_window;
+		lim = mcore_ssb->flash_window_size;
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_cc = &bcm47xx_bus.bcma.bus.drv_cc;
+		base = bcma_cc->pflash.window;
+		lim = bcma_cc->pflash.window_size;
+		break;
+#endif
+	}
 
 	off = FLASH_MIN;
 	while (off <= lim) {
diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c
index 59c11afdb2ab..57981e4fe2bc 100644
--- a/arch/mips/bcm47xx/serial.c
+++ b/arch/mips/bcm47xx/serial.c
@@ -23,10 +23,11 @@ static struct platform_device uart8250_device = {
 	},
 };
 
-static int __init uart8250_init(void)
+#ifdef CONFIG_BCM47XX_SSB
+static int __init uart8250_init_ssb(void)
 {
 	int i;
-	struct ssb_mipscore *mcore = &(ssb_bcm47xx.mipscore);
+	struct ssb_mipscore *mcore = &(bcm47xx_bus.ssb.mipscore);
 
 	memset(&uart8250_data, 0,  sizeof(uart8250_data));
 
@@ -44,6 +45,47 @@ static int __init uart8250_init(void)
 	}
 	return platform_device_register(&uart8250_device);
 }
+#endif
+
+#ifdef CONFIG_BCM47XX_BCMA
+static int __init uart8250_init_bcma(void)
+{
+	int i;
+	struct bcma_drv_cc *cc = &(bcm47xx_bus.bcma.bus.drv_cc);
+
+	memset(&uart8250_data, 0,  sizeof(uart8250_data));
+
+	for (i = 0; i < cc->nr_serial_ports; i++) {
+		struct plat_serial8250_port *p = &(uart8250_data[i]);
+		struct bcma_serial_port *bcma_port;
+		bcma_port = &(cc->serial_ports[i]);
+
+		p->mapbase = (unsigned int) bcma_port->regs;
+		p->membase = (void *) bcma_port->regs;
+		p->irq = bcma_port->irq + 2;
+		p->uartclk = bcma_port->baud_base;
+		p->regshift = bcma_port->reg_shift;
+		p->iotype = UPIO_MEM;
+		p->flags = UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ;
+	}
+	return platform_device_register(&uart8250_device);
+}
+#endif
+
+static int __init uart8250_init(void)
+{
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		return uart8250_init_ssb();
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		return uart8250_init_bcma();
+#endif
+	}
+	return -EINVAL;
+}
 
 module_init(uart8250_init);
 
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index cfae81571ded..17c3d14d7c49 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -29,21 +29,36 @@
 #include <linux/types.h>
 #include <linux/ssb/ssb.h>
 #include <linux/ssb/ssb_embedded.h>
+#include <linux/bcma/bcma_soc.h>
 #include <asm/bootinfo.h>
 #include <asm/reboot.h>
 #include <asm/time.h>
 #include <bcm47xx.h>
 #include <asm/mach-bcm47xx/nvram.h>
 
-struct ssb_bus ssb_bcm47xx;
-EXPORT_SYMBOL(ssb_bcm47xx);
+union bcm47xx_bus bcm47xx_bus;
+EXPORT_SYMBOL(bcm47xx_bus);
+
+enum bcm47xx_bus_type bcm47xx_bus_type;
+EXPORT_SYMBOL(bcm47xx_bus_type);
 
 static void bcm47xx_machine_restart(char *command)
 {
 	printk(KERN_ALERT "Please stand by while rebooting the system...\n");
 	local_irq_disable();
 	/* Set the watchdog timer to reset immediately */
-	ssb_watchdog_timer_set(&ssb_bcm47xx, 1);
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 1);
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 1);
+		break;
+#endif
+	}
 	while (1)
 		cpu_relax();
 }
@@ -52,11 +67,23 @@ static void bcm47xx_machine_halt(void)
 {
 	/* Disable interrupts and watchdog and spin forever */
 	local_irq_disable();
-	ssb_watchdog_timer_set(&ssb_bcm47xx, 0);
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0);
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0);
+		break;
+#endif
+	}
 	while (1)
 		cpu_relax();
 }
 
+#ifdef CONFIG_BCM47XX_SSB
 #define READ_FROM_NVRAM(_outvar, name, buf) \
 	if (nvram_getprefix(prefix, name, buf, sizeof(buf)) >= 0)\
 		sprom->_outvar = simple_strtoul(buf, NULL, 0);
@@ -247,7 +274,7 @@ static int bcm47xx_get_invariants(struct ssb_bus *bus,
 	return 0;
 }
 
-void __init plat_mem_setup(void)
+static void __init bcm47xx_register_ssb(void)
 {
 	int err;
 	char buf[100];
@@ -258,12 +285,12 @@ void __init plat_mem_setup(void)
 		printk(KERN_WARNING "bcm47xx: someone else already registered"
 			" a ssb SPROM callback handler (err %d)\n", err);
 
-	err = ssb_bus_ssbbus_register(&ssb_bcm47xx, SSB_ENUM_BASE,
+	err = ssb_bus_ssbbus_register(&(bcm47xx_bus.ssb), SSB_ENUM_BASE,
 				      bcm47xx_get_invariants);
 	if (err)
 		panic("Failed to initialize SSB bus (err %d)\n", err);
 
-	mcore = &ssb_bcm47xx.mipscore;
+	mcore = &bcm47xx_bus.ssb.mipscore;
 	if (nvram_getenv("kernel_args", buf, sizeof(buf)) >= 0) {
 		if (strstr(buf, "console=ttyS1")) {
 			struct ssb_serial_port port;
@@ -276,8 +303,57 @@ void __init plat_mem_setup(void)
 			memcpy(&mcore->serial_ports[1], &port, sizeof(port));
 		}
 	}
+}
+#endif
+
+#ifdef CONFIG_BCM47XX_BCMA
+static void __init bcm47xx_register_bcma(void)
+{
+	int err;
+
+	err = bcma_host_soc_register(&bcm47xx_bus.bcma);
+	if (err)
+		panic("Failed to initialize BCMA bus (err %d)\n", err);
+}
+#endif
+
+void __init plat_mem_setup(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	if (c->cputype == CPU_74K) {
+		printk(KERN_INFO "bcm47xx: using bcma bus\n");
+#ifdef CONFIG_BCM47XX_BCMA
+		bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA;
+		bcm47xx_register_bcma();
+#endif
+	} else {
+		printk(KERN_INFO "bcm47xx: using ssb bus\n");
+#ifdef CONFIG_BCM47XX_SSB
+		bcm47xx_bus_type = BCM47XX_BUS_TYPE_SSB;
+		bcm47xx_register_ssb();
+#endif
+	}
 
 	_machine_restart = bcm47xx_machine_restart;
 	_machine_halt = bcm47xx_machine_halt;
 	pm_power_off = bcm47xx_machine_halt;
 }
+
+static int __init bcm47xx_register_bus_complete(void)
+{
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		/* Nothing to do */
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_bus_register(&bcm47xx_bus.bcma.bus);
+		break;
+#endif
+	}
+	return 0;
+}
+device_initcall(bcm47xx_register_bus_complete);
diff --git a/arch/mips/bcm47xx/time.c b/arch/mips/bcm47xx/time.c
index 0c6f47b3fd94..536374dcba78 100644
--- a/arch/mips/bcm47xx/time.c
+++ b/arch/mips/bcm47xx/time.c
@@ -30,7 +30,7 @@
 
 void __init plat_time_init(void)
 {
-	unsigned long hz;
+	unsigned long hz = 0;
 
 	/*
 	 * Use deterministic values for initial counter interrupt
@@ -39,7 +39,19 @@ void __init plat_time_init(void)
 	write_c0_count(0);
 	write_c0_compare(0xffff);
 
-	hz = ssb_cpu_clock(&ssb_bcm47xx.mipscore) / 2;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		hz = ssb_cpu_clock(&bcm47xx_bus.ssb.mipscore) / 2;
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		hz = bcma_cpu_clock(&bcm47xx_bus.bcma.bus.drv_mips) / 2;
+		break;
+#endif
+	}
+
 	if (!hz)
 		hz = 100000000;
 
diff --git a/arch/mips/bcm47xx/wgt634u.c b/arch/mips/bcm47xx/wgt634u.c
index 74d06965326f..e9f9ec8d443b 100644
--- a/arch/mips/bcm47xx/wgt634u.c
+++ b/arch/mips/bcm47xx/wgt634u.c
@@ -108,7 +108,7 @@ static irqreturn_t gpio_interrupt(int irq, void *ignored)
 
 	/* Interrupts are shared, check if the current one is
 	   a GPIO interrupt. */
-	if (!ssb_chipco_irq_status(&ssb_bcm47xx.chipco,
+	if (!ssb_chipco_irq_status(&bcm47xx_bus.ssb.chipco,
 				   SSB_CHIPCO_IRQ_GPIO))
 		return IRQ_NONE;
 
@@ -132,22 +132,26 @@ static int __init wgt634u_init(void)
 	 * machine. Use the MAC address as an heuristic. Netgear Inc. has
 	 * been allocated ranges 00:09:5b:xx:xx:xx and 00:0f:b5:xx:xx:xx.
 	 */
+	u8 *et0mac;
 
-	u8 *et0mac = ssb_bcm47xx.sprom.et0mac;
+	if (bcm47xx_bus_type != BCM47XX_BUS_TYPE_SSB)
+		return -ENODEV;
+
+	et0mac = bcm47xx_bus.ssb.sprom.et0mac;
 
 	if (et0mac[0] == 0x00 &&
 	    ((et0mac[1] == 0x09 && et0mac[2] == 0x5b) ||
 	     (et0mac[1] == 0x0f && et0mac[2] == 0xb5))) {
-		struct ssb_mipscore *mcore = &ssb_bcm47xx.mipscore;
+		struct ssb_mipscore *mcore = &bcm47xx_bus.ssb.mipscore;
 
 		printk(KERN_INFO "WGT634U machine detected.\n");
 
 		if (!request_irq(gpio_to_irq(WGT634U_GPIO_RESET),
 				 gpio_interrupt, IRQF_SHARED,
-				 "WGT634U GPIO", &ssb_bcm47xx.chipco)) {
+				 "WGT634U GPIO", &bcm47xx_bus.ssb.chipco)) {
 			gpio_direction_input(WGT634U_GPIO_RESET);
 			gpio_intmask(WGT634U_GPIO_RESET, 1);
-			ssb_chipco_irq_mask(&ssb_bcm47xx.chipco,
+			ssb_chipco_irq_mask(&bcm47xx_bus.ssb.chipco,
 					    SSB_CHIPCO_IRQ_GPIO,
 					    SSB_CHIPCO_IRQ_GPIO);
 		}
diff --git a/arch/mips/bcm63xx/irq.c b/arch/mips/bcm63xx/irq.c
index cea6021cb8d7..162e11b4ed75 100644
--- a/arch/mips/bcm63xx/irq.c
+++ b/arch/mips/bcm63xx/irq.c
@@ -222,6 +222,7 @@ static struct irq_chip bcm63xx_external_irq_chip = {
 static struct irqaction cpu_ip2_cascade_action = {
 	.handler	= no_action,
 	.name		= "cascade_ip2",
+	.flags		= IRQF_NO_THREAD,
 };
 
 void __init arch_init_irq(void)
diff --git a/arch/mips/cobalt/irq.c b/arch/mips/cobalt/irq.c
index cb9bf820fe53..965c777d3561 100644
--- a/arch/mips/cobalt/irq.c
+++ b/arch/mips/cobalt/irq.c
@@ -48,6 +48,7 @@ asmlinkage void plat_irq_dispatch(void)
 static struct irqaction cascade = {
 	.handler	= no_action,
 	.name		= "cascade",
+	.flags		= IRQF_NO_THREAD,
 };
 
 void __init arch_init_irq(void)
diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c
index fa45e924be05..f7b7ba6d5c45 100644
--- a/arch/mips/dec/setup.c
+++ b/arch/mips/dec/setup.c
@@ -101,20 +101,24 @@ int cpu_fpu_mask = DEC_CPU_IRQ_MASK(DEC_CPU_INR_FPU);
 static struct irqaction ioirq = {
 	.handler = no_action,
 	.name = "cascade",
+	.flags = IRQF_NO_THREAD,
 };
 static struct irqaction fpuirq = {
 	.handler = no_action,
 	.name = "fpu",
+	.flags = IRQF_NO_THREAD,
 };
 
 static struct irqaction busirq = {
 	.flags = IRQF_DISABLED,
 	.name = "bus error",
+	.flags = IRQF_NO_THREAD,
 };
 
 static struct irqaction haltirq = {
 	.handler = dec_intr_halt,
 	.name = "halt",
+	.flags = IRQF_NO_THREAD,
 };
 
 
diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c
index 3dbd7a5a6ad3..7798887a1288 100644
--- a/arch/mips/emma/markeins/irq.c
+++ b/arch/mips/emma/markeins/irq.c
@@ -169,7 +169,7 @@ void emma2rh_gpio_irq_init(void)
 
 static struct irqaction irq_cascade = {
 	   .handler = no_action,
-	   .flags = 0,
+	   .flags = IRQF_NO_THREAD,
 	   .name = "cascade",
 	   .dev_id = NULL,
 	   .next = NULL,
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index dbc51065df5b..b77df0366ee6 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -111,7 +111,8 @@ struct compat_statfs {
 	int		f_bavail;
 	compat_fsid_t	f_fsid;
 	int		f_namelen;
-	int		f_spare[6];
+	int		f_flags;
+	int		f_spare[5];
 };
 
 #define COMPAT_RLIM_INFINITY	0x7fffffffUL
diff --git a/arch/mips/include/asm/lasat/lasat.h b/arch/mips/include/asm/lasat/lasat.h
index a1ada1c27c16..e8ff70f80e13 100644
--- a/arch/mips/include/asm/lasat/lasat.h
+++ b/arch/mips/include/asm/lasat/lasat.h
@@ -41,10 +41,8 @@ enum lasat_mtdparts {
 
 /*
  * The format of the data record in the EEPROM.
- * See Documentation/LASAT/eeprom.txt for a detailed description
- * of the fields in this struct, and the LASAT Hardware Configuration
- * field specification for a detailed description of the config
- * field.
+ * See the LASAT Hardware Configuration field specification for a detailed
+ * description of the config field.
  */
 #include <linux/types.h>
 
diff --git a/arch/mips/include/asm/mach-au1x00/au1xxx_psc.h b/arch/mips/include/asm/mach-au1x00/au1xxx_psc.h
index 892b7f168eb4..5a5cb7386427 100644
--- a/arch/mips/include/asm/mach-au1x00/au1xxx_psc.h
+++ b/arch/mips/include/asm/mach-au1x00/au1xxx_psc.h
@@ -394,19 +394,6 @@ typedef struct	psc_spi {
 #define PSC_SPITXRX_LC		(1 << 29)
 #define PSC_SPITXRX_SR		(1 << 28)
 
-/* PSC in SMBus (I2C) Mode. */
-typedef struct	psc_smb {
-	u32	psc_sel;
-	u32	psc_ctrl;
-	u32	psc_smbcfg;
-	u32	psc_smbmsk;
-	u32	psc_smbpcr;
-	u32	psc_smbstat;
-	u32	psc_smbevnt;
-	u32	psc_smbtxrx;
-	u32	psc_smbtmr;
-} psc_smb_t;
-
 /* SMBus Config Register. */
 #define PSC_SMBCFG_RT_MASK	(3 << 30)
 #define PSC_SMBCFG_RT_FIFO1	(0 << 30)
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
index d008f47a28bd..de95e0723e2b 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
@@ -19,7 +19,29 @@
 #ifndef __ASM_BCM47XX_H
 #define __ASM_BCM47XX_H
 
-/* SSB bus */
-extern struct ssb_bus ssb_bcm47xx;
+#include <linux/ssb/ssb.h>
+#include <linux/bcma/bcma.h>
+#include <linux/bcma/bcma_soc.h>
+
+enum bcm47xx_bus_type {
+#ifdef CONFIG_BCM47XX_SSB
+	BCM47XX_BUS_TYPE_SSB,
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	BCM47XX_BUS_TYPE_BCMA,
+#endif
+};
+
+union bcm47xx_bus {
+#ifdef CONFIG_BCM47XX_SSB
+	struct ssb_bus ssb;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	struct bcma_soc bcma;
+#endif
+};
+
+extern union bcm47xx_bus bcm47xx_bus;
+extern enum bcm47xx_bus_type bcm47xx_bus_type;
 
 #endif /* __ASM_BCM47XX_H */
diff --git a/arch/mips/include/asm/mach-bcm47xx/gpio.h b/arch/mips/include/asm/mach-bcm47xx/gpio.h
index 98504142124e..76961cabeedf 100644
--- a/arch/mips/include/asm/mach-bcm47xx/gpio.h
+++ b/arch/mips/include/asm/mach-bcm47xx/gpio.h
@@ -10,6 +10,7 @@
 #define __BCM47XX_GPIO_H
 
 #include <linux/ssb/ssb_embedded.h>
+#include <linux/bcma/bcma.h>
 #include <asm/mach-bcm47xx/bcm47xx.h>
 
 #define BCM47XX_EXTIF_GPIO_LINES	5
@@ -21,41 +22,118 @@ extern int gpio_to_irq(unsigned gpio);
 
 static inline int gpio_get_value(unsigned gpio)
 {
-	return ssb_gpio_in(&ssb_bcm47xx, 1 << gpio);
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		return ssb_gpio_in(&bcm47xx_bus.ssb, 1 << gpio);
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		return bcma_chipco_gpio_in(&bcm47xx_bus.bcma.bus.drv_cc,
+					   1 << gpio);
+#endif
+	}
+	return -EINVAL;
 }
 
 static inline void gpio_set_value(unsigned gpio, int value)
 {
-	ssb_gpio_out(&ssb_bcm47xx, 1 << gpio, value ? 1 << gpio : 0);
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_gpio_out(&bcm47xx_bus.ssb, 1 << gpio,
+			     value ? 1 << gpio : 0);
+		return;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_gpio_out(&bcm47xx_bus.bcma.bus.drv_cc, 1 << gpio,
+				     value ? 1 << gpio : 0);
+		return;
+#endif
+	}
 }
 
 static inline int gpio_direction_input(unsigned gpio)
 {
-	ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 0);
-	return 0;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_gpio_outen(&bcm47xx_bus.ssb, 1 << gpio, 0);
+		return 0;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_gpio_outen(&bcm47xx_bus.bcma.bus.drv_cc, 1 << gpio,
+				       0);
+		return 0;
+#endif
+	}
+	return -EINVAL;
 }
 
 static inline int gpio_direction_output(unsigned gpio, int value)
 {
-	/* first set the gpio out value */
-	ssb_gpio_out(&ssb_bcm47xx, 1 << gpio, value ? 1 << gpio : 0);
-	/* then set the gpio mode */
-	ssb_gpio_outen(&ssb_bcm47xx, 1 << gpio, 1 << gpio);
-	return 0;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		/* first set the gpio out value */
+		ssb_gpio_out(&bcm47xx_bus.ssb, 1 << gpio,
+			     value ? 1 << gpio : 0);
+		/* then set the gpio mode */
+		ssb_gpio_outen(&bcm47xx_bus.ssb, 1 << gpio, 1 << gpio);
+		return 0;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		/* first set the gpio out value */
+		bcma_chipco_gpio_out(&bcm47xx_bus.bcma.bus.drv_cc, 1 << gpio,
+				     value ? 1 << gpio : 0);
+		/* then set the gpio mode */
+		bcma_chipco_gpio_outen(&bcm47xx_bus.bcma.bus.drv_cc, 1 << gpio,
+				       1 << gpio);
+		return 0;
+#endif
+	}
+	return -EINVAL;
 }
 
 static inline int gpio_intmask(unsigned gpio, int value)
 {
-	ssb_gpio_intmask(&ssb_bcm47xx, 1 << gpio,
-			 value ? 1 << gpio : 0);
-	return 0;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_gpio_intmask(&bcm47xx_bus.ssb, 1 << gpio,
+				 value ? 1 << gpio : 0);
+		return 0;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_gpio_intmask(&bcm47xx_bus.bcma.bus.drv_cc,
+					 1 << gpio, value ? 1 << gpio : 0);
+		return 0;
+#endif
+	}
+	return -EINVAL;
 }
 
 static inline int gpio_polarity(unsigned gpio, int value)
 {
-	ssb_gpio_polarity(&ssb_bcm47xx, 1 << gpio,
-			  value ? 1 << gpio : 0);
-	return 0;
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		ssb_gpio_polarity(&bcm47xx_bus.ssb, 1 << gpio,
+				  value ? 1 << gpio : 0);
+		return 0;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		bcma_chipco_gpio_polarity(&bcm47xx_bus.bcma.bus.drv_cc,
+					  1 << gpio, value ? 1 << gpio : 0);
+		return 0;
+#endif
+	}
+	return -EINVAL;
 }
 
 
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index 0d5a42b5f47a..a58addb98cfd 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -54,7 +54,6 @@
 #define cpu_has_mips_r2_exec_hazard 0
 #define cpu_has_dsp		0
 #define cpu_has_mipsmt		0
-#define cpu_has_userlocal	0
 #define cpu_has_vint		0
 #define cpu_has_veic		0
 #define cpu_hwrena_impl_bits	0xc0000000
diff --git a/arch/mips/include/asm/mach-powertv/dma-coherence.h b/arch/mips/include/asm/mach-powertv/dma-coherence.h
index 62c094085947..35371641575d 100644
--- a/arch/mips/include/asm/mach-powertv/dma-coherence.h
+++ b/arch/mips/include/asm/mach-powertv/dma-coherence.h
@@ -13,7 +13,6 @@
 #define __ASM_MACH_POWERTV_DMA_COHERENCE_H
 
 #include <linux/sched.h>
-#include <linux/version.h>
 #include <linux/device.h>
 #include <asm/mach-powertv/asic.h>
 
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index b4ba2449444b..cb41af5f3406 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -195,9 +195,9 @@
 		 * to cover the pipeline delay.
 		 */
 		.set	mips32
-		mfc0	v1, CP0_TCSTATUS
+		mfc0	k0, CP0_TCSTATUS
 		.set	mips0
-		LONG_S	v1, PT_TCSTATUS(sp)
+		LONG_S	k0, PT_TCSTATUS(sp)
 #endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_S	$4, PT_R4(sp)
 		LONG_S	$5, PT_R5(sp)
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 73031f7fc827..4397972949fa 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 
 #include <linux/spinlock.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
@@ -86,7 +86,6 @@ struct jz_gpio_chip {
 	spinlock_t lock;
 
 	struct gpio_chip gpio_chip;
-	struct sys_device sysdev;
 };
 
 static struct jz_gpio_chip jz4740_gpio_chips[];
@@ -459,49 +458,47 @@ static struct jz_gpio_chip jz4740_gpio_chips[] = {
 	JZ4740_GPIO_CHIP(D),
 };
 
-static inline struct jz_gpio_chip *sysdev_to_chip(struct sys_device *dev)
+static void jz4740_gpio_suspend_chip(struct jz_gpio_chip *chip)
 {
-	return container_of(dev, struct jz_gpio_chip, sysdev);
+	chip->suspend_mask = readl(chip->base + JZ_REG_GPIO_MASK);
+	writel(~(chip->wakeup), chip->base + JZ_REG_GPIO_MASK_SET);
+	writel(chip->wakeup, chip->base + JZ_REG_GPIO_MASK_CLEAR);
 }
 
-static int jz4740_gpio_suspend(struct sys_device *dev, pm_message_t state)
+static int jz4740_gpio_suspend(void)
 {
-	struct jz_gpio_chip *chip = sysdev_to_chip(dev);
+	int i;
 
-	chip->suspend_mask = readl(chip->base + JZ_REG_GPIO_MASK);
-	writel(~(chip->wakeup), chip->base + JZ_REG_GPIO_MASK_SET);
-	writel(chip->wakeup, chip->base + JZ_REG_GPIO_MASK_CLEAR);
+	for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); i++)
+		jz4740_gpio_suspend_chip(&jz4740_gpio_chips[i]);
 
 	return 0;
 }
 
-static int jz4740_gpio_resume(struct sys_device *dev)
+static void jz4740_gpio_resume_chip(struct jz_gpio_chip *chip)
 {
-	struct jz_gpio_chip *chip = sysdev_to_chip(dev);
 	uint32_t mask = chip->suspend_mask;
 
 	writel(~mask, chip->base + JZ_REG_GPIO_MASK_CLEAR);
 	writel(mask, chip->base + JZ_REG_GPIO_MASK_SET);
+}
 
-	return 0;
+static void jz4740_gpio_resume(void)
+{
+	int i;
+
+	for (i = ARRAY_SIZE(jz4740_gpio_chips) - 1; i >= 0 ; i--)
+		jz4740_gpio_resume_chip(&jz4740_gpio_chips[i]);
 }
 
-static struct sysdev_class jz4740_gpio_sysdev_class = {
-	.name = "gpio",
+static struct syscore_ops jz4740_gpio_syscore_ops = {
 	.suspend = jz4740_gpio_suspend,
 	.resume = jz4740_gpio_resume,
 };
 
-static int jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
+static void jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
 {
-	int ret, irq;
-
-	chip->sysdev.id = id;
-	chip->sysdev.cls = &jz4740_gpio_sysdev_class;
-	ret = sysdev_register(&chip->sysdev);
-
-	if (ret)
-		return ret;
+	int irq;
 
 	spin_lock_init(&chip->lock);
 
@@ -519,22 +516,17 @@ static int jz4740_gpio_chip_init(struct jz_gpio_chip *chip, unsigned int id)
 		irq_set_chip_and_handler(irq, &jz_gpio_irq_chip,
 					 handle_level_irq);
 	}
-
-	return 0;
 }
 
 static int __init jz4740_gpio_init(void)
 {
 	unsigned int i;
-	int ret;
-
-	ret = sysdev_class_register(&jz4740_gpio_sysdev_class);
-	if (ret)
-		return ret;
 
 	for (i = 0; i < ARRAY_SIZE(jz4740_gpio_chips); ++i)
 		jz4740_gpio_chip_init(&jz4740_gpio_chips[i], i);
 
+	register_syscore_ops(&jz4740_gpio_syscore_ops);
+
 	printk(KERN_INFO "JZ4740 GPIO initialized\n");
 
 	return 0;
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index feb8021a305f..6a2d758dd8e9 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -19,6 +19,26 @@
 
 #include <asm-generic/sections.h>
 
+#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
+#define MCOUNT_OFFSET_INSNS 5
+#else
+#define MCOUNT_OFFSET_INSNS 4
+#endif
+
+/*
+ * Check if the address is in kernel space
+ *
+ * Clone core_kernel_text() from kernel/extable.c, but doesn't call
+ * init_kernel_text() for Ftrace doesn't trace functions in init sections.
+ */
+static inline int in_kernel_space(unsigned long ip)
+{
+	if (ip >= (unsigned long)_stext &&
+	    ip <= (unsigned long)_etext)
+		return 1;
+	return 0;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #define JAL 0x0c000000		/* jump & link: ip --> ra, jump to target */
@@ -54,20 +74,6 @@ static inline void ftrace_dyn_arch_init_insns(void)
 #endif
 }
 
-/*
- * Check if the address is in kernel space
- *
- * Clone core_kernel_text() from kernel/extable.c, but doesn't call
- * init_kernel_text() for Ftrace doesn't trace functions in init sections.
- */
-static inline int in_kernel_space(unsigned long ip)
-{
-	if (ip >= (unsigned long)_stext &&
-	    ip <= (unsigned long)_etext)
-		return 1;
-	return 0;
-}
-
 static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 {
 	int faulted;
@@ -112,11 +118,6 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
  *                                  1: offset = 4 instructions
  */
 
-#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
-#define MCOUNT_OFFSET_INSNS 5
-#else
-#define MCOUNT_OFFSET_INSNS 4
-#endif
 #define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
 
 int ftrace_make_nop(struct module *mod,
diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c
index 5c74eb797f08..32b397b646ee 100644
--- a/arch/mips/kernel/i8259.c
+++ b/arch/mips/kernel/i8259.c
@@ -229,7 +229,7 @@ static void i8259A_shutdown(void)
 	 */
 	if (i8259A_auto_eoi >= 0) {
 		outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-		outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
+		outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 	}
 }
 
@@ -295,6 +295,7 @@ static void init_8259A(int auto_eoi)
 static struct irqaction irq2 = {
 	.handler = no_action,
 	.name = "cascade",
+	.flags = IRQF_NO_THREAD,
 };
 
 static struct resource pic1_io_resource = {
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 876a75cc376f..922a554cd108 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -349,3 +349,10 @@ SYSCALL_DEFINE6(32_fanotify_mark, int, fanotify_fd, unsigned int, flags,
 	return sys_fanotify_mark(fanotify_fd, flags, merge_64(a3, a4),
 				 dfd, pathname);
 }
+
+SYSCALL_DEFINE6(32_futex, u32 __user *, uaddr, int, op, u32, val,
+		struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+		u32, val3)
+{
+	return compat_sys_futex(uaddr, op, val, utime, uaddr2, val3);
+}
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f9296e894e46..6de1f598346e 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -315,7 +315,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_fremovexattr
 	PTR	sys_tkill
 	PTR	sys_ni_syscall
-	PTR	compat_sys_futex
+	PTR	sys_32_futex
 	PTR	compat_sys_sched_setaffinity	/* 6195 */
 	PTR	compat_sys_sched_getaffinity
 	PTR	sys_cacheflush
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 4d7c9827706f..1d813169e453 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -441,7 +441,7 @@ sys_call_table:
 	PTR	sys_fremovexattr		/* 4235 */
 	PTR	sys_tkill
 	PTR	sys_sendfile64
-	PTR	compat_sys_futex
+	PTR	sys_32_futex
 	PTR	compat_sys_sched_setaffinity
 	PTR	compat_sys_sched_getaffinity	/* 4240 */
 	PTR	compat_sys_io_setup
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index dbbe0ce48d89..f8524003676a 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -8,6 +8,7 @@
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
 #include <linux/cache.h>
+#include <linux/irqflags.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/personality.h>
@@ -658,6 +659,8 @@ static void do_signal(struct pt_regs *regs)
 asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
 	__u32 thread_info_flags)
 {
+	local_irq_enable();
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
 		do_signal(regs);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index b7517e3abc85..cbea618af0b4 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -14,6 +14,7 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -364,21 +365,26 @@ static int regs_to_trapnr(struct pt_regs *regs)
 	return (regs->cp0_cause >> 2) & 0x1f;
 }
 
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
 
 void __noreturn die(const char *str, struct pt_regs *regs)
 {
 	static int die_counter;
 	int sig = SIGSEGV;
 #ifdef CONFIG_MIPS_MT_SMTC
-	unsigned long dvpret = dvpe();
+	unsigned long dvpret;
 #endif /* CONFIG_MIPS_MT_SMTC */
 
+	oops_enter();
+
 	if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP)
 		sig = 0;
 
 	console_verbose();
-	spin_lock_irq(&die_lock);
+	raw_spin_lock_irq(&die_lock);
+#ifdef CONFIG_MIPS_MT_SMTC
+	dvpret = dvpe();
+#endif /* CONFIG_MIPS_MT_SMTC */
 	bust_spinlocks(1);
 #ifdef CONFIG_MIPS_MT_SMTC
 	mips_mt_regdump(dvpret);
@@ -387,7 +393,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	add_taint(TAINT_DIE);
-	spin_unlock_irq(&die_lock);
+	raw_spin_unlock_irq(&die_lock);
+
+	oops_exit();
 
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 2cd50ad0d5c6..3efcb065f78a 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -192,7 +192,7 @@ static struct tc *get_tc(int index)
 	}
 	spin_unlock(&vpecontrol.tc_list_lock);
 
-	return NULL;
+	return res;
 }
 
 /* allocate a vpe and associate it with this minor (or index) */
diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index fc89795cafdb..f9737bb3c5ab 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -123,11 +123,10 @@ void ltq_enable_irq(struct irq_data *d)
 static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
 {
 	int i;
-	int irq_nr = d->irq - INT_NUM_IRQ0;
 
 	ltq_enable_irq(d);
 	for (i = 0; i < MAX_EIU; i++) {
-		if (irq_nr == ltq_eiu_irq[i]) {
+		if (d->irq == ltq_eiu_irq[i]) {
 			/* low level - we should really handle set_type */
 			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) |
 				(0x6 << (i * 4)), LTQ_EIU_EXIN_C);
@@ -147,11 +146,10 @@ static unsigned int ltq_startup_eiu_irq(struct irq_data *d)
 static void ltq_shutdown_eiu_irq(struct irq_data *d)
 {
 	int i;
-	int irq_nr = d->irq - INT_NUM_IRQ0;
 
 	ltq_disable_irq(d);
 	for (i = 0; i < MAX_EIU; i++) {
-		if (irq_nr == ltq_eiu_irq[i]) {
+		if (d->irq == ltq_eiu_irq[i]) {
 			/* disable */
 			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_INEN) & ~(1 << i),
 				LTQ_EIU_EXIN_INEN);
diff --git a/arch/mips/lantiq/xway/ebu.c b/arch/mips/lantiq/xway/ebu.c
index 66eb52fa50a1..033b3184c7a7 100644
--- a/arch/mips/lantiq/xway/ebu.c
+++ b/arch/mips/lantiq/xway/ebu.c
@@ -10,7 +10,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/ioport.h>
 
 #include <lantiq_soc.h>
diff --git a/arch/mips/lantiq/xway/pmu.c b/arch/mips/lantiq/xway/pmu.c
index 9d69f01e352b..39f0d2641cbf 100644
--- a/arch/mips/lantiq/xway/pmu.c
+++ b/arch/mips/lantiq/xway/pmu.c
@@ -8,7 +8,6 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/version.h>
 #include <linux/ioport.h>
 
 #include <lantiq_soc.h>
diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c
index de4c165515d7..d608b6ef0edd 100644
--- a/arch/mips/lasat/interrupt.c
+++ b/arch/mips/lasat/interrupt.c
@@ -105,6 +105,7 @@ asmlinkage void plat_irq_dispatch(void)
 static struct irqaction cascade = {
 	.handler	= no_action,
 	.name		= "cascade",
+	.flags		= IRQF_NO_THREAD,
 };
 
 void __init arch_init_irq(void)
diff --git a/arch/mips/loongson/fuloong-2e/irq.c b/arch/mips/loongson/fuloong-2e/irq.c
index d61a04222b87..3cf1fef29f0e 100644
--- a/arch/mips/loongson/fuloong-2e/irq.c
+++ b/arch/mips/loongson/fuloong-2e/irq.c
@@ -42,6 +42,7 @@ asmlinkage void mach_irq_dispatch(unsigned int pending)
 static struct irqaction cascade_irqaction = {
 	.handler = no_action,
 	.name = "cascade",
+	.flags = IRQF_NO_THREAD,
 };
 
 void __init mach_init_irq(void)
diff --git a/arch/mips/loongson/lemote-2f/irq.c b/arch/mips/loongson/lemote-2f/irq.c
index 081db102bb98..14b081841b6b 100644
--- a/arch/mips/loongson/lemote-2f/irq.c
+++ b/arch/mips/loongson/lemote-2f/irq.c
@@ -96,12 +96,13 @@ static irqreturn_t ip6_action(int cpl, void *dev_id)
 struct irqaction ip6_irqaction = {
 	.handler = ip6_action,
 	.name = "cascade",
-	.flags = IRQF_SHARED,
+	.flags = IRQF_SHARED | IRQF_NO_THREAD,
 };
 
 struct irqaction cascade_irqaction = {
 	.handler = no_action,
 	.name = "cascade",
+	.flags = IRQF_NO_THREAD,
 };
 
 void __init mach_init_irq(void)
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 9ff5d0fac556..302d779d5b0d 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2011 Wind River Systems,
  *   written by Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -15,12 +16,11 @@
 #include <linux/sched.h>
 
 unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-
 EXPORT_SYMBOL(shm_align_mask);
 
 /* gap between mmap and stack */
 #define MIN_GAP (128*1024*1024UL)
-#define MAX_GAP        ((TASK_SIZE)/6*5)
+#define MAX_GAP ((TASK_SIZE)/6*5)
 
 static int mmap_is_legacy(void)
 {
@@ -57,13 +57,13 @@ static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr,
 	return base - off;
 }
 
-#define COLOUR_ALIGN(addr,pgoff)				\
+#define COLOUR_ALIGN(addr, pgoff)				\
 	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
 	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
 
 enum mmap_allocation_direction {UP, DOWN};
 
-static unsigned long arch_get_unmapped_area_foo(struct file *filp,
+static unsigned long arch_get_unmapped_area_common(struct file *filp,
 	unsigned long addr0, unsigned long len, unsigned long pgoff,
 	unsigned long flags, enum mmap_allocation_direction dir)
 {
@@ -103,16 +103,16 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp,
 
 		vma = find_vma(mm, addr);
 		if (TASK_SIZE - len >= addr &&
-		   (!vma || addr + len <= vma->vm_start))
+		    (!vma || addr + len <= vma->vm_start))
 			return addr;
 	}
 
 	if (dir == UP) {
 		addr = mm->mmap_base;
-			if (do_color_align)
-				addr = COLOUR_ALIGN(addr, pgoff);
-			else
-				addr = PAGE_ALIGN(addr);
+		if (do_color_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
 
 		for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
 			/* At this point:  (!vma || addr < vma->vm_end). */
@@ -131,28 +131,30 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp,
 			mm->free_area_cache = mm->mmap_base;
 		}
 
-		/* either no address requested or can't fit in requested address hole */
+		/*
+		 * either no address requested, or the mapping can't fit into
+		 * the requested address hole
+		 */
 		addr = mm->free_area_cache;
-			if (do_color_align) {
-				unsigned long base =
-					COLOUR_ALIGN_DOWN(addr - len, pgoff);
-
+		if (do_color_align) {
+			unsigned long base =
+				COLOUR_ALIGN_DOWN(addr - len, pgoff);
 			addr = base + len;
-		 }
+		}
 
 		/* make sure it can fit in the remaining address space */
 		if (likely(addr > len)) {
 			vma = find_vma(mm, addr - len);
 			if (!vma || addr <= vma->vm_start) {
-				/* remember the address as a hint for next time */
-				return mm->free_area_cache = addr-len;
+				/* cache the address as a hint for next time */
+				return mm->free_area_cache = addr - len;
 			}
 		}
 
 		if (unlikely(mm->mmap_base < len))
 			goto bottomup;
 
-		addr = mm->mmap_base-len;
+		addr = mm->mmap_base - len;
 		if (do_color_align)
 			addr = COLOUR_ALIGN_DOWN(addr, pgoff);
 
@@ -163,8 +165,8 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp,
 			 * return with success:
 			 */
 			vma = find_vma(mm, addr);
-			if (likely(!vma || addr+len <= vma->vm_start)) {
-				/* remember the address as a hint for next time */
+			if (likely(!vma || addr + len <= vma->vm_start)) {
+				/* cache the address as a hint for next time */
 				return mm->free_area_cache = addr;
 			}
 
@@ -173,7 +175,7 @@ static unsigned long arch_get_unmapped_area_foo(struct file *filp,
 				mm->cached_hole_size = vma->vm_start - addr;
 
 			/* try just below the current vma->vm_start */
-			addr = vma->vm_start-len;
+			addr = vma->vm_start - len;
 			if (do_color_align)
 				addr = COLOUR_ALIGN_DOWN(addr, pgoff);
 		} while (likely(len < vma->vm_start));
@@ -201,7 +203,7 @@ bottomup:
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0,
 	unsigned long len, unsigned long pgoff, unsigned long flags)
 {
-	return arch_get_unmapped_area_foo(filp,
+	return arch_get_unmapped_area_common(filp,
 			addr0, len, pgoff, flags, UP);
 }
 
@@ -213,7 +215,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 	unsigned long addr0, unsigned long len, unsigned long pgoff,
 	unsigned long flags)
 {
-	return arch_get_unmapped_area_foo(filp,
+	return arch_get_unmapped_area_common(filp,
 			addr0, len, pgoff, flags, DOWN);
 }
 
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index b6e1cff50667..e06370f58ef3 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1759,14 +1759,13 @@ static void __cpuinit build_r3000_tlb_modify_handler(void)
 	u32 *p = handle_tlbm;
 	struct uasm_label *l = labels;
 	struct uasm_reloc *r = relocs;
-	struct work_registers wr;
 
 	memset(handle_tlbm, 0, sizeof(handle_tlbm));
 	memset(labels, 0, sizeof(labels));
 	memset(relocs, 0, sizeof(relocs));
 
 	build_r3000_tlbchange_handler_head(&p, K0, K1);
-	build_pte_modifiable(&p, &r, wr.r1, wr.r2,  wr.r3, label_nopage_tlbm);
+	build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);
 	uasm_i_nop(&p); /* load delay */
 	build_make_write(&p, &r, K0, K1);
 	build_r3000_pte_reload_tlbwi(&p, K0, K1);
@@ -1963,7 +1962,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void)
 			uasm_i_andi(&p, wr.r3, wr.r3, 2);
 			uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2);
 		}
-
+		if (PM_DEFAULT_MASK == 0)
+			uasm_i_nop(&p);
 		/*
 		 * We clobbered C0_PAGEMASK, restore it.  On the other branch
 		 * it is restored in build_huge_tlb_write_entry.
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 1d36c511a7a5..d53ff91b277c 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -350,12 +350,14 @@ unsigned int plat_ipi_resched_int_xlate(unsigned int cpu)
 
 static struct irqaction i8259irq = {
 	.handler = no_action,
-	.name = "XT-PIC cascade"
+	.name = "XT-PIC cascade",
+	.flags = IRQF_NO_THREAD,
 };
 
 static struct irqaction corehi_irqaction = {
 	.handler = no_action,
-	.name = "CoreHi"
+	.name = "CoreHi",
+	.flags = IRQF_NO_THREAD,
 };
 
 static msc_irqmap_t __initdata msc_irqmap[] = {
diff --git a/arch/mips/netlogic/xlr/Makefile b/arch/mips/netlogic/xlr/Makefile
index 9bd3f731f62e..2dca585dd2f7 100644
--- a/arch/mips/netlogic/xlr/Makefile
+++ b/arch/mips/netlogic/xlr/Makefile
@@ -2,4 +2,4 @@ obj-y				+= setup.o platform.o irq.o setup.o time.o
 obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_EARLY_PRINTK)	+= xlr_console.o
 
-EXTRA_CFLAGS			+= -Werror
+ccflags-y			+= -Werror
diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c
index 455f8e50a007..400535a955d0 100644
--- a/arch/mips/pci/pci-bcm47xx.c
+++ b/arch/mips/pci/pci-bcm47xx.c
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/ssb/ssb.h>
+#include <bcm47xx.h>
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
@@ -33,9 +34,13 @@ int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 
 int pcibios_plat_dev_init(struct pci_dev *dev)
 {
+#ifdef CONFIG_BCM47XX_SSB
 	int res;
 	u8 slot, pin;
 
+	if (bcm47xx_bus_type !=  BCM47XX_BUS_TYPE_SSB)
+		return 0;
+
 	res = ssb_pcibios_plat_dev_init(dev);
 	if (res < 0) {
 		printk(KERN_ALERT "PCI: Failed to init device %s\n",
@@ -55,5 +60,6 @@ int pcibios_plat_dev_init(struct pci_dev *dev)
 	}
 
 	dev->irq = res;
+#endif
 	return 0;
 }
diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c
index 603d7493e966..8656388b34bd 100644
--- a/arch/mips/pci/pci-lantiq.c
+++ b/arch/mips/pci/pci-lantiq.c
@@ -171,8 +171,13 @@ static int __devinit ltq_pci_startup(struct ltq_pci_data *conf)
 	u32 temp_buffer;
 
 	/* set clock to 33Mhz */
-	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR);
-	ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR);
+	if (ltq_is_ar9()) {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0x1f00000, LTQ_CGU_IFCCR);
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0xe00000, LTQ_CGU_IFCCR);
+	} else {
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) & ~0xf00000, LTQ_CGU_IFCCR);
+		ltq_cgu_w32(ltq_cgu_r32(LTQ_CGU_IFCCR) | 0x800000, LTQ_CGU_IFCCR);
+	}
 
 	/* external or internal clock ? */
 	if (conf->clock) {
diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c
index 764362ce5e40..5f3a69cebad1 100644
--- a/arch/mips/pci/pci-rc32434.c
+++ b/arch/mips/pci/pci-rc32434.c
@@ -215,7 +215,7 @@ static int __init rc32434_pci_init(void)
 	rc32434_pcibridge_init();
 
 	io_map_base = ioremap(rc32434_res_pci_io1.start,
-			      resource_size(&rcrc32434_res_pci_io1));
+			      resource_size(&rc32434_res_pci_io1));
 
 	if (!io_map_base)
 		return -ENOMEM;
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_irq.c b/arch/mips/pmc-sierra/msp71xx/msp_irq.c
index 4531c4a514bc..d3c3d81757a5 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_irq.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_irq.c
@@ -108,12 +108,14 @@ asmlinkage void plat_irq_dispatch(struct pt_regs *regs)
 
 static struct irqaction cic_cascade_msp = {
 	.handler = no_action,
-	.name	 = "MSP CIC cascade"
+	.name	 = "MSP CIC cascade",
+	.flags	 = IRQF_NO_THREAD,
 };
 
 static struct irqaction per_cascade_msp = {
 	.handler = no_action,
-	.name	 = "MSP PER cascade"
+	.name	 = "MSP PER cascade",
+	.flags	 = IRQF_NO_THREAD,
 };
 
 void __init arch_init_irq(void)
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_serial.c b/arch/mips/pmc-sierra/msp71xx/msp_serial.c
index f7261628d8a6..a1c7c7da2336 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_serial.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_serial.c
@@ -27,6 +27,7 @@
 #include <linux/serial.h>
 #include <linux/serial_core.h>
 #include <linux/serial_reg.h>
+#include <linux/slab.h>
 
 #include <asm/bootinfo.h>
 #include <asm/io.h>
@@ -38,6 +39,55 @@
 #include <msp_int.h>
 #include <msp_regs.h>
 
+struct msp_uart_data {
+	int	last_lcr;
+};
+
+static void msp_serial_out(struct uart_port *p, int offset, int value)
+{
+	struct msp_uart_data *d = p->private_data;
+
+	if (offset == UART_LCR)
+		d->last_lcr = value;
+
+	offset <<= p->regshift;
+	writeb(value, p->membase + offset);
+}
+
+static unsigned int msp_serial_in(struct uart_port *p, int offset)
+{
+	offset <<= p->regshift;
+
+	return readb(p->membase + offset);
+}
+
+static int msp_serial_handle_irq(struct uart_port *p)
+{
+	struct msp_uart_data *d = p->private_data;
+	unsigned int iir = readb(p->membase + (UART_IIR << p->regshift));
+
+	if (serial8250_handle_irq(p, iir)) {
+		return 1;
+	} else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) {
+		/*
+		 * The DesignWare APB UART has an Busy Detect (0x07) interrupt
+		 * meaning an LCR write attempt occurred while the UART was
+		 * busy. The interrupt must be cleared by reading the UART
+		 * status register (USR) and the LCR re-written.
+		 *
+		 * Note: MSP reserves 0x20 bytes of address space for the UART
+		 * and the USR is mapped in a separate block at an offset of
+		 * 0xc0 from the start of the UART.
+		 */
+		(void)readb(p->membase + 0xc0);
+		writeb(d->last_lcr, p->membase + (UART_LCR << p->regshift));
+
+		return 1;
+	}
+
+	return 0;
+}
+
 void __init msp_serial_setup(void)
 {
 	char    *s;
@@ -59,13 +109,22 @@ void __init msp_serial_setup(void)
 	up.irq          = MSP_INT_UART0;
 	up.uartclk      = uartclk;
 	up.regshift     = 2;
-	up.iotype       = UPIO_DWAPB; /* UPIO_MEM like */
+	up.iotype       = UPIO_MEM;
 	up.flags        = ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
 	up.type         = PORT_16550A;
 	up.line         = 0;
-	up.private_data		= (void*)UART0_STATUS_REG;
-	if (early_serial_setup(&up))
-		printk(KERN_ERR "Early serial init of port 0 failed\n");
+	up.serial_out	= msp_serial_out;
+	up.serial_in	= msp_serial_in;
+	up.handle_irq	= msp_serial_handle_irq;
+	up.private_data	= kzalloc(sizeof(struct msp_uart_data), GFP_KERNEL);
+	if (!up.private_data) {
+		pr_err("failed to allocate uart private data\n");
+		return;
+	}
+	if (early_serial_setup(&up)) {
+		kfree(up.private_data);
+		pr_err("Early serial init of port 0 failed\n");
+	}
 
 	/* Initialize the second serial port, if one exists */
 	switch (mips_machtype) {
@@ -88,6 +147,8 @@ void __init msp_serial_setup(void)
 	up.irq          = MSP_INT_UART1;
 	up.line         = 1;
 	up.private_data		= (void*)UART1_STATUS_REG;
-	if (early_serial_setup(&up))
-		printk(KERN_ERR "Early serial init of port 1 failed\n");
+	if (early_serial_setup(&up)) {
+		kfree(up.private_data);
+		pr_err("Early serial init of port 1 failed\n");
+	}
 }
diff --git a/arch/mips/pnx8550/common/int.c b/arch/mips/pnx8550/common/int.c
index 6b93c81779c1..1ebe22bdadc8 100644
--- a/arch/mips/pnx8550/common/int.c
+++ b/arch/mips/pnx8550/common/int.c
@@ -167,7 +167,7 @@ static struct irq_chip level_irq_type = {
 
 static struct irqaction gic_action = {
 	.handler =	no_action,
-	.flags =	IRQF_DISABLED,
+	.flags =	IRQF_DISABLED | IRQF_NO_THREAD,
 	.name =		"GIC",
 };
 
diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c
index b4d08e4d2ea9..f72c336ea27b 100644
--- a/arch/mips/sgi-ip22/ip22-int.c
+++ b/arch/mips/sgi-ip22/ip22-int.c
@@ -155,32 +155,32 @@ static void __irq_entry indy_buserror_irq(void)
 
 static struct irqaction local0_cascade = {
 	.handler	= no_action,
-	.flags		= IRQF_DISABLED,
+	.flags		= IRQF_DISABLED | IRQF_NO_THREAD,
 	.name		= "local0 cascade",
 };
 
 static struct irqaction local1_cascade = {
 	.handler	= no_action,
-	.flags		= IRQF_DISABLED,
+	.flags		= IRQF_DISABLED | IRQF_NO_THREAD,
 	.name		= "local1 cascade",
 };
 
 static struct irqaction buserr = {
 	.handler	= no_action,
-	.flags		= IRQF_DISABLED,
+	.flags		= IRQF_DISABLED | IRQF_NO_THREAD,
 	.name		= "Bus Error",
 };
 
 static struct irqaction map0_cascade = {
 	.handler	= no_action,
-	.flags		= IRQF_DISABLED,
+	.flags		= IRQF_DISABLED | IRQF_NO_THREAD,
 	.name		= "mapable0 cascade",
 };
 
 #ifdef USE_LIO3_IRQ
 static struct irqaction map1_cascade = {
 	.handler	= no_action,
-	.flags		= IRQF_DISABLED,
+	.flags		= IRQF_DISABLED | IRQF_NO_THREAD,
 	.name		= "mapable1 cascade",
 };
 #define SGI_INTERRUPTS	SGINT_END
diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c
index a7e5a6d917b1..3ab5b5d25b0a 100644
--- a/arch/mips/sni/rm200.c
+++ b/arch/mips/sni/rm200.c
@@ -359,6 +359,7 @@ void sni_rm200_init_8259A(void)
 static struct irqaction sni_rm200_irq2 = {
 	.handler = no_action,
 	.name = "cascade",
+	.flags = IRQF_NO_THREAD,
 };
 
 static struct resource sni_rm200_pic1_resource = {
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index e9f95dcde379..ba3cec3155df 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -321,7 +321,7 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask)
 static u32 tx4939_get_eth_speed(struct net_device *dev)
 {
 	struct ethtool_cmd cmd;
-	if (dev_ethtool_get_settings(dev, &cmd))
+	if (__ethtool_get_settings(dev, &cmd))
 		return 100;	/* default 100Mbps */
 
 	return ethtool_cmd_speed(&cmd);
diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c
index 70a3b85f3757..fad2bef432cd 100644
--- a/arch/mips/vr41xx/common/irq.c
+++ b/arch/mips/vr41xx/common/irq.c
@@ -34,6 +34,7 @@ static irq_cascade_t irq_cascade[NR_IRQS] __cacheline_aligned;
 static struct irqaction cascade_irqaction = {
 	.handler	= no_action,
 	.name		= "cascade",
+	.flags		= IRQF_NO_THREAD,
 };
 
 int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int))
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 1f870340ebdd..438db84a1f7c 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -47,9 +47,6 @@ config GENERIC_CMOS_UPDATE
 config GENERIC_HWEIGHT
 	def_bool y
 
-config GENERIC_TIME
-	def_bool y
-
 config GENERIC_CLOCKEVENTS
 	def_bool y
 
@@ -195,7 +192,7 @@ config SMP
 	  singleprocessor machines. On a singleprocessor machine, the kernel
 	  will run faster if you say N here.
 
-	  See also <file:Documentation/i386/IO-APIC.txt>,
+	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
 	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
 	  <http://www.tldp.org/docs.html#howto>.
 
diff --git a/arch/mn10300/kernel/gdb-stub.c b/arch/mn10300/kernel/gdb-stub.c
index 538266b2c9bc..522eb8a9b60d 100644
--- a/arch/mn10300/kernel/gdb-stub.c
+++ b/arch/mn10300/kernel/gdb-stub.c
@@ -798,7 +798,7 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
 	if ((u32) mem & 1 && count >= 1) {
 		if (gdbstub_read_byte(mem, ch) != 0)
 			return 0;
-		buf = pack_hex_byte(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[0]);
 		mem++;
 		count--;
 	}
@@ -806,8 +806,8 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
 	if ((u32) mem & 3 && count >= 2) {
 		if (gdbstub_read_word(mem, ch) != 0)
 			return 0;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
 		mem += 2;
 		count -= 2;
 	}
@@ -815,10 +815,10 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
 	while (count >= 4) {
 		if (gdbstub_read_dword(mem, ch) != 0)
 			return 0;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
-		buf = pack_hex_byte(buf, ch[2]);
-		buf = pack_hex_byte(buf, ch[3]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[2]);
+		buf = hex_byte_pack(buf, ch[3]);
 		mem += 4;
 		count -= 4;
 	}
@@ -826,8 +826,8 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
 	if (count >= 2) {
 		if (gdbstub_read_word(mem, ch) != 0)
 			return 0;
-		buf = pack_hex_byte(buf, ch[0]);
-		buf = pack_hex_byte(buf, ch[1]);
+		buf = hex_byte_pack(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[1]);
 		mem += 2;
 		count -= 2;
 	}
@@ -835,7 +835,7 @@ unsigned char *mem2hex(const void *_mem, char *buf, int count, int may_fault)
 	if (count >= 1) {
 		if (gdbstub_read_byte(mem, ch) != 0)
 			return 0;
-		buf = pack_hex_byte(buf, ch[0]);
+		buf = hex_byte_pack(buf, ch[0]);
 	}
 
 	*buf = 0;
@@ -1273,13 +1273,13 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 		ptr = mem2hex(title, ptr, sizeof(title) - 1, 0);
 
 		hx = hex_asc_hi(excep >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(excep >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(excep);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(excep);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 
 		ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0);
 		*ptr = 0;
@@ -1291,21 +1291,21 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 		ptr = mem2hex(tbcberr, ptr, sizeof(tbcberr) - 1, 0);
 
 		hx = hex_asc_hi(bcberr >> 24);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(bcberr >> 24);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(bcberr >> 16);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(bcberr >> 16);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(bcberr >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(bcberr >> 8);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_hi(bcberr);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 		hx = hex_asc_lo(bcberr);
-		ptr = pack_hex_byte(ptr, hx);
+		ptr = hex_byte_pack(ptr, hx);
 
 		ptr = mem2hex(crlf, ptr, sizeof(crlf) - 1, 0);
 		*ptr = 0;
@@ -1321,12 +1321,12 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	 * Send trap type (converted to signal)
 	 */
 	*ptr++ = 'T';
-	ptr = pack_hex_byte(ptr, sigval);
+	ptr = hex_byte_pack(ptr, sigval);
 
 	/*
 	 * Send Error PC
 	 */
-	ptr = pack_hex_byte(ptr, GDB_REGID_PC);
+	ptr = hex_byte_pack(ptr, GDB_REGID_PC);
 	*ptr++ = ':';
 	ptr = mem2hex(&regs->pc, ptr, 4, 0);
 	*ptr++ = ';';
@@ -1334,7 +1334,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	/*
 	 * Send frame pointer
 	 */
-	ptr = pack_hex_byte(ptr, GDB_REGID_FP);
+	ptr = hex_byte_pack(ptr, GDB_REGID_FP);
 	*ptr++ = ':';
 	ptr = mem2hex(&regs->a3, ptr, 4, 0);
 	*ptr++ = ';';
@@ -1343,7 +1343,7 @@ static int gdbstub(struct pt_regs *regs, enum exception_code excep)
 	 * Send stack pointer
 	 */
 	ssp = (unsigned long) (regs + 1);
-	ptr = pack_hex_byte(ptr, GDB_REGID_SP);
+	ptr = hex_byte_pack(ptr, GDB_REGID_SP);
 	*ptr++ = ':';
 	ptr = mem2hex(&ssp, ptr, 4, 0);
 	*ptr++ = ';';
diff --git a/arch/mn10300/kernel/irq.c b/arch/mn10300/kernel/irq.c
index 2623d19f4f4c..2381df83bd00 100644
--- a/arch/mn10300/kernel/irq.c
+++ b/arch/mn10300/kernel/irq.c
@@ -260,7 +260,6 @@ void set_intr_level(int irq, u16 level)
 /*
  * mark an interrupt to be ACK'd after interrupt handlers have been run rather
  * than before
- * - see Documentation/mn10300/features.txt
  */
 void mn10300_set_lateack_irq_type(int irq)
 {
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 4558bafbd1a2..9460e1c266dd 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -1,6 +1,6 @@
 #
 # For a description of the syntax of this configuration file,
-# see Documentation/kbuild/config-language.txt.
+# see Documentation/kbuild/kconfig-language.txt.
 #
 
 config OPENRISC
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h
index 60b472233900..b206ba4608b2 100644
--- a/arch/openrisc/include/asm/dma-mapping.h
+++ b/arch/openrisc/include/asm/dma-mapping.h
@@ -18,7 +18,7 @@
 #define __ASM_OPENRISC_DMA_MAPPING_H
 
 /*
- * See Documentation/PCI/PCI-DMA-mapping.txt and
+ * See Documentation/DMA-API-HOWTO.txt and
  * Documentation/DMA-API.txt for documentation.
  *
  * This file is written with the intention of eventually moving over
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index e077b0bf56ca..fdfd8be29e95 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -169,9 +169,7 @@ config 64BIT
 
 choice
 	prompt "Kernel page size"
-	default PARISC_PAGE_SIZE_4KB  if !64BIT
-	default PARISC_PAGE_SIZE_4KB  if 64BIT
-#	default PARISC_PAGE_SIZE_16KB if 64BIT
+	default PARISC_PAGE_SIZE_4KB
 
 config PARISC_PAGE_SIZE_4KB
 	bool "4KB"
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index efa0b60c63fe..760f331d4fa3 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -105,7 +105,8 @@ struct compat_statfs {
 	__kernel_fsid_t	f_fsid;
 	s32		f_namelen;
 	s32		f_frsize;
-	s32		f_spare[5];
+	s32		f_flags;
+	s32		f_spare[4];
 };
 
 struct compat_sigcontext {
diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h
index 890531e32fe8..467bbd510eac 100644
--- a/arch/parisc/include/asm/dma-mapping.h
+++ b/arch/parisc/include/asm/dma-mapping.h
@@ -5,7 +5,7 @@
 #include <asm/cacheflush.h>
 #include <asm/scatterlist.h>
 
-/* See Documentation/PCI/PCI-DMA-mapping.txt */
+/* See Documentation/DMA-API-HOWTO.txt */
 struct hppa_dma_ops {
 	int  (*dma_supported)(struct device *dev, u64 mask);
 	void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag);
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index a029f74a3c5c..d047edea2504 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -2,7 +2,7 @@
 ** PARISC 1.1 Dynamic DMA mapping support.
 ** This implementation is for PA-RISC platforms that do not support
 ** I/O TLBs (aka DMA address translation hardware).
-** See Documentation/PCI/PCI-DMA-mapping.txt for interface definitions.
+** See Documentation/DMA-API-HOWTO.txt for interface definitions.
 **
 **      (c) Copyright 1999,2000 Hewlett-Packard Company
 **      (c) Copyright 2000 Grant Grundler
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6926b61acfea..47682b67fd36 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -656,6 +656,8 @@ config SBUS
 
 config FSL_SOC
 	bool
+	select HAVE_CAN_FLEXCAN if NET && CAN
+	select PPC_CLOCK if CAN_FLEXCAN
 
 config FSL_PCI
  	bool
diff --git a/arch/powerpc/boot/dts/p1010rdb.dts b/arch/powerpc/boot/dts/p1010rdb.dts
index 6b33b73a5ba0..d6c669c888e9 100644
--- a/arch/powerpc/boot/dts/p1010rdb.dts
+++ b/arch/powerpc/boot/dts/p1010rdb.dts
@@ -23,6 +23,8 @@
 		ethernet2 = &enet2;
 		pci0 = &pci0;
 		pci1 = &pci1;
+		can0 = &can0;
+		can1 = &can1;
 	};
 
 	memory {
@@ -169,14 +171,6 @@
 			};
 		};
 
-		can0@1c000 {
-			fsl,flexcan-clock-source = "platform";
-		};
-
-		can1@1d000 {
-			fsl,flexcan-clock-source = "platform";
-		};
-
 		usb@22000 {
 			phy_type = "utmi";
 		};
diff --git a/arch/powerpc/boot/dts/p1010si.dtsi b/arch/powerpc/boot/dts/p1010si.dtsi
index 7f51104f2e36..cabe0a453ae6 100644
--- a/arch/powerpc/boot/dts/p1010si.dtsi
+++ b/arch/powerpc/boot/dts/p1010si.dtsi
@@ -140,20 +140,18 @@
 			interrupt-parent = <&mpic>;
 		};
 
-		can0@1c000 {
-			compatible = "fsl,flexcan-v1.0";
+		can0: can@1c000 {
+			compatible = "fsl,p1010-flexcan";
 			reg = <0x1c000 0x1000>;
 			interrupts = <48 0x2>;
 			interrupt-parent = <&mpic>;
-			fsl,flexcan-clock-divider = <2>;
 		};
 
-		can1@1d000 {
-			compatible = "fsl,flexcan-v1.0";
+		can1: can@1d000 {
+			compatible = "fsl,p1010-flexcan";
 			reg = <0x1d000 0x1000>;
 			interrupts = <61 0x2>;
 			interrupt-parent = <&mpic>;
-			fsl,flexcan-clock-divider = <2>;
 		};
 
 		L2: l2-cache-controller@20000 {
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
index 4182c772340b..ed3bab72a834 100644
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ b/arch/powerpc/configs/40x/acadia_defconfig
@@ -44,12 +44,13 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
 CONFIG_MII=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
-CONFIG_IBM_NEW_EMAC_DEBUG=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_IBM_EMAC_DEBUG=y
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
index 2dbb293163f5..17582a3420fb 100644
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ b/arch/powerpc/configs/40x/ep405_defconfig
@@ -42,8 +42,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/40x/hcu4_defconfig b/arch/powerpc/configs/40x/hcu4_defconfig
index ebeb4accad65..dba263c1d3a2 100644
--- a/arch/powerpc/configs/40x/hcu4_defconfig
+++ b/arch/powerpc/configs/40x/hcu4_defconfig
@@ -43,8 +43,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
index 532ea9d93a15..f2d4be936e08 100644
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ b/arch/powerpc/configs/40x/kilauea_defconfig
@@ -51,10 +51,11 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
index 3c142ac1b344..42b979355f9b 100644
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ b/arch/powerpc/configs/40x/makalu_defconfig
@@ -43,10 +43,11 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
index ff57d4828ffc..aa1a4cac3708 100644
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ b/arch/powerpc/configs/40x/walnut_defconfig
@@ -40,8 +40,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 3ed16d5c909d..329f9a3b892e 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -44,10 +44,11 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index b1b7d2c5c059..cef7d62560c4 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -32,8 +32,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
index 30a0a8e08fdd..20c8d26d7fc0 100644
--- a/arch/powerpc/configs/44x/bluestone_defconfig
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -38,10 +38,11 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=2
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index a46942aac695..d5be93e6e92d 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -49,10 +49,11 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 # CONFIG_MISC_DEVICES is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index 07d77e51f1ba..f9269fc4ffcc 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -40,8 +40,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index 2ce7e9aff09e..9be089038fd7 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -55,10 +55,11 @@ CONFIG_FUSION=y
 CONFIG_FUSION_SAS=y
 CONFIG_I2O=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
 CONFIG_E1000E=y
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 18730ff9de7c..82f73035a7ce 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -56,8 +56,9 @@ CONFIG_FUSION_SAS=y
 CONFIG_FUSION_CTL=y
 CONFIG_FUSION_LOGGING=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_WLAN is not set
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index 34c09144a699..109562c3c6be 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -42,8 +42,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 01cc2b1a7f9a..48802811da76 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -53,11 +53,12 @@ CONFIG_FUSION=y
 CONFIG_FUSION_SAS=y
 CONFIG_I2O=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
-CONFIG_IBM_NEW_EMAC_RXB=256
-CONFIG_IBM_NEW_EMAC_TXB=256
-CONFIG_IBM_NEW_EMAC_DEBUG=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
+CONFIG_IBM_EMAC_RXB=256
+CONFIG_IBM_EMAC_TXB=256
+CONFIG_IBM_EMAC_DEBUG=y
 CONFIG_E1000E=y
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index dfcffede16ad..ca088cd581af 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -44,8 +44,9 @@ CONFIG_ATA=y
 # CONFIG_SATA_PMP is not set
 CONFIG_SATA_SIL=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 CONFIG_INPUT_FF_MEMLESS=m
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 47e399f2892f..b7a653b626db 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -46,8 +46,9 @@ CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index a6a002ed5681..30de97f158a4 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -40,8 +40,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index abf74dc1f79c..105bc56f4b2b 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -54,9 +54,10 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
 CONFIG_MII=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_IBM_EMAC=y
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 # CONFIG_INPUT is not set
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index bfd634b5ada7..7cb703b948b1 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -50,8 +50,9 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=35000
 CONFIG_XILINX_SYSACE=m
 CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 CONFIG_SERIO=m
 # CONFIG_SERIO_I8042 is not set
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 47133202a625..6cdf1c0d2c8a 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -63,8 +63,9 @@ CONFIG_BLK_DEV_SD=m
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
 CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
-CONFIG_IBM_NEW_EMAC=y
+CONFIG_ETHERNET=y
+CONFIG_NET_VENDOR_IBM=y
+CONFIG_IBM_EMAC=y
 # CONFIG_INPUT is not set
 CONFIG_SERIO=m
 # CONFIG_SERIO_I8042 is not set
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 91010e8f8479..88e602f6430d 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -100,7 +100,8 @@ struct compat_statfs {
 	compat_fsid_t	f_fsid;
 	int		f_namelen;	/* SunOS ignores this field. */
 	int		f_frsize;
-	int		f_spare[5];
+	int		f_flags;
+	int		f_spare[4];
 };
 
 #define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index a4f6c85431f8..08fe69edcd10 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -149,6 +149,12 @@ struct kvm_regs {
 #define KVM_SREGS_E_UPDATE_DBSR		(1 << 3)
 
 /*
+ * Book3S special bits to indicate contents in the struct by maintaining
+ * backwards compatibility with older structs. If adding a new field,
+ * please make sure to add a flag for that new field */
+#define KVM_SREGS_S_HIOR		(1 << 0)
+
+/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
@@ -173,6 +179,8 @@ struct kvm_sregs {
 				__u64 ibat[8]; 
 				__u64 dbat[8]; 
 			} ppc32;
+			__u64 flags; /* KVM_SREGS_S_ */
+			__u64 hior;
 		} s;
 		struct {
 			union {
@@ -276,6 +284,11 @@ struct kvm_guest_debug_arch {
 #define KVM_INTERRUPT_UNSET	-2U
 #define KVM_INTERRUPT_SET_LEVEL	-3U
 
+#define KVM_CPU_440		1
+#define KVM_CPU_E500V2		2
+#define KVM_CPU_3S_32		3
+#define KVM_CPU_3S_64		4
+
 /* for KVM_CAP_SPAPR_TCE */
 struct kvm_create_spapr_tce {
 	__u64 liobn;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 98da010252a3..a384ffdf33de 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -90,6 +90,8 @@ struct kvmppc_vcpu_book3s {
 #endif
 	int context_id[SID_CONTEXTS];
 
+	bool hior_sregs;		/* HIOR is set by SREGS, not PVR */
+
 	struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE];
 	struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG];
 	struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE];
@@ -139,15 +141,14 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
-extern void kvmppc_handler_lowmem_trampoline(void);
-extern void kvmppc_handler_trampoline_enter(void);
-extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern void kvmppc_load_up_fpu(void);
 extern void kvmppc_load_up_altivec(void);
 extern void kvmppc_load_up_vsx(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
+extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
@@ -382,6 +383,39 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 }
 #endif
 
+static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
+					     unsigned long pte_index)
+{
+	unsigned long rb, va_low;
+
+	rb = (v & ~0x7fUL) << 16;		/* AVA field */
+	va_low = pte_index >> 3;
+	if (v & HPTE_V_SECONDARY)
+		va_low = ~va_low;
+	/* xor vsid from AVA */
+	if (!(v & HPTE_V_1TB_SEG))
+		va_low ^= v >> 12;
+	else
+		va_low ^= v >> 24;
+	va_low &= 0x7ff;
+	if (v & HPTE_V_LARGE) {
+		rb |= 1;			/* L field */
+		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+		    (r & 0xff000)) {
+			/* non-16MB large page, must be 64k */
+			/* (masks depend on page size) */
+			rb |= 0x1000;		/* page encoding in LP field */
+			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
+			rb |= (va_low & 0xfe);	/* AVAL field (P7 doesn't seem to care) */
+		}
+	} else {
+		/* 4kB page */
+		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
+	}
+	rb |= (v >> 54) & 0x300;		/* B field */
+	return rb;
+}
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ef7b3688c3b6..1f2f5b6156bd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -75,6 +75,8 @@ struct kvmppc_host_state {
 	ulong scratch0;
 	ulong scratch1;
 	u8 in_guest;
+	u8 restore_hid5;
+	u8 napping;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu *kvm_vcpu;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cc22b282d755..bf8af5d5d5dc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -198,21 +198,29 @@ struct kvm_arch {
  */
 struct kvmppc_vcore {
 	int n_runnable;
-	int n_blocked;
+	int n_busy;
 	int num_threads;
 	int entry_exit_count;
 	int n_woken;
 	int nap_count;
+	int napping_threads;
 	u16 pcpu;
-	u8 vcore_running;
+	u8 vcore_state;
 	u8 in_guest;
 	struct list_head runnable_threads;
 	spinlock_t lock;
+	wait_queue_head_t wq;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
 #define VCORE_EXIT_COUNT(vc)	((vc)->entry_exit_count >> 8)
 
+/* Values for vcore_state */
+#define VCORE_INACTIVE	0
+#define VCORE_RUNNING	1
+#define VCORE_EXITING	2
+#define VCORE_SLEEPING	3
+
 struct kvmppc_pte {
 	ulong eaddr;
 	u64 vpage;
@@ -258,14 +266,6 @@ struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
 #ifdef CONFIG_PPC_BOOK3S
-	ulong host_msr;
-	ulong host_r2;
-	void *host_retip;
-	ulong trampoline_lowmem;
-	ulong trampoline_enter;
-	ulong highmem_handler;
-	ulong rmcall;
-	ulong host_paca_phys;
 	struct kvmppc_slb slb[64];
 	int slb_max;		/* 1 + index of last valid entry in slb[] */
 	int slb_nr;		/* total number of entries in SLB */
@@ -389,6 +389,9 @@ struct kvm_vcpu_arch {
 	u8 dcr_is_write;
 	u8 osi_needed;
 	u8 osi_enabled;
+	u8 papr_enabled;
+	u8 sane;
+	u8 cpu_type;
 	u8 hcall_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -408,11 +411,13 @@ struct kvm_vcpu_arch {
 	struct dtl *dtl;
 	struct dtl *dtl_end;
 
+	wait_queue_head_t *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
 	int state;
 	int ptid;
+	bool timer_running;
 	wait_queue_head_t cpu_run;
 
 	struct kvm_vcpu_arch_shared *shared;
@@ -428,8 +433,9 @@ struct kvm_vcpu_arch {
 #endif
 };
 
-#define KVMPPC_VCPU_BUSY_IN_HOST	0
-#define KVMPPC_VCPU_BLOCKED		1
+/* Values for vcpu->arch.state */
+#define KVMPPC_VCPU_STOPPED		0
+#define KVMPPC_VCPU_BUSY_IN_HOST	1
 #define KVMPPC_VCPU_RUNNABLE		2
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d121f49d62b8..46efd1a265c9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
 extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
+extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 0947b36e534c..5e0b6d511e14 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -196,7 +196,7 @@ static inline int qe_alive_during_sleep(void)
 
 /* Structure that defines QE firmware binary files.
  *
- * See Documentation/powerpc/qe-firmware.txt for a description of these
+ * See Documentation/powerpc/qe_firmware.txt for a description of these
  * fields.
  */
 struct qe_firmware {
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index fa0d27a400de..559ae1ee6706 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -354,3 +354,5 @@ COMPAT_SYS_SPU(clock_adjtime)
 SYSCALL_SPU(syncfs)
 COMPAT_SYS_SPU(sendmmsg)
 SYSCALL_SPU(setns)
+COMPAT_SYS(process_vm_readv)
+COMPAT_SYS(process_vm_writev)
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 93e05d1b34b2..5354ae91bdde 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -54,6 +54,7 @@ extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
 extern void __init udbg_init_wsp(void);
+extern void __init udbg_init_ehv_bc(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index b8b3f599362b..d3d1b5efd7eb 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -373,10 +373,12 @@
 #define __NR_syncfs		348
 #define __NR_sendmmsg		349
 #define __NR_setns		350
+#define __NR_process_vm_readv	351
+#define __NR_process_vm_writev	352
 
 #ifdef __KERNEL__
 
-#define __NR_syscalls		351
+#define __NR_syscalls		353
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5f078bc2063e..69f7ffe7f674 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -44,6 +44,7 @@
 #include <asm/compat.h>
 #include <asm/mmu.h>
 #include <asm/hvcall.h>
+#include <asm/xics.h>
 #endif
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
@@ -449,8 +450,6 @@ int main(void)
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
-	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
-	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
 	DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
 	DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
 	DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
@@ -458,14 +457,12 @@ int main(void)
 	DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
 	DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
 	DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
-	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
-	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
-	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
-	DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
 	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
 	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
 	DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
+	DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
 	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
 	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
@@ -481,6 +478,7 @@ int main(void)
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
+	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
@@ -537,6 +535,8 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
 	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
 	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
+	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+	HSTATE_FIELD(HSTATE_NAPPING, napping);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -549,6 +549,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 #else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41b02c792aa3..29ddd8b1c274 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -427,16 +427,6 @@ slb_miss_user_pseries:
 	b	.				/* prevent spec. execution */
 #endif /* __DISABLED__ */
 
-/* KVM's trampoline code needs to be close to the interrupt handlers */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_PR
-#include "../kvm/book3s_rmhandlers.S"
-#else
-#include "../kvm/book3s_hv_rmhandlers.S"
-#endif
-#endif
-
 	.align	7
 	.globl	__end_interrupts
 __end_interrupts:
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index faa82c1f3f68..b4607a91d1f4 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -67,6 +67,8 @@ void __init udbg_early_init(void)
 	udbg_init_usbgecko();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP)
 	udbg_init_wsp();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
+	udbg_init_ehv_bc();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index da3a1225c0ac..ca1f88b3dc59 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -78,6 +78,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
 		vcpu_44x->shadow_refs[i].gtlb_index = -1;
 
+	vcpu->arch.cpu_type = KVM_CPU_440;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 08428e2c188d..3688aeecc4b2 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -43,18 +43,22 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
+	book3s_pr_papr.o \
 	book3s_emulate.o \
 	book3s_interrupts.o \
 	book3s_mmu_hpte.o \
 	book3s_64_mmu_host.o \
 	book3s_64_mmu.o \
 	book3s_32_mmu.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+	book3s_rmhandlers.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
 	book3s_hv_builtin.o
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 3608471ad2d8..7e06a6fc8d07 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
 	 * R1 = host R1
 	 * R2 = host R2
 	 * R3 = shadow vcpu
-	 * all other volatile GPRS = free
+	 * all other volatile GPRS = free except R4, R6
 	 * SVCPU[CR]  = guest CR
 	 * SVCPU[XER] = guest XER
 	 * SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index c6d3e194b6b4..b871721c0050 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -128,7 +128,13 @@ static hva_t kvmppc_mmu_book3s_64_get_pteg(
 	dprintk("MMU: page=0x%x sdr1=0x%llx pteg=0x%llx vsid=0x%llx\n",
 		page, vcpu_book3s->sdr1, pteg, slbe->vsid);
 
-	r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+	/* When running a PAPR guest, SDR1 contains a HVA address instead
+           of a GPA */
+	if (vcpu_book3s->vcpu.arch.papr_enabled)
+		r = pteg;
+	else
+		r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT);
+
 	if (kvm_is_error_hva(r))
 		return r;
 	return r | (pteg & ~PAGE_MASK);
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 04e7d3bbfe8b..f2e6e48ea463 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
 	 * R1 = host R1
 	 * R2 = host R2
 	 * R3 = shadow vcpu
-	 * all other volatile GPRS = free
+	 * all other volatile GPRS = free except R4, R6
 	 * SVCPU[CR]  = guest CR
 	 * SVCPU[XER] = guest XER
 	 * SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 466846557089..0c9dc62532d0 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -63,6 +63,25 @@
  * function pointers, so let's just disable the define. */
 #undef mfsrin
 
+enum priv_level {
+	PRIV_PROBLEM = 0,
+	PRIV_SUPER = 1,
+	PRIV_HYPER = 2,
+};
+
+static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
+{
+	/* PAPR VMs only access supervisor SPRs */
+	if (vcpu->arch.papr_enabled && (level > PRIV_SUPER))
+		return false;
+
+	/* Limit user space to its own small SPR set */
+	if ((vcpu->arch.shared->msr & MSR_PR) && level > PRIV_PROBLEM)
+		return false;
+
+	return true;
+}
+
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -296,6 +315,8 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
 	switch (sprn) {
 	case SPRN_SDR1:
+		if (!spr_allowed(vcpu, PRIV_HYPER))
+			goto unprivileged;
 		to_book3s(vcpu)->sdr1 = spr_val;
 		break;
 	case SPRN_DSISR:
@@ -390,6 +411,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_PMC4_GEKKO:
 	case SPRN_WPAR_GEKKO:
 		break;
+unprivileged:
 	default:
 		printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
 #ifndef DEBUG_SPR
@@ -421,6 +443,8 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		break;
 	}
 	case SPRN_SDR1:
+		if (!spr_allowed(vcpu, PRIV_HYPER))
+			goto unprivileged;
 		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
 		break;
 	case SPRN_DSISR:
@@ -449,6 +473,10 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 	case SPRN_HID5:
 		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
 		break;
+	case SPRN_CFAR:
+	case SPRN_PURR:
+		kvmppc_set_gpr(vcpu, rt, 0);
+		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
 	case SPRN_GQR2:
@@ -476,6 +504,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		kvmppc_set_gpr(vcpu, rt, 0);
 		break;
 	default:
+unprivileged:
 		printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
 #ifndef DEBUG_SPR
 		emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 88c8f26add02..f7f63a00ab1f 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -23,9 +23,7 @@
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
 #else
-EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
-EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
-EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cc0d7f1b19ab..4644c7986d80 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -62,6 +62,8 @@
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
 
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	local_paca->kvm_hstate.kvm_vcpu = vcpu;
@@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
-
-void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
-{
-	u64 now;
-	unsigned long dec_nsec;
-
-	now = get_tb();
-	if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
-		kvmppc_core_queue_dec(vcpu);
-	if (vcpu->arch.pending_exceptions)
-		return;
-	if (vcpu->arch.dec_expires != ~(u64)0) {
-		dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
-			tb_ticks_per_sec;
-		hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
-			      HRTIMER_MODE_REL);
-	}
-
-	kvmppc_vcpu_blocked(vcpu);
-
-	kvm_vcpu_block(vcpu);
-	vcpu->stat.halt_wakeup++;
-
-	if (vcpu->arch.dec_expires != ~(u64)0)
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-
-	kvmppc_vcpu_unblocked(vcpu);
-}
-
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	vcpu->arch.shregs.msr = msr;
+	kvmppc_end_cede(vcpu);
 }
 
 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 	switch (req) {
 	case H_CEDE:
-		vcpu->arch.shregs.msr |= MSR_EE;
-		vcpu->arch.ceded = 1;
-		smp_mb();
-		if (!vcpu->arch.prodded)
-			kvmppc_vcpu_block(vcpu);
-		else
-			vcpu->arch.prodded = 0;
-		smp_mb();
-		vcpu->arch.ceded = 0;
 		break;
 	case H_PROD:
 		target = kvmppc_get_gpr(vcpu, 4);
@@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
-
-	if (!(r & RESUME_HOST)) {
-		/* To avoid clobbering exit_reason, only check for signals if
-		 * we aren't already exiting to userspace for some other
-		 * reason. */
-		if (signal_pending(tsk)) {
-			vcpu->stat.signal_exits++;
-			run->exit_reason = KVM_EXIT_INTR;
-			r = -EINTR;
-		} else {
-			kvmppc_core_deliver_interrupts(vcpu);
-		}
-	}
-
 	return r;
 }
 
@@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	/*
-	 * Some vcpus may start out in stopped state.  If we initialize
-	 * them to busy-in-host state they will stop other vcpus in the
-	 * vcore from running.  Instead we initialize them to blocked
-	 * state, effectively considering them to be stopped until we
-	 * see the first run ioctl for them.
+	 * We consider the vcpu stopped until we see the first run ioctl for it.
 	 */
-	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+	vcpu->arch.state = KVMPPC_VCPU_STOPPED;
 
 	init_waitqueue_head(&vcpu->arch.cpu_run);
 
@@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 		if (vcore) {
 			INIT_LIST_HEAD(&vcore->runnable_threads);
 			spin_lock_init(&vcore->lock);
+			init_waitqueue_head(&vcore->wq);
 		}
 		kvm->arch.vcores[core] = vcore;
 	}
@@ -506,10 +452,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	spin_lock(&vcore->lock);
 	++vcore->num_threads;
-	++vcore->n_blocked;
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 
+	vcpu->arch.cpu_type = KVM_CPU_3S_64;
+	kvmppc_sanity_check(vcpu);
+
 	return vcpu;
 
 free_vcpu:
@@ -524,30 +472,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kfree(vcpu);
 }
 
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	unsigned long dec_nsec, now;
 
-	spin_lock(&vc->lock);
-	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
-	++vc->n_blocked;
-	if (vc->n_runnable > 0 &&
-	    vc->n_runnable + vc->n_blocked == vc->num_threads) {
-		vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
-					arch.run_list);
-		wake_up(&vcpu->arch.cpu_run);
+	now = get_tb();
+	if (now > vcpu->arch.dec_expires) {
+		/* decrementer has already gone negative */
+		kvmppc_core_queue_dec(vcpu);
+		kvmppc_core_deliver_interrupts(vcpu);
+		return;
 	}
-	spin_unlock(&vc->lock);
+	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
+		   / tb_ticks_per_sec;
+	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+		      HRTIMER_MODE_REL);
+	vcpu->arch.timer_running = 1;
 }
 
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-
-	spin_lock(&vc->lock);
-	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
-	--vc->n_blocked;
-	spin_unlock(&vc->lock);
+	vcpu->arch.ceded = 0;
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
 }
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -562,6 +511,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 		return;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	--vc->n_runnable;
+	++vc->n_busy;
 	/* decrement the physical thread id of each following vcpu */
 	v = vcpu;
 	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
@@ -575,15 +525,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	struct paca_struct *tpaca;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
 	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.kvm_vcore = vc;
+	tpaca->kvm_hstate.napping = 0;
+	vcpu->cpu = vc->pcpu;
 	smp_wmb();
 #ifdef CONFIG_PPC_ICP_NATIVE
 	if (vcpu->arch.ptid) {
 		tpaca->cpu_start = 0x80;
-		tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
 		wmb();
 		xics_wake_cpu(cpu);
 		++vc->n_woken;
@@ -631,9 +586,10 @@ static int on_primary_thread(void)
  */
 static int kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vnext;
+	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
+	int ptid;
 
 	/* don't start if any threads have a signal pending */
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -652,29 +608,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
+	/*
+	 * Assign physical thread IDs, first to non-ceded vcpus
+	 * and then to ceded ones.
+	 */
+	ptid = 0;
+	vcpu0 = NULL;
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		if (!vcpu->arch.ceded) {
+			if (!ptid)
+				vcpu0 = vcpu;
+			vcpu->arch.ptid = ptid++;
+		}
+	}
+	if (!vcpu0)
+		return 0;		/* nothing to run */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		if (vcpu->arch.ceded)
+			vcpu->arch.ptid = ptid++;
+
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
-	vc->vcore_running = 1;
+	vc->vcore_state = VCORE_RUNNING;
 	vc->in_guest = 0;
 	vc->pcpu = smp_processor_id();
+	vc->napping_threads = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		kvmppc_start_thread(vcpu);
-	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
-				arch.run_list);
 
+	preempt_disable();
 	spin_unlock(&vc->lock);
 
-	preempt_disable();
 	kvm_guest_enter();
-	__kvmppc_vcore_entry(NULL, vcpu);
+	__kvmppc_vcore_entry(NULL, vcpu0);
 
-	/* wait for secondary threads to finish writing their state to memory */
 	spin_lock(&vc->lock);
+	/* disable sending of IPIs on virtual external irqs */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		vcpu->cpu = -1;
+	/* wait for secondary threads to finish writing their state to memory */
 	if (vc->nap_count < vc->n_woken)
 		kvmppc_wait_for_nap(vc);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
-	vc->vcore_running = 2;
+	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
@@ -690,22 +667,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (now < vcpu->arch.dec_expires &&
 		    kvmppc_core_pending_dec(vcpu))
 			kvmppc_core_dequeue_dec(vcpu);
-		if (!vcpu->arch.trap) {
-			if (signal_pending(vcpu->arch.run_task)) {
-				vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
-				vcpu->arch.ret = -EINTR;
-			}
-			continue;		/* didn't get to run */
-		}
-		ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
-					 vcpu->arch.run_task);
+
+		ret = RESUME_GUEST;
+		if (vcpu->arch.trap)
+			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+						 vcpu->arch.run_task);
+
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
+
+		if (vcpu->arch.ceded) {
+			if (ret != RESUME_GUEST)
+				kvmppc_end_cede(vcpu);
+			else
+				kvmppc_set_timer(vcpu);
+		}
 	}
 
 	spin_lock(&vc->lock);
  out:
-	vc->vcore_running = 0;
+	vc->vcore_state = VCORE_INACTIVE;
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
 		if (vcpu->arch.ret != RESUME_GUEST) {
@@ -717,82 +698,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	return 1;
 }
 
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+/*
+ * Wait for some other vcpu thread to execute us, and
+ * wake us up when we need to handle something in the host.
+ */
+static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
 {
-	int ptid;
-	int wait_state;
-	struct kvmppc_vcore *vc;
 	DEFINE_WAIT(wait);
 
-	/* No need to go into the guest when all we do is going out */
-	if (signal_pending(current)) {
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
+	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+		schedule();
+	finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus.  vc->lock is held.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+	DEFINE_WAIT(wait);
+	struct kvm_vcpu *v;
+	int all_idle = 1;
+
+	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	vc->vcore_state = VCORE_SLEEPING;
+	spin_unlock(&vc->lock);
+	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+		if (!v->arch.ceded || v->arch.pending_exceptions) {
+			all_idle = 0;
+			break;
+		}
 	}
+	if (all_idle)
+		schedule();
+	finish_wait(&vc->wq, &wait);
+	spin_lock(&vc->lock);
+	vc->vcore_state = VCORE_INACTIVE;
+}
 
-	/* On PPC970, check that we have an RMA region */
-	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EPERM;
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int n_ceded;
+	int prev_state;
+	struct kvmppc_vcore *vc;
+	struct kvm_vcpu *v, *vn;
 
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
-
 	/*
 	 * Synchronize with other threads in this virtual core
 	 */
 	vc = vcpu->arch.vcore;
 	spin_lock(&vc->lock);
-	/* This happens the first time this is called for a vcpu */
-	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
-		--vc->n_blocked;
-	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
-	ptid = vc->n_runnable;
+	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
 	vcpu->arch.kvm_run = kvm_run;
-	vcpu->arch.ptid = ptid;
+	prev_state = vcpu->arch.state;
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
 	++vc->n_runnable;
 
-	wait_state = TASK_INTERRUPTIBLE;
-	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
-		if (signal_pending(current)) {
-			if (!vc->vcore_running) {
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				vcpu->arch.ret = -EINTR;
-				break;
-			}
-			/* have to wait for vcore to stop executing guest */
-			wait_state = TASK_UNINTERRUPTIBLE;
-			smp_send_reschedule(vc->pcpu);
+	/*
+	 * This happens the first time this is called for a vcpu.
+	 * If the vcore is already running, we may be able to start
+	 * this thread straight away and have it join in.
+	 */
+	if (prev_state == KVMPPC_VCPU_STOPPED) {
+		if (vc->vcore_state == VCORE_RUNNING &&
+		    VCORE_EXIT_COUNT(vc) == 0) {
+			vcpu->arch.ptid = vc->n_runnable - 1;
+			kvmppc_start_thread(vcpu);
 		}
 
-		if (!vc->vcore_running &&
-		    vc->n_runnable + vc->n_blocked == vc->num_threads) {
-			/* we can run now */
-			if (kvmppc_run_core(vc))
-				continue;
-		}
+	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
+		--vc->n_busy;
 
-		if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
-			kvmppc_start_thread(vcpu);
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	       !signal_pending(current)) {
+		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+			spin_unlock(&vc->lock);
+			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+			spin_lock(&vc->lock);
+			continue;
+		}
+		n_ceded = 0;
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+			n_ceded += v->arch.ceded;
+		if (n_ceded == vc->n_runnable)
+			kvmppc_vcore_blocked(vc);
+		else
+			kvmppc_run_core(vc);
+
+		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
+					 arch.run_list) {
+			kvmppc_core_deliver_interrupts(v);
+			if (signal_pending(v->arch.run_task)) {
+				kvmppc_remove_runnable(vc, v);
+				v->stat.signal_exits++;
+				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+				v->arch.ret = -EINTR;
+				wake_up(&v->arch.cpu_run);
+			}
+		}
+	}
 
-		/* wait for other threads to come in, or wait for vcore */
-		prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-		spin_unlock(&vc->lock);
-		schedule();
-		finish_wait(&vcpu->arch.cpu_run, &wait);
-		spin_lock(&vc->lock);
+	if (signal_pending(current)) {
+		if (vc->vcore_state == VCORE_RUNNING ||
+		    vc->vcore_state == VCORE_EXITING) {
+			spin_unlock(&vc->lock);
+			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+			spin_lock(&vc->lock);
+		}
+		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+			kvmppc_remove_runnable(vc, vcpu);
+			vcpu->stat.signal_exits++;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			vcpu->arch.ret = -EINTR;
+		}
 	}
 
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
-		kvmppc_remove_runnable(vc, vcpu);
 	spin_unlock(&vc->lock);
-
 	return vcpu->arch.ret;
 }
 
@@ -800,6 +829,26 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 
+	if (!vcpu->arch.sane) {
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
+	/* No need to go into the guest when all we'll do is come back out */
+	if (signal_pending(current)) {
+		run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	/* On PPC970, check that we have an RMA region */
+	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EPERM;
+
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
+	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+
 	do {
 		r = kvmppc_run_vcpu(run, vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fcfe6b055558..bacb0cfa3602 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -110,39 +110,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 }
 
-static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
-				      unsigned long pte_index)
-{
-	unsigned long rb, va_low;
-
-	rb = (v & ~0x7fUL) << 16;		/* AVA field */
-	va_low = pte_index >> 3;
-	if (v & HPTE_V_SECONDARY)
-		va_low = ~va_low;
-	/* xor vsid from AVA */
-	if (!(v & HPTE_V_1TB_SEG))
-		va_low ^= v >> 12;
-	else
-		va_low ^= v >> 24;
-	va_low &= 0x7ff;
-	if (v & HPTE_V_LARGE) {
-		rb |= 1;			/* L field */
-		if (cpu_has_feature(CPU_FTR_ARCH_206) &&
-		    (r & 0xff000)) {
-			/* non-16MB large page, must be 64k */
-			/* (masks depend on page size) */
-			rb |= 0x1000;		/* page encoding in LP field */
-			rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
-			rb |= (va_low & 0xfe);	/* AVAL field (P7 doesn't seem to care) */
-		}
-	} else {
-		/* 4kB page */
-		rb |= (va_low & 0x7ff) << 12;	/* remaining 11b of VA */
-	}
-	rb |= (v >> 54) & 0x300;		/* B field */
-	return rb;
-}
-
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
 
 static inline int try_lock_tlbie(unsigned int *lock)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index de2950135e6e..f422231d9235 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -20,7 +20,10 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/mmu.h>
 #include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/exception-64s.h>
 
@@ -49,7 +52,7 @@ kvmppc_skip_Hinterrupt:
 	b	.
 
 /*
- * Call kvmppc_handler_trampoline_enter in real mode.
+ * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
  *
  * Input Registers:
@@ -89,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
 kvm_start_guest:
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
+	ld	r2,PACATOC(r13)
+
+	/* were we napping due to cede? */
+	lbz	r0,HSTATE_NAPPING(r13)
+	cmpwi	r0,0
+	bne	kvm_end_cede
 
 	/* get vcpu pointer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
@@ -276,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	cmpwi	r0,0
 	beq	20b
 
-	/* Set LPCR.  Set the MER bit if there is a pending external irq. */
+	/* Set LPCR and RMOR. */
 10:	ld	r8,KVM_LPCR(r9)
-	ld	r0,VCPU_PENDING_EXC(r4)
-	li	r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and.	r0,r0,r7
-	beq	11f
-	ori	r8,r8,LPCR_MER
-11:	mtspr	SPRN_LPCR,r8
+	mtspr	SPRN_LPCR,r8
 	ld	r8,KVM_RMOR(r9)
 	mtspr	SPRN_RMOR,r8
 	isync
@@ -448,19 +451,50 @@ toc_tlbie_lock:
 	mtctr	r6
 	mtxer	r7
 
-	/* Move SRR0 and SRR1 into the respective regs */
+kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
-	mtspr	SPRN_SRR0, r6
-	mtspr	SPRN_SRR1, r7
-
 	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)	/* r11 = vcpu->arch.msr & ~MSR_HV */
 
-	ld	r11, VCPU_MSR(r4)	/* r10 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
+	/* Check if we can deliver an external or decrementer interrupt now */
+	ld	r0,VCPU_PENDING_EXC(r4)
+	li	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+	oris	r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	and	r0,r0,r8
+	cmpdi	cr1,r0,0
+	andi.	r0,r11,MSR_EE
+	beq	cr1,11f
+BEGIN_FTR_SECTION
+	mfspr	r8,SPRN_LPCR
+	ori	r8,r8,LPCR_MER
+	mtspr	SPRN_LPCR,r8
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	beq	5f
+	li	r0,BOOK3S_INTERRUPT_EXTERNAL
+12:	mr	r6,r10
+	mr	r10,r0
+	mr	r7,r11
+	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11,r11,63
+	b	5f
+11:	beq	5f
+	mfspr	r0,SPRN_DEC
+	cmpwi	r0,0
+	li	r0,BOOK3S_INTERRUPT_DECREMENTER
+	blt	12b
+
+	/* Move SRR0 and SRR1 into the respective regs */
+5:	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r7
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
+
 fast_guest_return:
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
@@ -574,21 +608,20 @@ kvmppc_interrupt:
 	/* See if this is something we can handle in real mode */
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
-hcall_real_cont:
 
 	/* Check for mediated interrupts (could be done earlier really ...) */
 BEGIN_FTR_SECTION
 	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
 	bne+	1f
-	ld	r5,VCPU_KVM(r9)
-	ld	r5,KVM_LPCR(r5)
 	andi.	r0,r11,MSR_EE
 	beq	1f
+	mfspr	r5,SPRN_LPCR
 	andi.	r0,r5,LPCR_MER
 	bne	bounce_ext_interrupt
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+hcall_real_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
 	mftb	r6
@@ -682,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 	slbia
 	ptesync
 
-hdec_soon:
+hdec_soon:			/* r9 = vcpu, r12 = trap, r13 = paca */
 BEGIN_FTR_SECTION
 	b	32f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -700,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	addi	r0,r3,0x100
 	stwcx.	r0,0,r6
 	bne	41b
+	lwsync
 
 	/*
 	 * At this point we have an interrupt that we have to pass
@@ -713,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * interrupt, since the other threads will already be on their
 	 * way here in that case.
 	 */
+	cmpwi	r3,0x100	/* Are we the first here? */
+	bge	43f
+	cmpwi	r3,1		/* Are any other threads in the guest? */
+	ble	43f
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	beq	40f
-	cmpwi	r3,0x100	/* Are we the first here? */
-	bge	40f
-	cmpwi	r3,1
-	ble	40f
 	li	r0,0
 	mtspr	SPRN_HDEC,r0
 40:
+	/*
+	 * Send an IPI to any napping threads, since an HDEC interrupt
+	 * doesn't wake CPUs up from nap.
+	 */
+	lwz	r3,VCORE_NAPPING_THREADS(r5)
+	lwz	r4,VCPU_PTID(r9)
+	li	r0,1
+	sldi	r0,r0,r4
+	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
+	beq	43f
+	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
+	subf	r6,r4,r13
+42:	andi.	r0,r3,1
+	beq	44f
+	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
+	li	r0,IPI_PRIORITY
+	li	r7,XICS_QIRR
+	stbcix	r0,r7,r8		/* trigger the IPI */
+44:	srdi.	r3,r3,1
+	addi	r6,r6,PACA_SIZE
+	bne	42b
 
 	/* Secondary threads wait for primary to do partition switch */
-	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
+43:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
 	ld	r5,HSTATE_KVM_VCORE(r13)
 	lwz	r3,VCPU_PTID(r9)
 	cmpwi	r3,0
@@ -1077,7 +1132,6 @@ hcall_try_real_mode:
 hcall_real_fallback:
 	li	r12,BOOK3S_INTERRUPT_SYSCALL
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	ld	r11, VCPU_MSR(r9)
 
 	b	hcall_real_cont
 
@@ -1139,7 +1193,7 @@ hcall_real_table:
 	.long	0		/* 0xd4 */
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
-	.long	0		/* 0xe0 */
+	.long	.kvmppc_h_cede - hcall_real_table
 	.long	0		/* 0xe4 */
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
@@ -1168,7 +1222,8 @@ bounce_ext_interrupt:
 	mtspr	SPRN_SRR0,r10
 	mtspr	SPRN_SRR1,r11
 	li	r10,BOOK3S_INTERRUPT_EXTERNAL
-	LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
+	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11,r11,63
 	b	fast_guest_return
 
 _GLOBAL(kvmppc_h_set_dabr)
@@ -1177,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr)
 	li	r3,0
 	blr
 
+_GLOBAL(kvmppc_h_cede)
+	ori	r11,r11,MSR_EE
+	std	r11,VCPU_MSR(r3)
+	li	r0,1
+	stb	r0,VCPU_CEDED(r3)
+	sync			/* order setting ceded vs. testing prodded */
+	lbz	r5,VCPU_PRODDED(r3)
+	cmpwi	r5,0
+	bne	1f
+	li	r0,0		/* set trap to 0 to say hcall is handled */
+	stw	r0,VCPU_TRAP(r3)
+	li	r0,H_SUCCESS
+	std	r0,VCPU_GPR(r3)(r3)
+BEGIN_FTR_SECTION
+	b	2f		/* just send it up to host on 970 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+	/*
+	 * Set our bit in the bitmask of napping threads unless all the
+	 * other threads are already napping, in which case we send this
+	 * up to the host.
+	 */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r6,VCPU_PTID(r3)
+	lwz	r8,VCORE_ENTRY_EXIT(r5)
+	clrldi	r8,r8,56
+	li	r0,1
+	sld	r0,r0,r6
+	addi	r6,r5,VCORE_NAPPING_THREADS
+31:	lwarx	r4,0,r6
+	or	r4,r4,r0
+	popcntw	r7,r4
+	cmpw	r7,r8
+	bge	2f
+	stwcx.	r4,0,r6
+	bne	31b
+	li	r0,1
+	stb	r0,HSTATE_NAPPING(r13)
+	/* order napping_threads update vs testing entry_exit_count */
+	lwsync
+	mr	r4,r3
+	lwz	r7,VCORE_ENTRY_EXIT(r5)
+	cmpwi	r7,0x100
+	bge	33f		/* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+	/* Save non-volatile GPRs */
+	std	r14, VCPU_GPR(r14)(r3)
+	std	r15, VCPU_GPR(r15)(r3)
+	std	r16, VCPU_GPR(r16)(r3)
+	std	r17, VCPU_GPR(r17)(r3)
+	std	r18, VCPU_GPR(r18)(r3)
+	std	r19, VCPU_GPR(r19)(r3)
+	std	r20, VCPU_GPR(r20)(r3)
+	std	r21, VCPU_GPR(r21)(r3)
+	std	r22, VCPU_GPR(r22)(r3)
+	std	r23, VCPU_GPR(r23)(r3)
+	std	r24, VCPU_GPR(r24)(r3)
+	std	r25, VCPU_GPR(r25)(r3)
+	std	r26, VCPU_GPR(r26)(r3)
+	std	r27, VCPU_GPR(r27)(r3)
+	std	r28, VCPU_GPR(r28)(r3)
+	std	r29, VCPU_GPR(r29)(r3)
+	std	r30, VCPU_GPR(r30)(r3)
+	std	r31, VCPU_GPR(r31)(r3)
+
+	/* save FP state */
+	bl	.kvmppc_save_fp
+
+	/*
+	 * Take a nap until a decrementer or external interrupt occurs,
+	 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+	 */
+	li	r0,0x80
+	stb	r0,PACAPROCSTART(r13)
+	mfspr	r5,SPRN_LPCR
+	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
+	mtspr	SPRN_LPCR,r5
+	isync
+	li	r0, 0
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
+kvm_end_cede:
+	/* Woken by external or decrementer interrupt */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* If we're a secondary thread and we got here by an IPI, ack it */
+	ld	r4,HSTATE_KVM_VCPU(r13)
+	lwz	r3,VCPU_PTID(r4)
+	cmpwi	r3,0
+	beq	27f
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
+	cmpwi	r3,4			/* was it an external interrupt? */
+	bne	27f
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0,0xff
+	li	r6,XICS_QIRR
+	li	r7,XICS_XIRR
+	lwzcix	r8,r5,r7		/* ack the interrupt */
+	sync
+	stbcix	r0,r5,r6		/* clear it */
+	stwcix	r8,r5,r7		/* EOI it */
+27:
+	/* load up FP state */
+	bl	kvmppc_load_fp
+
+	/* Load NV GPRS */
+	ld	r14, VCPU_GPR(r14)(r4)
+	ld	r15, VCPU_GPR(r15)(r4)
+	ld	r16, VCPU_GPR(r16)(r4)
+	ld	r17, VCPU_GPR(r17)(r4)
+	ld	r18, VCPU_GPR(r18)(r4)
+	ld	r19, VCPU_GPR(r19)(r4)
+	ld	r20, VCPU_GPR(r20)(r4)
+	ld	r21, VCPU_GPR(r21)(r4)
+	ld	r22, VCPU_GPR(r22)(r4)
+	ld	r23, VCPU_GPR(r23)(r4)
+	ld	r24, VCPU_GPR(r24)(r4)
+	ld	r25, VCPU_GPR(r25)(r4)
+	ld	r26, VCPU_GPR(r26)(r4)
+	ld	r27, VCPU_GPR(r27)(r4)
+	ld	r28, VCPU_GPR(r28)(r4)
+	ld	r29, VCPU_GPR(r29)(r4)
+	ld	r30, VCPU_GPR(r30)(r4)
+	ld	r31, VCPU_GPR(r31)(r4)
+
+	/* clear our bit in vcore->napping_threads */
+33:	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r3,VCPU_PTID(r4)
+	li	r0,1
+	sld	r0,r0,r3
+	addi	r6,r5,VCORE_NAPPING_THREADS
+32:	lwarx	r7,0,r6
+	andc	r7,r7,r0
+	stwcx.	r7,0,r6
+	bne	32b
+	li	r0,0
+	stb	r0,HSTATE_NAPPING(r13)
+
+	/* see if any other thread is already exiting */
+	lwz	r0,VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0,0x100
+	blt	kvmppc_cede_reentry	/* if not go back to guest */
+
+	/* some threads are exiting, so go to the guest exit path */
+	b	hcall_real_fallback
+
+	/* cede when already previously prodded case */
+1:	li	r0,0
+	stb	r0,VCPU_PRODDED(r3)
+	sync			/* order testing prodded vs. clearing ceded */
+	stb	r0,VCPU_CEDED(r3)
+	li	r3,H_SUCCESS
+	blr
+
+	/* we've ceded but we want to give control to the host */
+2:	li	r3,H_TOO_HARD
+	blr
+
 secondary_too_late:
 	ld	r5,HSTATE_KVM_VCORE(r13)
 	HMT_LOW
@@ -1194,14 +1421,20 @@ secondary_too_late:
 	slbmte	r6,r5
 1:	addi	r11,r11,16
 	.endr
-	b	50f
 
 secondary_nap:
-	/* Clear any pending IPI */
-50:	ld	r5, HSTATE_XICS_PHYS(r13)
+	/* Clear any pending IPI - assume we're a secondary thread */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r7, XICS_XIRR
+	lwzcix	r3, r5, r7		/* ack any pending interrupt */
+	rlwinm.	r0, r3, 0, 0xffffff	/* any pending? */
+	beq	37f
+	sync
 	li	r0, 0xff
 	li	r6, XICS_QIRR
-	stbcix	r0, r5, r6
+	stbcix	r0, r5, r6		/* clear the IPI */
+	stwcix	r3, r5, r7		/* EOI it */
+37:	sync
 
 	/* increment the nap count and then go to nap mode */
 	ld	r4, HSTATE_KVM_VCORE(r13)
@@ -1211,13 +1444,12 @@ secondary_nap:
 	addi	r3, r3, 1
 	stwcx.	r3, 0, r4
 	bne	51b
-	isync
 
+	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
-	li	r0, LPCR_PECE
-	andc	r4, r4, r0
-	ori	r4, r4, LPCR_PECE0	/* exit nap on interrupt */
+	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
 	mtspr	SPRN_LPCR, r4
+	isync
 	li	r0, 0
 	std	r0, HSTATE_SCRATCH0(r13)
 	ptesync
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index c54b0e30cf3f..0a8515a5c042 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -29,27 +29,11 @@
 #define ULONG_SIZE 		8
 #define FUNC(name) 		GLUE(.,name)
 
-#define GET_SHADOW_VCPU_R13
-
-#define DISABLE_INTERRUPTS	\
-	mfmsr   r0;		\
-	rldicl  r0,r0,48,1;	\
-	rotldi  r0,r0,16;	\
-	mtmsrd  r0,1;		\
-
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
 #define ULONG_SIZE              4
 #define FUNC(name)		name
 
-#define GET_SHADOW_VCPU_R13	\
-	lwz	r13, (THREAD + THREAD_KVM_SVCPU)(r2)
-
-#define DISABLE_INTERRUPTS	\
-	mfmsr   r0;		\
-	rlwinm  r0,r0,0,17,15;	\
-	mtmsr   r0;		\
-
 #endif /* CONFIG_PPC_BOOK3S_XX */
 
 
@@ -108,44 +92,17 @@ kvm_start_entry:
 
 kvm_start_lightweight:
 
-	GET_SHADOW_VCPU_R13
-	PPC_LL	r3, VCPU_HIGHMEM_HANDLER(r4)
-	PPC_STL	r3, HSTATE_VMHANDLER(r13)
-
-	PPC_LL	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
-
-	DISABLE_INTERRUPTS
-
 #ifdef CONFIG_PPC_BOOK3S_64
-	/* Some guests may need to have dcbz set to 32 byte length.
-	 *
-	 * Usually we ensure that by patching the guest's instructions
-	 * to trap on dcbz and emulate it in the hypervisor.
-	 *
-	 * If we can, we should tell the CPU to use 32 byte dcbz though,
-	 * because that's a lot faster.
-	 */
-
 	PPC_LL	r3, VCPU_HFLAGS(r4)
-	rldicl.	r3, r3, 0, 63		/* CR = ((r3 & 1) == 0) */
-	beq	no_dcbz32_on
-
-	mfspr   r3,SPRN_HID5
-	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
-	mtspr   SPRN_HID5,r3
-
-no_dcbz32_on:
-
+	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
+	stb	r3, HSTATE_RESTORE_HID5(r13)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-	PPC_LL	r6, VCPU_RMCALL(r4)
-	mtctr	r6
-
-	PPC_LL	r3, VCPU_TRAMPOLINE_ENTER(r4)
-	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+	PPC_LL	r4, VCPU_SHADOW_MSR(r4)	/* get shadow_msr */
 
 	/* Jump to segment patching handler and into our guest */
-	bctr
+	bl	FUNC(kvmppc_entry_trampoline)
+	nop
 
 /*
  * This is the handler in module memory. It gets jumped at from the
@@ -170,21 +127,6 @@ kvmppc_handler_highmem:
 	/* R7 = vcpu */
 	PPC_LL	r7, GPR4(r1)
 
-#ifdef CONFIG_PPC_BOOK3S_64
-
-	PPC_LL	r5, VCPU_HFLAGS(r7)
-	rldicl.	r5, r5, 0, 63		/* CR = ((r5 & 1) == 0) */
-	beq	no_dcbz32_off
-
-	li	r4, 0
-	mfspr   r5,SPRN_HID5
-	rldimi  r5,r4,6,56
-	mtspr   SPRN_HID5,r5
-
-no_dcbz32_off:
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 	PPC_STL	r14, VCPU_GPR(r14)(r7)
 	PPC_STL	r15, VCPU_GPR(r15)(r7)
 	PPC_STL	r16, VCPU_GPR(r16)(r7)
@@ -204,67 +146,6 @@ no_dcbz32_off:
 	PPC_STL	r30, VCPU_GPR(r30)(r7)
 	PPC_STL	r31, VCPU_GPR(r31)(r7)
 
-	/* Restore host msr -> SRR1 */
-	PPC_LL	r6, VCPU_HOST_MSR(r7)
-
-	/*
-	 * For some interrupts, we need to call the real Linux
-	 * handler, so it can do work for us. This has to happen
-	 * as if the interrupt arrived from the kernel though,
-	 * so let's fake it here where most state is restored.
-	 *
-	 * Call Linux for hardware interrupts/decrementer
-	 * r3 = address of interrupt handler (exit reason)
-	 */
-
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	call_linux_handler
-	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
-	beq	call_linux_handler
-	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
-	beq	call_linux_handler
-
-	/* Back to EE=1 */
-	mtmsr	r6
-	sync
-	b	kvm_return_point
-
-call_linux_handler:
-
-	/*
-	 * If we land here we need to jump back to the handler we
-	 * came from.
-	 *
-	 * We have a page that we can access from real mode, so let's
-	 * jump back to that and use it as a trampoline to get back into the
-	 * interrupt handler!
-	 *
-	 * R3 still contains the exit code,
-	 * R5 VCPU_HOST_RETIP and
-	 * R6 VCPU_HOST_MSR
-	 */
-
-	/* Restore host IP -> SRR0 */
-	PPC_LL	r5, VCPU_HOST_RETIP(r7)
-
-	/* XXX Better move to a safe function?
-	 *     What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
-
-	mtlr	r12
-
-	PPC_LL	r4, VCPU_TRAMPOLINE_LOWMEM(r7)
-	mtsrr0	r4
-	LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-	mtsrr1	r3
-
-	RFI
-
-.global kvm_return_point
-kvm_return_point:
-
-	/* Jump back to lightweight entry if we're supposed to */
-	/* go back into the guest */
-
 	/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
 	mr	r5, r12
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 0c0d3f274437..d417511abfb1 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -150,16 +150,22 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 #ifdef CONFIG_PPC_BOOK3S_64
 	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
 		kvmppc_mmu_book3s_64_init(vcpu);
-		to_book3s(vcpu)->hior = 0xfff00000;
+		if (!to_book3s(vcpu)->hior_sregs)
+			to_book3s(vcpu)->hior = 0xfff00000;
 		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
+		vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	} else
 #endif
 	{
 		kvmppc_mmu_book3s_32_init(vcpu);
-		to_book3s(vcpu)->hior = 0;
+		if (!to_book3s(vcpu)->hior_sregs)
+			to_book3s(vcpu)->hior = 0;
 		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
+		vcpu->arch.cpu_type = KVM_CPU_3S_32;
 	}
 
+	kvmppc_sanity_check(vcpu);
+
 	/* If we are in hypervisor level on 970, we can tell the CPU to
 	 * treat DCBZ as 32 bytes store */
 	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
@@ -646,7 +652,27 @@ program_interrupt:
 		break;
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
-		if (vcpu->arch.osi_enabled &&
+		if (vcpu->arch.papr_enabled &&
+		    (kvmppc_get_last_inst(vcpu) == 0x44000022) &&
+		    !(vcpu->arch.shared->msr & MSR_PR)) {
+			/* SC 1 papr hypercalls */
+			ulong cmd = kvmppc_get_gpr(vcpu, 3);
+			int i;
+
+			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
+				r = RESUME_GUEST;
+				break;
+			}
+
+			run->papr_hcall.nr = cmd;
+			for (i = 0; i < 9; ++i) {
+				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
+				run->papr_hcall.args[i] = gpr;
+			}
+			run->exit_reason = KVM_EXIT_PAPR_HCALL;
+			vcpu->arch.hcall_needed = 1;
+			r = RESUME_HOST;
+		} else if (vcpu->arch.osi_enabled &&
 		    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
 		    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
 			/* MOL hypercalls */
@@ -770,6 +796,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (sregs->u.s.flags & KVM_SREGS_S_HIOR)
+		sregs->u.s.hior = to_book3s(vcpu)->hior;
+
 	return 0;
 }
 
@@ -806,6 +835,11 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	/* Flush the MMU after messing with the segments */
 	kvmppc_mmu_pte_flush(vcpu, 0, 0);
 
+	if (sregs->u.s.flags & KVM_SREGS_S_HIOR) {
+		to_book3s(vcpu)->hior_sregs = true;
+		to_book3s(vcpu)->hior = sregs->u.s.hior;
+	}
+
 	return 0;
 }
 
@@ -841,8 +875,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (!p)
 		goto uninit_vcpu;
 
-	vcpu->arch.host_retip = kvm_return_point;
-	vcpu->arch.host_msr = mfmsr();
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
 	vcpu->arch.pvr = 0x3C0301;
@@ -853,16 +885,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
 	vcpu->arch.slb_nr = 64;
 
-	/* remember where some real-mode handlers are */
-	vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
-	vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
-	vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
-#ifdef CONFIG_PPC_BOOK3S_64
-	vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
-#else
-	vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
-#endif
-
 	vcpu->arch.shadow_msr = MSR_USER64;
 
 	err = kvmppc_mmu_init(vcpu);
@@ -908,6 +930,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 	ulong ext_msr;
 
+	/* Check if we can run the vcpu at all */
+	if (!vcpu->arch.sane) {
+		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
 	/* No need to go into the guest when all we do is going out */
 	if (signal_pending(current)) {
 		kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
new file mode 100644
index 000000000000..b9589324797b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2011. Freescale Inc. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <agraf@suse.de>
+ *    Paul Mackerras <paulus@samba.org>
+ *
+ * Description:
+ *
+ * Hypercall handling for running PAPR guests in PR KVM on Book 3S
+ * processors.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+
+static unsigned long get_pteg_addr(struct kvm_vcpu *vcpu, long pte_index)
+{
+	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+	unsigned long pteg_addr;
+
+	pte_index <<= 4;
+	pte_index &= ((1 << ((vcpu_book3s->sdr1 & 0x1f) + 11)) - 1) << 7 | 0x70;
+	pteg_addr = vcpu_book3s->sdr1 & 0xfffffffffffc0000ULL;
+	pteg_addr |= pte_index;
+
+	return pteg_addr;
+}
+
+static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
+{
+	long flags = kvmppc_get_gpr(vcpu, 4);
+	long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long pteg[2 * 8];
+	unsigned long pteg_addr, i, *hpte;
+
+	pte_index &= ~7UL;
+	pteg_addr = get_pteg_addr(vcpu, pte_index);
+
+	copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+	hpte = pteg;
+
+	if (likely((flags & H_EXACT) == 0)) {
+		pte_index &= ~7UL;
+		for (i = 0; ; ++i) {
+			if (i == 8)
+				return H_PTEG_FULL;
+			if ((*hpte & HPTE_V_VALID) == 0)
+				break;
+			hpte += 2;
+		}
+	} else {
+		i = kvmppc_get_gpr(vcpu, 5) & 7UL;
+		hpte += i * 2;
+	}
+
+	hpte[0] = kvmppc_get_gpr(vcpu, 6);
+	hpte[1] = kvmppc_get_gpr(vcpu, 7);
+	copy_to_user((void __user *)pteg_addr, pteg, sizeof(pteg));
+	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	kvmppc_set_gpr(vcpu, 4, pte_index | i);
+
+	return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags= kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long v = 0, pteg, rb;
+	unsigned long pte[2];
+
+	pteg = get_pteg_addr(vcpu, pte_index);
+	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+	if ((pte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn) ||
+	    ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) {
+		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
+		return EMULATE_DONE;
+	}
+
+	copy_to_user((void __user *)pteg, &v, sizeof(v));
+
+	rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+
+	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	kvmppc_set_gpr(vcpu, 4, pte[0]);
+	kvmppc_set_gpr(vcpu, 5, pte[1]);
+
+	return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
+{
+	unsigned long flags = kvmppc_get_gpr(vcpu, 4);
+	unsigned long pte_index = kvmppc_get_gpr(vcpu, 5);
+	unsigned long avpn = kvmppc_get_gpr(vcpu, 6);
+	unsigned long rb, pteg, r, v;
+	unsigned long pte[2];
+
+	pteg = get_pteg_addr(vcpu, pte_index);
+	copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+
+	if ((pte[0] & HPTE_V_VALID) == 0 ||
+	    ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != avpn)) {
+		kvmppc_set_gpr(vcpu, 3, H_NOT_FOUND);
+		return EMULATE_DONE;
+	}
+
+	v = pte[0];
+	r = pte[1];
+	r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_HI |
+	       HPTE_R_KEY_LO);
+	r |= (flags << 55) & HPTE_R_PP0;
+	r |= (flags << 48) & HPTE_R_KEY_HI;
+	r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	pte[1] = r;
+
+	rb = compute_tlbie_rb(v, r, pte_index);
+	vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
+	copy_to_user((void __user *)pteg, pte, sizeof(pte));
+
+	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+
+	return EMULATE_DONE;
+}
+
+int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
+{
+	switch (cmd) {
+	case H_ENTER:
+		return kvmppc_h_pr_enter(vcpu);
+	case H_REMOVE:
+		return kvmppc_h_pr_remove(vcpu);
+	case H_PROTECT:
+		return kvmppc_h_pr_protect(vcpu);
+	case H_BULK_REMOVE:
+		/* We just flush all PTEs, so user space can
+		   handle the HPT modifications */
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+		break;
+	case H_CEDE:
+		kvm_vcpu_block(vcpu);
+		vcpu->stat.halt_wakeup++;
+		return EMULATE_DONE;
+	}
+
+	return EMULATE_FAIL;
+}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index c1f877c4a884..34187585c507 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,6 +20,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
 
@@ -35,10 +36,10 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
-#define LOAD_SHADOW_VCPU(reg)	GET_PACA(reg)					
-#define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
+#define MTMSR_EERI(reg)		mtmsrd	(reg),1
 
+	.globl	kvmppc_skip_interrupt
 kvmppc_skip_interrupt:
 	/*
 	 * Here all GPRs are unchanged from when the interrupt happened
@@ -51,6 +52,7 @@ kvmppc_skip_interrupt:
 	rfid
 	b	.
 
+	.globl	kvmppc_skip_Hinterrupt
 kvmppc_skip_Hinterrupt:
 	/*
 	 * Here all GPRs are unchanged from when the interrupt happened
@@ -65,8 +67,8 @@ kvmppc_skip_Hinterrupt:
 
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
-#define MSR_NOIRQ		MSR_KERNEL
 #define FUNC(name)		name
+#define MTMSR_EERI(reg)		mtmsr	(reg)
 
 .macro INTERRUPT_TRAMPOLINE intno
 
@@ -167,40 +169,24 @@ kvmppc_handler_skip_ins:
 #endif
 
 /*
- * This trampoline brings us back to a real mode handler
- *
- * Input Registers:
- *
- * R5 = SRR0
- * R6 = SRR1
- * LR = real-mode IP
+ * Call kvmppc_handler_trampoline_enter in real mode
  *
+ * On entry, r4 contains the guest shadow MSR
  */
-.global kvmppc_handler_lowmem_trampoline
-kvmppc_handler_lowmem_trampoline:
-
-	mtsrr0	r5
+_GLOBAL(kvmppc_entry_trampoline)
+	mfmsr	r5
+	LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+	toreal(r7)
+
+	li	r9, MSR_RI
+	ori	r9, r9, MSR_EE
+	andc	r9, r5, r9	/* Clear EE and RI in MSR value */
+	li	r6, MSR_IR | MSR_DR
+	ori	r6, r6, MSR_EE
+	andc	r6, r5, r6	/* Clear EE, DR and IR in MSR value */
+	MTMSR_EERI(r9)		/* Clear EE and RI in MSR */
+	mtsrr0	r7		/* before we set srr0/1 */
 	mtsrr1	r6
-	blr
-kvmppc_handler_lowmem_trampoline_end:
-
-/*
- * Call a function in real mode
- *
- * Input Registers:
- *
- * R3 = function
- * R4 = MSR
- * R5 = scratch register
- *
- */
-_GLOBAL(kvmppc_rmcall)
-	LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
-	mtmsr	r5		/* Disable relocation and interrupts, so mtsrr
-				   doesn't get interrupted */
-	sync
-	mtsrr0	r3
-	mtsrr1	r4
 	RFI
 
 #if defined(CONFIG_PPC_BOOK3S_32)
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index aed32e517212..0676ae249b9f 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -23,6 +23,7 @@
 
 #define GET_SHADOW_VCPU(reg)    \
 	mr	reg, r13
+#define MTMSR_EERI(reg)		mtmsrd	(reg),1
 
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
@@ -30,6 +31,7 @@
 	tophys(reg, r2);       			\
 	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(reg);	\
 	tophys(reg, reg)
+#define MTMSR_EERI(reg)		mtmsr	(reg)
 
 #endif
 
@@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter:
 	/* Required state:
 	 *
 	 * MSR = ~IR|DR
-	 * R13 = PACA
 	 * R1 = host R1
 	 * R2 = host R2
-	 * R10 = guest MSR
+	 * R4 = guest shadow MSR
+	 * R5 = normal host MSR
+	 * R6 = current host MSR (EE, IR, DR off)
+	 * LR = highmem guest exit code
 	 * all other volatile GPRS = free
 	 * SVCPU[CR] = guest CR
 	 * SVCPU[XER] = guest XER
@@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter:
 	/* r3 = shadow vcpu */
 	GET_SHADOW_VCPU(r3)
 
+	/* Save guest exit handler address and MSR */
+	mflr	r0
+	PPC_STL	r0, HSTATE_VMHANDLER(r3)
+	PPC_STL	r5, HSTATE_HOST_MSR(r3)
+
 	/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
 	PPC_STL	r1, HSTATE_HOST_R1(r3)
 	PPC_STL	r2, HSTATE_HOST_R2(r3)
 
-	/* Move SRR0 and SRR1 into the respective regs */
-	PPC_LL  r9, SVCPU_PC(r3)
-	mtsrr0	r9
-	mtsrr1	r10
-
 	/* Activate guest mode, so faults get handled by KVM */
 	li	r11, KVM_GUEST_MODE_GUEST
 	stb	r11, HSTATE_IN_GUEST(r3)
@@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter:
 	/* Switch to guest segment. This is subarch specific. */
 	LOAD_GUEST_SEGMENTS
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Some guests may need to have dcbz set to 32 byte length.
+	 *
+	 * Usually we ensure that by patching the guest's instructions
+	 * to trap on dcbz and emulate it in the hypervisor.
+	 *
+	 * If we can, we should tell the CPU to use 32 byte dcbz though,
+	 * because that's a lot faster.
+	 */
+	lbz	r0, HSTATE_RESTORE_HID5(r3)
+	cmpwi	r0, 0
+	beq	no_dcbz32_on
+
+	mfspr   r0,SPRN_HID5
+	ori     r0, r0, 0x80		/* XXX HID5_dcbz32 = 0x80 */
+	mtspr   SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	/* Enter guest */
 
-	PPC_LL	r4, SVCPU_CTR(r3)
-	PPC_LL	r5, SVCPU_LR(r3)
-	lwz	r6, SVCPU_CR(r3)
-	lwz	r7, SVCPU_XER(r3)
+	PPC_LL	r8, SVCPU_CTR(r3)
+	PPC_LL	r9, SVCPU_LR(r3)
+	lwz	r10, SVCPU_CR(r3)
+	lwz	r11, SVCPU_XER(r3)
+
+	mtctr	r8
+	mtlr	r9
+	mtcr	r10
+	mtxer	r11
 
-	mtctr	r4
-	mtlr	r5
-	mtcr	r6
-	mtxer	r7
+	/* Move SRR0 and SRR1 into the respective regs */
+	PPC_LL  r9, SVCPU_PC(r3)
+	/* First clear RI in our current MSR value */
+	li	r0, MSR_RI
+	andc	r6, r6, r0
+	MTMSR_EERI(r6)
+	mtsrr0	r9
+	mtsrr1	r4
 
 	PPC_LL	r0, SVCPU_R0(r3)
 	PPC_LL	r1, SVCPU_R1(r3)
@@ -213,11 +246,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	beq	ld_last_inst
 	cmpwi	r12, BOOK3S_INTERRUPT_PROGRAM
 	beq	ld_last_inst
+	cmpwi	r12, BOOK3S_INTERRUPT_SYSCALL
+	beq	ld_last_prev_inst
 	cmpwi	r12, BOOK3S_INTERRUPT_ALIGNMENT
 	beq-	ld_last_inst
 
 	b	no_ld_last_inst
 
+ld_last_prev_inst:
+	addi	r3, r3, -4
+
 ld_last_inst:
 	/* Save off the guest instruction we're at */
 
@@ -254,6 +292,43 @@ no_ld_last_inst:
 	/* Switch back to host MMU */
 	LOAD_HOST_SEGMENTS
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+	lbz	r5, HSTATE_RESTORE_HID5(r13)
+	cmpwi	r5, 0
+	beq	no_dcbz32_off
+
+	li	r4, 0
+	mfspr   r5,SPRN_HID5
+	rldimi  r5,r4,6,56
+	mtspr   SPRN_HID5,r5
+
+no_dcbz32_off:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/*
+	 * For some interrupts, we need to call the real Linux
+	 * handler, so it can do work for us. This has to happen
+	 * as if the interrupt arrived from the kernel though,
+	 * so let's fake it here where most state is restored.
+	 *
+	 * Having set up SRR0/1 with the address where we want
+	 * to continue with relocation on (potentially in module
+	 * space), we either just go straight there with rfi[d],
+	 * or we jump to an interrupt handler with bctr if there
+	 * is an interrupt to be handled first.  In the latter
+	 * case, the rfi[d] at the end of the interrupt handler
+	 * will get us back to where we want to continue.
+	 */
+
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	beq	1f
+	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
+	beq	1f
+	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
+1:	mtctr	r12
+
 	/* Register usage at this point:
 	 *
 	 * R1       = host R1
@@ -264,13 +339,15 @@ no_ld_last_inst:
 	 *
 	 */
 
-	/* RFI into the highmem handler */
-	mfmsr	r7
-	ori	r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME	/* Enable paging */
-	mtsrr1	r7
-	/* Load highmem handler address */
+	PPC_LL	r6, HSTATE_HOST_MSR(r13)
 	PPC_LL	r8, HSTATE_VMHANDLER(r13)
+
+	/* Restore host msr -> SRR1 */
+	mtsrr1	r6
+	/* Load highmem handler address */
 	mtsrr0	r8
 
+	/* RFI into the highmem handler, or jump to interrupt handler */
+	beqctr
 	RFI
 kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ee45fa01220e..bb6c988f010a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -316,6 +316,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
 
+	if (!vcpu->arch.sane) {
+		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return -EINVAL;
+	}
+
 	local_irq_disable();
 	kvm_guest_enter();
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
@@ -618,6 +623,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int i;
+	int r;
 
 	vcpu->arch.pc = 0;
 	vcpu->arch.shared->msr = 0;
@@ -634,7 +640,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvmppc_init_timing_stats(vcpu);
 
-	return kvmppc_core_vcpu_setup(vcpu);
+	r = kvmppc_core_vcpu_setup(vcpu);
+	kvmppc_sanity_check(vcpu);
+	return r;
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 797a7447c268..26d20903f2bc 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -73,6 +73,8 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
 	vcpu->vcpu_id = 0;
 
+	vcpu->arch.cpu_type = KVM_CPU_E500V2;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a107c9be0fb1..0d843c6ba315 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,12 +39,8 @@
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 	return !(v->arch.shared->msr & MSR_WE) ||
 	       !!(v->arch.pending_exceptions);
-#else
-	return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
-#endif
 }
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -95,6 +91,31 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
+{
+	int r = false;
+
+	/* We have to know what CPU to virtualize */
+	if (!vcpu->arch.pvr)
+		goto out;
+
+	/* PAPR only works with book3s_64 */
+	if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
+		goto out;
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	/* HV KVM can only do PAPR mode for now */
+	if (!vcpu->arch.papr_enabled)
+		goto out;
+#endif
+
+	r = true;
+
+out:
+	vcpu->arch.sane = r;
+	return r ? 0 : -EINVAL;
+}
+
 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	enum emulation_result er;
@@ -188,6 +209,8 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_BOOKE_SREGS:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
+	case KVM_CAP_PPC_HIOR:
+	case KVM_CAP_PPC_PAPR:
 #endif
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
@@ -258,6 +281,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
 	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	vcpu->arch.wqp = &vcpu->wq;
 	if (!IS_ERR(vcpu))
 		kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
@@ -289,8 +313,8 @@ static void kvmppc_decrementer_func(unsigned long data)
 
 	kvmppc_core_queue_dec(vcpu);
 
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	}
 }
@@ -543,13 +567,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	if (irq->irq == KVM_INTERRUPT_UNSET)
+	if (irq->irq == KVM_INTERRUPT_UNSET) {
 		kvmppc_core_dequeue_external(vcpu, irq);
-	else
-		kvmppc_core_queue_external(vcpu, irq);
+		return 0;
+	}
+
+	kvmppc_core_queue_external(vcpu, irq);
 
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	} else if (vcpu->cpu != -1) {
 		smp_send_reschedule(vcpu->cpu);
@@ -571,11 +597,18 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.osi_enabled = true;
 		break;
+	case KVM_CAP_PPC_PAPR:
+		r = 0;
+		vcpu->arch.papr_enabled = true;
+		break;
 	default:
 		r = -EINVAL;
 		break;
 	}
 
+	if (!r)
+		r = kvmppc_sanity_check(vcpu);
+
 	return r;
 }
 
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index d733d7ca939c..b5d87067a58b 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -130,21 +130,21 @@ config 405GP
 	bool
 	select IBM405_ERR77
 	select IBM405_ERR51
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_ZMII
 
 config 405EP
 	bool
 
 config 405EX
 	bool
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
 
 config 405EZ
 	bool
-	select IBM_NEW_EMAC_NO_FLOW_CTRL
-	select IBM_NEW_EMAC_MAL_CLR_ICINTSTAT
-	select IBM_NEW_EMAC_MAL_COMMON_ERR
+	select IBM_EMAC_NO_FLOW_CTRL
+	select IBM_EMAC_MAL_CLR_ICINTSTAT
+	select IBM_EMAC_MAL_COMMON_ERR
 
 config 405GPR
 	bool
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index e958b6f48ec2..762322ce24a9 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -23,7 +23,7 @@ config BLUESTONE
 	default n
 	select PPC44x_SIMPLE
 	select APM821xx
-	select IBM_NEW_EMAC_RGMII
+	select IBM_EMAC_RGMII
 	help
 	  This option enables support for the APM APM821xx Evaluation board.
 
@@ -122,8 +122,8 @@ config CANYONLANDS
 	select PPC4xx_PCI_EXPRESS
 	select PCI_MSI
 	select PPC4xx_MSI
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
 	help
 	  This option enables support for the AMCC PPC460EX evaluation board.
 
@@ -135,8 +135,8 @@ config GLACIER
 	select 460EX # Odd since it uses 460GT but the effects are the same
 	select PCI
 	select PPC4xx_PCI_EXPRESS
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
 	help
 	  This option enables support for the AMCC PPC460GT evaluation board.
 
@@ -161,7 +161,7 @@ config EIGER
 	select 460SX
 	select PCI
 	select PPC4xx_PCI_EXPRESS
-	select IBM_NEW_EMAC_RGMII
+	select IBM_EMAC_RGMII
 	help
 	  This option enables support for the AMCC PPC460SX evaluation board.
 
@@ -260,59 +260,59 @@ config 440EP
 	bool
 	select PPC_FPU
 	select IBM440EP_ERR42
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_ZMII
 	select USB_ARCH_HAS_OHCI
 
 config 440EPX
 	bool
 	select PPC_FPU
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
 
 config 440GRX
 	bool
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
 
 config 440GP
 	bool
-	select IBM_NEW_EMAC_ZMII
+	select IBM_EMAC_ZMII
 
 config 440GX
 	bool
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII #test only
-	select IBM_NEW_EMAC_TAH  #test only
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII #test only
+	select IBM_EMAC_TAH  #test only
 
 config 440SP
 	bool
 
 config 440SPe
 	bool
-	select IBM_NEW_EMAC_EMAC4
+	select IBM_EMAC_EMAC4
 
 config 460EX
 	bool
 	select PPC_FPU
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_TAH
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_TAH
 
 config 460SX
 	bool
 	select PPC_FPU
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII
-	select IBM_NEW_EMAC_TAH
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII
+	select IBM_EMAC_TAH
 
 config APM821xx
 	bool
 	select PPC_FPU
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_TAH
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_TAH
 
 # 44x errata/workaround config symbols, selected by the CPU models above
 config IBM440EP_ERR42
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index e41ebbdb3e12..cfe958e94e1e 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -66,8 +66,8 @@ struct fsl_diu_shared_fb {
 	bool		in_use;
 };
 
-unsigned int mpc512x_get_pixel_format(unsigned int bits_per_pixel,
-				      int monitor_port)
+u32 mpc512x_get_pixel_format(enum fsl_diu_monitor_port port,
+			     unsigned int bits_per_pixel)
 {
 	switch (bits_per_pixel) {
 	case 32:
@@ -80,11 +80,12 @@ unsigned int mpc512x_get_pixel_format(unsigned int bits_per_pixel,
 	return 0x00000400;
 }
 
-void mpc512x_set_gamma_table(int monitor_port, char *gamma_table_base)
+void mpc512x_set_gamma_table(enum fsl_diu_monitor_port port,
+			     char *gamma_table_base)
 {
 }
 
-void mpc512x_set_monitor_port(int monitor_port)
+void mpc512x_set_monitor_port(enum fsl_diu_monitor_port port)
 {
 }
 
@@ -182,14 +183,10 @@ void mpc512x_set_pixel_clock(unsigned int pixclock)
 	iounmap(ccm);
 }
 
-ssize_t mpc512x_show_monitor_port(int monitor_port, char *buf)
+enum fsl_diu_monitor_port
+mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port)
 {
-	return sprintf(buf, "0 - 5121 LCD\n");
-}
-
-int mpc512x_set_sysfs_monitor_port(int val)
-{
-	return 0;
+	return FSL_DIU_PORT_DVI;
 }
 
 static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
@@ -256,7 +253,7 @@ void __init mpc512x_init_diu(void)
 	}
 
 	mode = in_be32(&diu_reg->diu_mode);
-	if (mode != MFB_MODE1) {
+	if (mode == MFB_MODE0) {
 		pr_info("%s: DIU OFF\n", __func__);
 		goto out;
 	}
@@ -332,8 +329,7 @@ void __init mpc512x_setup_diu(void)
 	diu_ops.set_gamma_table		= mpc512x_set_gamma_table;
 	diu_ops.set_monitor_port	= mpc512x_set_monitor_port;
 	diu_ops.set_pixel_clock		= mpc512x_set_pixel_clock;
-	diu_ops.show_monitor_port	= mpc512x_show_monitor_port;
-	diu_ops.set_sysfs_monitor_port	= mpc512x_set_sysfs_monitor_port;
+	diu_ops.valid_monitor_port	= mpc512x_valid_monitor_port;
 	diu_ops.release_bootmem		= mpc512x_release_bootmem;
 #endif
 }
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index 498534cd5265..12f5932dadc9 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -80,7 +80,7 @@ config P1010_RDB
 config P1022_DS
 	bool "Freescale P1022 DS"
 	select DEFAULT_UIMAGE
-	select CONFIG_PHYS_64BIT	# The DTS has 36-bit addresses
+	select PHYS_64BIT	# The DTS has 36-bit addresses
 	select SWIOTLB
 	help
 	  This option enables support for the Freescale P1022DS reference board.
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 266b3aadfe5e..c01c7277888c 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -93,8 +93,8 @@
  * The Area Descriptor is a 32-bit value that determine which bits in each
  * pixel are to be used for each color.
  */
-static unsigned int p1022ds_get_pixel_format(unsigned int bits_per_pixel,
-	int monitor_port)
+static u32 p1022ds_get_pixel_format(enum fsl_diu_monitor_port port,
+				    unsigned int bits_per_pixel)
 {
 	switch (bits_per_pixel) {
 	case 32:
@@ -118,7 +118,8 @@ static unsigned int p1022ds_get_pixel_format(unsigned int bits_per_pixel,
  * On some boards, the gamma table for some ports may need to be modified.
  * This is not the case on the P1022DS, so we do nothing.
 */
-static void p1022ds_set_gamma_table(int monitor_port, char *gamma_table_base)
+static void p1022ds_set_gamma_table(enum fsl_diu_monitor_port port,
+				    char *gamma_table_base)
 {
 }
 
@@ -126,7 +127,7 @@ static void p1022ds_set_gamma_table(int monitor_port, char *gamma_table_base)
  * p1022ds_set_monitor_port: switch the output to a different monitor port
  *
  */
-static void p1022ds_set_monitor_port(int monitor_port)
+static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port)
 {
 	struct device_node *pixis_node;
 	void __iomem *pixis;
@@ -145,19 +146,21 @@ static void p1022ds_set_monitor_port(int monitor_port)
 	}
 	brdcfg1 = pixis + 9;	/* BRDCFG1 is at offset 9 in the ngPIXIS */
 
-	switch (monitor_port) {
-	case 0: /* DVI */
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
+		printk(KERN_INFO "%s:%u\n", __func__, __LINE__);
 		/* Enable the DVI port, disable the DFP and the backlight */
 		clrsetbits_8(brdcfg1, PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT,
 			     PX_BRDCFG1_DVIEN);
 		break;
-	case 1: /* Single link LVDS */
+	case FSL_DIU_PORT_LVDS:
+		printk(KERN_INFO "%s:%u\n", __func__, __LINE__);
 		/* Enable the DFP port, disable the DVI and the backlight */
 		clrsetbits_8(brdcfg1, PX_BRDCFG1_DVIEN | PX_BRDCFG1_BACKLIGHT,
 			     PX_BRDCFG1_DFPEN);
 		break;
 	default:
-		pr_err("p1022ds: unsupported monitor port %i\n", monitor_port);
+		pr_err("p1022ds: unsupported monitor port %i\n", port);
 	}
 
 	iounmap(pixis);
@@ -214,23 +217,18 @@ void p1022ds_set_pixel_clock(unsigned int pixclock)
 }
 
 /**
- * p1022ds_show_monitor_port: show the current monitor
- *
- * This function returns a string indicating whether the current monitor is
- * set to DVI or LVDS.
- */
-ssize_t p1022ds_show_monitor_port(int monitor_port, char *buf)
-{
-	return sprintf(buf, "%c0 - DVI\n%c1 - Single link LVDS\n",
-		monitor_port == 0 ? '*' : ' ', monitor_port == 1 ? '*' : ' ');
-}
-
-/**
- * p1022ds_set_sysfs_monitor_port: set the monitor port for sysfs
+ * p1022ds_valid_monitor_port: set the monitor port for sysfs
  */
-int p1022ds_set_sysfs_monitor_port(int val)
+enum fsl_diu_monitor_port
+p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
 {
-	return val < 2 ? val : 0;
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
+	case FSL_DIU_PORT_LVDS:
+		return port;
+	default:
+		return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */
+	}
 }
 
 #endif
@@ -305,8 +303,7 @@ static void __init p1022_ds_setup_arch(void)
 	diu_ops.set_gamma_table		= p1022ds_set_gamma_table;
 	diu_ops.set_monitor_port	= p1022ds_set_monitor_port;
 	diu_ops.set_pixel_clock		= p1022ds_set_pixel_clock;
-	diu_ops.show_monitor_port	= p1022ds_show_monitor_port;
-	diu_ops.set_sysfs_monitor_port	= p1022ds_set_sysfs_monitor_port;
+	diu_ops.valid_monitor_port	= p1022ds_valid_monitor_port;
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 74e018ef724b..13fa9a6403e6 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -152,10 +152,10 @@ machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 	(c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
 	(c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
 
-unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
-						int monitor_port)
+u32 mpc8610hpcd_get_pixel_format(enum fsl_diu_monitor_port port,
+				 unsigned int bits_per_pixel)
 {
-	static const unsigned long pixelformat[][3] = {
+	static const u32 pixelformat[][3] = {
 		{
 			MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
 			MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
@@ -170,7 +170,8 @@ unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
 	unsigned int arch_monitor;
 
 	/* The DVI port is mis-wired on revision 1 of this board. */
-	arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
+	arch_monitor =
+		((*pixis_arch == 0x01) && (port == FSL_DIU_PORT_DVI)) ? 0 : 1;
 
 	switch (bits_per_pixel) {
 	case 32:
@@ -185,10 +186,11 @@ unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
 	}
 }
 
-void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
+void mpc8610hpcd_set_gamma_table(enum fsl_diu_monitor_port port,
+				 char *gamma_table_base)
 {
 	int i;
-	if (monitor_port == 2) {		/* dual link LVDS */
+	if (port == FSL_DIU_PORT_DLVDS) {
 		for (i = 0; i < 256*3; i++)
 			gamma_table_base[i] = (gamma_table_base[i] << 2) |
 					 ((gamma_table_base[i] >> 6) & 0x03);
@@ -199,17 +201,21 @@ void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
 #define PX_BRDCFG0_DLINK	(1 << 4)
 #define PX_BRDCFG0_DIU_MASK	(PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK)
 
-void mpc8610hpcd_set_monitor_port(int monitor_port)
+void mpc8610hpcd_set_monitor_port(enum fsl_diu_monitor_port port)
 {
-	static const u8 bdcfg[] = {
-		PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK,
-		PX_BRDCFG0_DLINK,
-		0,
-	};
-
-	if (monitor_port < 3)
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
 		clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK,
-			     bdcfg[monitor_port]);
+			     PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK);
+		break;
+	case FSL_DIU_PORT_LVDS:
+		clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK,
+			     PX_BRDCFG0_DLINK);
+		break;
+	case FSL_DIU_PORT_DLVDS:
+		clrbits8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK);
+		break;
+	}
 }
 
 /**
@@ -262,20 +268,10 @@ void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
 	iounmap(guts);
 }
 
-ssize_t mpc8610hpcd_show_monitor_port(int monitor_port, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE,
-			"%c0 - DVI\n"
-			"%c1 - Single link LVDS\n"
-			"%c2 - Dual link LVDS\n",
-			monitor_port == 0 ? '*' : ' ',
-			monitor_port == 1 ? '*' : ' ',
-			monitor_port == 2 ? '*' : ' ');
-}
-
-int mpc8610hpcd_set_sysfs_monitor_port(int val)
+enum fsl_diu_monitor_port
+mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port)
 {
-	return val < 3 ? val : 0;
+	return port;
 }
 
 #endif
@@ -307,8 +303,7 @@ static void __init mpc86xx_hpcd_setup_arch(void)
 	diu_ops.set_gamma_table		= mpc8610hpcd_set_gamma_table;
 	diu_ops.set_monitor_port	= mpc8610hpcd_set_monitor_port;
 	diu_ops.set_pixel_clock		= mpc8610hpcd_set_pixel_clock;
-	diu_ops.show_monitor_port	= mpc8610hpcd_show_monitor_port;
-	diu_ops.set_sysfs_monitor_port	= mpc8610hpcd_set_sysfs_monitor_port;
+	diu_ops.valid_monitor_port	= mpc8610hpcd_valid_monitor_port;
 #endif
 
 	pixis_node = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 67d5009b4e86..2e7ff0c5cf42 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -17,10 +17,10 @@ config PPC_CELL_NATIVE
 	select PPC_CELL_COMMON
 	select MPIC
 	select PPC_IO_WORKAROUNDS
-	select IBM_NEW_EMAC_EMAC4
-	select IBM_NEW_EMAC_RGMII
-	select IBM_NEW_EMAC_ZMII #test only
-	select IBM_NEW_EMAC_TAH  #test only
+	select IBM_EMAC_EMAC4
+	select IBM_EMAC_RGMII
+	select IBM_EMAC_ZMII #test only
+	select IBM_EMAC_TAH  #test only
 	default n
 
 config PPC_IBM_CELL_BLADE
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
index 613070e9ddbe..f1eebcae9bf0 100644
--- a/arch/powerpc/platforms/embedded6xx/storcenter.c
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -77,7 +77,7 @@ static void __init storcenter_setup_arch(void)
 }
 
 /*
- * Interrupt setup and service.  Interrrupts on the turbostation come
+ * Interrupt setup and service.  Interrupts on the turbostation come
  * from the four PCI slots plus onboard 8241 devices: I2C, DUART.
  */
 static void __init storcenter_init_IRQ(void)
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 2ece02beb8ff..c6d00736f07f 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -22,15 +22,24 @@ struct device_node;
 extern void fsl_rstcr_restart(char *cmd);
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* The different ports that the DIU can be connected to */
+enum fsl_diu_monitor_port {
+	FSL_DIU_PORT_DVI,	/* DVI */
+	FSL_DIU_PORT_LVDS,	/* Single-link LVDS */
+	FSL_DIU_PORT_DLVDS	/* Dual-link LVDS */
+};
+
 struct platform_diu_data_ops {
-	unsigned int (*get_pixel_format) (unsigned int bits_per_pixel,
-		int monitor_port);
-	void (*set_gamma_table) (int monitor_port, char *gamma_table_base);
-	void (*set_monitor_port) (int monitor_port);
-	void (*set_pixel_clock) (unsigned int pixclock);
-	ssize_t (*show_monitor_port) (int monitor_port, char *buf);
-	int (*set_sysfs_monitor_port) (int val);
-	void (*release_bootmem) (void);
+	u32 (*get_pixel_format)(enum fsl_diu_monitor_port port,
+		unsigned int bpp);
+	void (*set_gamma_table)(enum fsl_diu_monitor_port port,
+		char *gamma_table_base);
+	void (*set_monitor_port)(enum fsl_diu_monitor_port port);
+	void (*set_pixel_clock)(unsigned int pixclock);
+	enum fsl_diu_monitor_port (*valid_monitor_port)
+		(enum fsl_diu_monitor_port port);
+	void (*release_bootmem)(void);
 };
 
 extern struct platform_diu_data_ops diu_ops;
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index 904c6cbaf45b..3363fbc964f8 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -382,7 +382,7 @@ static void qe_upload_microcode(const void *base,
 /*
  * Upload a microcode to the I-RAM at a specific address.
  *
- * See Documentation/powerpc/qe-firmware.txt for information on QE microcode
+ * See Documentation/powerpc/qe_firmware.txt for information on QE microcode
  * uploading.
  *
  * Currently, only version 1 is supported, so the 'version' field must be
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c
index 984cd2029158..3330feca7502 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/sysdev/uic.c
@@ -47,7 +47,7 @@ struct uic {
 	int index;
 	int dcrbase;
 
-	spinlock_t lock;
+	raw_spinlock_t lock;
 
 	/* The remapper for this UIC */
 	struct irq_host	*irqhost;
@@ -61,14 +61,14 @@ static void uic_unmask_irq(struct irq_data *d)
 	u32 er, sr;
 
 	sr = 1 << (31-src);
-	spin_lock_irqsave(&uic->lock, flags);
+	raw_spin_lock_irqsave(&uic->lock, flags);
 	/* ack level-triggered interrupts here */
 	if (irqd_is_level_type(d))
 		mtdcr(uic->dcrbase + UIC_SR, sr);
 	er = mfdcr(uic->dcrbase + UIC_ER);
 	er |= sr;
 	mtdcr(uic->dcrbase + UIC_ER, er);
-	spin_unlock_irqrestore(&uic->lock, flags);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
 }
 
 static void uic_mask_irq(struct irq_data *d)
@@ -78,11 +78,11 @@ static void uic_mask_irq(struct irq_data *d)
 	unsigned long flags;
 	u32 er;
 
-	spin_lock_irqsave(&uic->lock, flags);
+	raw_spin_lock_irqsave(&uic->lock, flags);
 	er = mfdcr(uic->dcrbase + UIC_ER);
 	er &= ~(1 << (31 - src));
 	mtdcr(uic->dcrbase + UIC_ER, er);
-	spin_unlock_irqrestore(&uic->lock, flags);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
 }
 
 static void uic_ack_irq(struct irq_data *d)
@@ -91,9 +91,9 @@ static void uic_ack_irq(struct irq_data *d)
 	unsigned int src = irqd_to_hwirq(d);
 	unsigned long flags;
 
-	spin_lock_irqsave(&uic->lock, flags);
+	raw_spin_lock_irqsave(&uic->lock, flags);
 	mtdcr(uic->dcrbase + UIC_SR, 1 << (31-src));
-	spin_unlock_irqrestore(&uic->lock, flags);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
 }
 
 static void uic_mask_ack_irq(struct irq_data *d)
@@ -104,7 +104,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
 	u32 er, sr;
 
 	sr = 1 << (31-src);
-	spin_lock_irqsave(&uic->lock, flags);
+	raw_spin_lock_irqsave(&uic->lock, flags);
 	er = mfdcr(uic->dcrbase + UIC_ER);
 	er &= ~sr;
 	mtdcr(uic->dcrbase + UIC_ER, er);
@@ -118,7 +118,7 @@ static void uic_mask_ack_irq(struct irq_data *d)
 	 */
 	if (!irqd_is_level_type(d))
 		mtdcr(uic->dcrbase + UIC_SR, sr);
-	spin_unlock_irqrestore(&uic->lock, flags);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
 }
 
 static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
@@ -152,7 +152,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 
 	mask = ~(1 << (31 - src));
 
-	spin_lock_irqsave(&uic->lock, flags);
+	raw_spin_lock_irqsave(&uic->lock, flags);
 	tr = mfdcr(uic->dcrbase + UIC_TR);
 	pr = mfdcr(uic->dcrbase + UIC_PR);
 	tr = (tr & mask) | (trigger << (31-src));
@@ -161,7 +161,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
 	mtdcr(uic->dcrbase + UIC_PR, pr);
 	mtdcr(uic->dcrbase + UIC_TR, tr);
 
-	spin_unlock_irqrestore(&uic->lock, flags);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
 
 	return 0;
 }
@@ -254,7 +254,7 @@ static struct uic * __init uic_init_one(struct device_node *node)
 	if (! uic)
 		return NULL; /* FIXME: panic? */
 
-	spin_lock_init(&uic->lock);
+	raw_spin_lock_init(&uic->lock);
 	indexp = of_get_property(node, "cell-index", &len);
 	if (!indexp || (len != sizeof(u32))) {
 		printk(KERN_ERR "uic: Device node %s has missing or invalid "
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ed5cb5af5281..a9fbd43395f7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -91,6 +91,7 @@ config S390
 	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
 	select HAVE_RCU_TABLE_FREE if SMP
+	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
@@ -568,6 +569,16 @@ config KEXEC
 	  current kernel, and to start another kernel.  It is like a reboot
 	  but is independent of hardware/microcode support.
 
+config CRASH_DUMP
+	bool "kernel crash dumps"
+	depends on 64BIT
+	help
+	  Generate crash dump after being started by kexec.
+	  Crash dump kernels are loaded in the main kernel with kexec-tools
+	  into a specially reserved region and then later executed after
+	  a crash by kdump/kexec.
+	  For more details see Documentation/kdump/kdump.txt
+
 config ZFCPDUMP
 	def_bool n
 	prompt "zfcpdump support"
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 028f23ea81d1..465eca756feb 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr;
 
 extern _sclp_print_early(const char *);
 
-int puts(const char *s)
+static int puts(const char *s)
 {
 	_sclp_print_early(s);
 	return 0;
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 29c82c640a88..6cf8e26b3137 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 6023c6dc1fb7..74c8f5e76ce4 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -562,10 +562,9 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
 	void *base;
 
 	buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
-	base = vmalloc(buf_size);
+	base = vzalloc(buf_size);
 	if (!base)
 		return -ENOMEM;
-	memset(base, 0, buf_size);
 	d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
 	rc = diag204_do_store(d204->buf, diag204_buf_pages);
 	if (rc) {
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 623f2fb71774..9381c92cc779 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -11,6 +11,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <asm/fcx.h>
+#include <asm/irq.h>
 
 /* structs from asm/cio.h */
 struct irb;
@@ -127,6 +128,7 @@ enum uc_todo {
  * @restore: callback for restoring after hibernation
  * @uc_handler: callback for unit check handler
  * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
  */
 struct ccw_driver {
 	struct ccw_device_id *ids;
@@ -144,6 +146,7 @@ struct ccw_driver {
 	int (*restore)(struct ccw_device *);
 	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
 	struct device_driver driver;
+	enum interruption_class int_class;
 };
 
 extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index da359ca6fe55..2e49748b27da 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -12,6 +12,7 @@
 #define PSW32_MASK_IO		0x02000000UL
 #define PSW32_MASK_EXT		0x01000000UL
 #define PSW32_MASK_KEY		0x00F00000UL
+#define PSW32_MASK_BASE		0x00080000UL	/* Always one */
 #define PSW32_MASK_MCHECK	0x00040000UL
 #define PSW32_MASK_WAIT		0x00020000UL
 #define PSW32_MASK_PSTATE	0x00010000UL
@@ -19,21 +20,19 @@
 #define PSW32_MASK_CC		0x00003000UL
 #define PSW32_MASK_PM		0x00000f00UL
 
-#define PSW32_ADDR_AMODE31	0x80000000UL
+#define PSW32_MASK_USER		0x00003F00UL
+
+#define PSW32_ADDR_AMODE	0x80000000UL
 #define PSW32_ADDR_INSN		0x7FFFFFFFUL
 
-#define PSW32_BASE_BITS		0x00080000UL
+#define PSW32_DEFAULT_KEY	(((u32) PAGE_DEFAULT_ACC) << 20)
 
 #define PSW32_ASC_PRIMARY	0x00000000UL
 #define PSW32_ASC_ACCREG	0x00004000UL
 #define PSW32_ASC_SECONDARY	0x00008000UL
 #define PSW32_ASC_HOME		0x0000C000UL
 
-#define PSW32_MASK_MERGE(CURRENT,NEW) \
-	(((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \
-	 ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM)))
-
-extern long psw32_user_bits;
+extern u32 psw32_user_bits;
 
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
@@ -131,7 +130,8 @@ struct compat_statfs {
 	compat_fsid_t	f_fsid;
 	s32		f_namelen;
 	s32		f_frsize;
-	s32		f_spare[6];
+	s32		f_flags;
+	s32		f_spare[5];
 };
 
 #define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 97cc4403fabf..6940abfbe1d9 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -168,5 +168,6 @@ enum diag308_rc {
 
 extern int diag308(unsigned long subcode, void *addr);
 extern void diag308_reset(void);
+extern void store_status(void);
 
 #endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index ba7b01c726a3..ba6d85f88d50 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -8,7 +8,8 @@ enum interruption_class {
 	EXTERNAL_INTERRUPT,
 	IO_INTERRUPT,
 	EXTINT_CLK,
-	EXTINT_IPI,
+	EXTINT_EXC,
+	EXTINT_EMS,
 	EXTINT_TMR,
 	EXTINT_TLA,
 	EXTINT_PFL,
@@ -17,8 +18,8 @@ enum interruption_class {
 	EXTINT_SCP,
 	EXTINT_IUC,
 	EXTINT_CPM,
+	IOINT_CIO,
 	IOINT_QAI,
-	IOINT_QDI,
 	IOINT_DAS,
 	IOINT_C15,
 	IOINT_C70,
@@ -28,6 +29,7 @@ enum interruption_class {
 	IOINT_CLW,
 	IOINT_CTC,
 	IOINT_APB,
+	IOINT_CSC,
 	NMI_NMI,
 	NR_IRQS,
 };
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index bb729b84a21e..cf4e47b0948c 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -30,9 +30,15 @@
 /* Not more than 2GB */
 #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
 
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
 /* Allocate one page for the pdp and the second for the code */
 #define KEXEC_CONTROL_PAGE_SIZE 4096
 
+/* Alignment of crashkernel memory */
+#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE
+
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 00ff00dfb24c..24e18473d926 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -119,6 +119,7 @@ struct kvm_vcpu_stat {
 	u32 instruction_lctlg;
 	u32 exit_program_interruption;
 	u32 exit_instr_and_program;
+	u32 deliver_external_call;
 	u32 deliver_emergency_signal;
 	u32 deliver_service_signal;
 	u32 deliver_virtio_interrupt;
@@ -138,11 +139,13 @@ struct kvm_vcpu_stat {
 	u32 instruction_stfl;
 	u32 instruction_tprot;
 	u32 instruction_sigp_sense;
+	u32 instruction_sigp_external_call;
 	u32 instruction_sigp_emergency;
 	u32 instruction_sigp_stop;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
+	u32 diagnose_10;
 	u32 diagnose_44;
 };
 
@@ -174,6 +177,10 @@ struct kvm_s390_prefix_info {
 	__u32 address;
 };
 
+struct kvm_s390_extcall_info {
+	__u16 code;
+};
+
 struct kvm_s390_emerg_info {
 	__u16 code;
 };
@@ -186,6 +193,7 @@ struct kvm_s390_interrupt_info {
 		struct kvm_s390_ext_info ext;
 		struct kvm_s390_pgm_info pgm;
 		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
 	};
 };
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index e85c911aabf0..9e13c7d56cc1 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -151,10 +151,8 @@ struct _lowcore {
 	 */
 	__u32	ipib;				/* 0x0e00 */
 	__u32	ipib_checksum;			/* 0x0e04 */
-
-	/* 64 bit save area */
-	__u64	save_area_64;			/* 0x0e08 */
-	__u8	pad_0x0e10[0x0f00-0x0e10];	/* 0x0e10 */
+	__u32	vmcore_info;			/* 0x0e08 */
+	__u8	pad_0x0e0c[0x0f00-0x0e0c];	/* 0x0e0c */
 
 	/* Extended facility list */
 	__u64	stfle_fac_list[32];		/* 0x0f00 */
@@ -290,9 +288,7 @@ struct _lowcore {
 	 */
 	__u64	ipib;				/* 0x0e00 */
 	__u32	ipib_checksum;			/* 0x0e08 */
-
-	/* 64 bit save area */
-	__u64	save_area_64;			/* 0x0e0c */
+	__u64	vmcore_info;			/* 0x0e0c */
 	__u8	pad_0x0e14[0x0f00-0x0e14];	/* 0x0e14 */
 
 	/* Extended facility list */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index accb372ddc7e..f7ec548c2b9d 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn)
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
+void arch_set_page_states(int make_stable);
 
 static inline int devmem_is_allowed(unsigned long pfn)
 {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c0cb794bb365..34ede0ea85a9 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -696,7 +696,9 @@ void gmap_disable(struct gmap *gmap);
 int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		     unsigned long to, unsigned long length);
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 
 /*
  * Certain architectures need to do special things when PTEs
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index a4b6229e5d4b..5f33d37d032c 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr)
 
 extern void s390_adjust_jiffies(void);
 extern int get_cpu_capability(unsigned int *);
+extern const struct seq_operations cpuinfo_op;
+extern int sysctl_ieee_emulation_warnings;
 
 /*
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -118,17 +120,17 @@ struct stack_frame {
 /*
  * Do necessary setup to start up a new thread.
  */
-#define start_thread(regs, new_psw, new_stackp) do {		\
-	regs->psw.mask	= psw_user_bits;			\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;		\
-	regs->gprs[15]	= new_stackp;				\
+#define start_thread(regs, new_psw, new_stackp) do {			\
+	regs->psw.mask	= psw_user_bits | PSW_MASK_EA | PSW_MASK_BA;	\
+	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->gprs[15]	= new_stackp;					\
 } while (0)
 
-#define start_thread31(regs, new_psw, new_stackp) do {		\
-	regs->psw.mask	= psw_user32_bits;			\
-	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;		\
-	regs->gprs[15]	= new_stackp;				\
-	crst_table_downgrade(current->mm, 1UL << 31);		\
+#define start_thread31(regs, new_psw, new_stackp) do {			\
+	regs->psw.mask	= psw_user_bits | PSW_MASK_BA;			\
+	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
+	regs->gprs[15]	= new_stackp;					\
+	crst_table_downgrade(current->mm, 1UL << 31);			\
 } while (0)
 
 /* Forward declaration, a strange C thing */
@@ -187,7 +189,6 @@ static inline void __load_psw(psw_t psw)
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-
 static inline void __load_psw_mask (unsigned long mask)
 {
 	unsigned long addr;
@@ -212,26 +213,37 @@ static inline void __load_psw_mask (unsigned long mask)
 		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
 #endif /* __s390x__ */
 }
- 
+
 /*
- * Function to stop a processor until an interruption occurred
+ * Rewind PSW instruction address by specified number of bytes.
  */
-static inline void enabled_wait(void)
+static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
 {
-	__load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT |
-			PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY);
-}
+#ifndef __s390x__
+	if (psw.addr & PSW_ADDR_AMODE)
+		/* 31 bit mode */
+		return (psw.addr - ilc) | PSW_ADDR_AMODE;
+	/* 24 bit mode */
+	return (psw.addr - ilc) & ((1UL << 24) - 1);
+#else
+	unsigned long mask;
 
+	mask = (psw.mask & PSW_MASK_EA) ? -1UL :
+	       (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
+					  (1UL << 24) - 1;
+	return (psw.addr - ilc) & mask;
+#endif
+}
+ 
 /*
  * Function to drop a processor into disabled wait state
  */
-
 static inline void ATTRIB_NORET disabled_wait(unsigned long code)
 {
         unsigned long ctl_buf;
         psw_t dw_psw;
 
-        dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT;
+	dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
         dw_psw.addr = code;
         /* 
          * Store status and then load disabled wait psw,
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 62fd80c9e98c..a65846340d51 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -230,17 +230,21 @@ typedef struct
 #define PSW_MASK_IO		0x02000000UL
 #define PSW_MASK_EXT		0x01000000UL
 #define PSW_MASK_KEY		0x00F00000UL
+#define PSW_MASK_BASE		0x00080000UL	/* always one */
 #define PSW_MASK_MCHECK		0x00040000UL
 #define PSW_MASK_WAIT		0x00020000UL
 #define PSW_MASK_PSTATE		0x00010000UL
 #define PSW_MASK_ASC		0x0000C000UL
 #define PSW_MASK_CC		0x00003000UL
 #define PSW_MASK_PM		0x00000F00UL
+#define PSW_MASK_EA		0x00000000UL
+#define PSW_MASK_BA		0x00000000UL
+
+#define PSW_MASK_USER		0x00003F00UL
 
 #define PSW_ADDR_AMODE		0x80000000UL
 #define PSW_ADDR_INSN		0x7FFFFFFFUL
 
-#define PSW_BASE_BITS		0x00080000UL
 #define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 20)
 
 #define PSW_ASC_PRIMARY		0x00000000UL
@@ -254,6 +258,7 @@ typedef struct
 #define PSW_MASK_DAT		0x0400000000000000UL
 #define PSW_MASK_IO		0x0200000000000000UL
 #define PSW_MASK_EXT		0x0100000000000000UL
+#define PSW_MASK_BASE		0x0000000000000000UL
 #define PSW_MASK_KEY		0x00F0000000000000UL
 #define PSW_MASK_MCHECK		0x0004000000000000UL
 #define PSW_MASK_WAIT		0x0002000000000000UL
@@ -261,12 +266,14 @@ typedef struct
 #define PSW_MASK_ASC		0x0000C00000000000UL
 #define PSW_MASK_CC		0x0000300000000000UL
 #define PSW_MASK_PM		0x00000F0000000000UL
+#define PSW_MASK_EA		0x0000000100000000UL
+#define PSW_MASK_BA		0x0000000080000000UL
+
+#define PSW_MASK_USER		0x00003F0180000000UL
 
 #define PSW_ADDR_AMODE		0x0000000000000000UL
 #define PSW_ADDR_INSN		0xFFFFFFFFFFFFFFFFUL
 
-#define PSW_BASE_BITS		0x0000000180000000UL
-#define PSW_BASE32_BITS		0x0000000080000000UL
 #define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 52)
 
 #define PSW_ASC_PRIMARY		0x0000000000000000UL
@@ -279,18 +286,7 @@ typedef struct
 #ifdef __KERNEL__
 extern long psw_kernel_bits;
 extern long psw_user_bits;
-#ifdef CONFIG_64BIT
-extern long psw_user32_bits;
 #endif
-#endif
-
-/* This macro merges a NEW PSW mask specified by the user into
-   the currently active PSW mask CURRENT, modifying only those
-   bits in CURRENT that the user may be allowed to change: this
-   is the condition code and the program mask bits.  */
-#define PSW_MASK_MERGE(CURRENT,NEW) \
-	(((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \
-	 ((NEW) & (PSW_MASK_CC|PSW_MASK_PM)))
 
 /*
  * The s390_regs structure is used to define the elf_gregset_t.
@@ -328,8 +324,7 @@ struct pt_regs
 	psw_t psw;
 	unsigned long gprs[NUM_GPRS];
 	unsigned long orig_gpr2;
-	unsigned short ilc;
-	unsigned short svcnr;
+	unsigned int svc_code;
 };
 
 /*
@@ -487,6 +482,8 @@ typedef struct
 #define PTRACE_POKETEXT_AREA	      0x5004
 #define PTRACE_POKEDATA_AREA 	      0x5005
 #define PTRACE_GET_LAST_BREAK	      0x5006
+#define PTRACE_PEEK_SYSTEM_CALL       0x5007
+#define PTRACE_POKE_SYSTEM_CALL	      0x5008
 
 /*
  * PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 15c97625df8d..e63d13dd3bf5 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -46,6 +46,8 @@ struct qdesfmt0 {
 	u32	 : 16;
 } __attribute__ ((packed));
 
+#define QDR_AC_MULTI_BUFFER_ENABLE 0x01
+
 /**
  * struct qdr - queue description record (QDR)
  * @qfmt: queue format
@@ -123,6 +125,40 @@ struct slibe {
 };
 
 /**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user defineable value
+ * @res4: reserved paramater
+ * @user1: user defineable value
+ * @user2: user defineable value
+ */
+struct qaob {
+	u64 res0[6];
+	u8 res1;
+	u8 res2;
+	u8 res3;
+	u8 aorc;
+	u8 flags;
+	u16 cbtbs;
+	u8 sb_count;
+	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u64 user0;
+	u64 res4[2];
+	u64 user1;
+	u64 user2;
+} __attribute__ ((packed, aligned(256)));
+
+/**
  * struct slib - storage list information block (SLIB)
  * @nsliba: next SLIB address (if any)
  * @sla: SL address
@@ -222,9 +258,46 @@ struct slsb {
 	u8 val[QDIO_MAX_BUFFERS_PER_Q];
 } __attribute__ ((packed, aligned(256)));
 
+#define CHSC_AC2_MULTI_BUFFER_AVAILABLE	0x0080
+#define CHSC_AC2_MULTI_BUFFER_ENABLED	0x0040
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
+/**
+ * struct qdio_outbuf_state - SBAL related asynchronous operation information
+ *   (for communication with upper layer programs)
+ *   (only required for use with completion queues)
+ * @flags: flags indicating state of buffer
+ * @aob: pointer to QAOB used for the particular SBAL
+ * @user: pointer to upper layer program's state information related to SBAL
+ *        (stored in user1 data of QAOB)
+ */
+struct qdio_outbuf_state {
+	u8 flags;
+	struct qaob *aob;
+	void *user;
+};
+
+#define QDIO_OUTBUF_STATE_FLAG_NONE	0x00
+#define QDIO_OUTBUF_STATE_FLAG_PENDING	0x01
+
+#define CHSC_AC1_INITIATE_INPUTQ	0x80
+
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
+#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
+
+#define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_DATA_DIV_ENABLED	0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
+
 struct qdio_ssqd_desc {
 	u8 flags;
 	u8:8;
@@ -243,8 +316,7 @@ struct qdio_ssqd_desc {
 	u64 sch_token;
 	u8 mro;
 	u8 mri;
-	u8:8;
-	u8 sbalic;
+	u16 qdioac3;
 	u16:16;
 	u8:8;
 	u8 mmwc;
@@ -280,13 +352,16 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
  * @no_output_qs: number of output queues
  * @input_handler: handler to be called for input queues
  * @output_handler: handler to be called for output queues
+ * @queue_start_poll: polling handlers (one per input queue or NULL)
  * @int_parm: interruption parameter
  * @input_sbal_addr_array:  address of no_input_qs * 128 pointers
  * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
  */
 struct qdio_initialize {
 	struct ccw_device *cdev;
 	unsigned char q_format;
+	unsigned char qdr_ac;
 	unsigned char adapter_name[8];
 	unsigned int qib_param_field_format;
 	unsigned char *qib_param_field;
@@ -297,11 +372,12 @@ struct qdio_initialize {
 	unsigned int no_output_qs;
 	qdio_handler_t *input_handler;
 	qdio_handler_t *output_handler;
-	void (*queue_start_poll) (struct ccw_device *, int, unsigned long);
+	void (**queue_start_poll) (struct ccw_device *, int, unsigned long);
 	int scan_threshold;
 	unsigned long int_parm;
 	void **input_sbal_addr_array;
 	void **output_sbal_addr_array;
+	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
@@ -316,6 +392,7 @@ struct qdio_initialize {
 extern int qdio_allocate(struct qdio_initialize *);
 extern int qdio_establish(struct qdio_initialize *);
 extern int qdio_activate(struct ccw_device *);
+extern void qdio_release_aob(struct qaob *);
 extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
 		   unsigned int);
 extern int qdio_start_irq(struct ccw_device *, int);
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
index f584f4a52581..3d6ad4ad2a3f 100644
--- a/arch/s390/include/asm/reset.h
+++ b/arch/s390/include/asm/reset.h
@@ -17,5 +17,5 @@ struct reset_call {
 
 extern void register_reset_call(struct reset_call *reset);
 extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void);
+extern void s390_reset_system(void (*func)(void *), void *data);
 #endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index d5e2ef10537d..5a099714df04 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -26,15 +26,21 @@
 #define IPL_DEVICE        (*(unsigned long *)  (0x10404))
 #define INITRD_START      (*(unsigned long *)  (0x1040C))
 #define INITRD_SIZE       (*(unsigned long *)  (0x10414))
+#define OLDMEM_BASE	  (*(unsigned long *)  (0x1041C))
+#define OLDMEM_SIZE	  (*(unsigned long *)  (0x10424))
 #else /* __s390x__ */
 #define IPL_DEVICE        (*(unsigned long *)  (0x10400))
 #define INITRD_START      (*(unsigned long *)  (0x10408))
 #define INITRD_SIZE       (*(unsigned long *)  (0x10410))
+#define OLDMEM_BASE	  (*(unsigned long *)  (0x10418))
+#define OLDMEM_SIZE	  (*(unsigned long *)  (0x10420))
 #endif /* __s390x__ */
 #define COMMAND_LINE      ((char *)            (0x10480))
 
 #define CHUNK_READ_WRITE 0
 #define CHUNK_READ_ONLY  1
+#define CHUNK_OLDMEM	 4
+#define CHUNK_CRASHK	 5
 
 struct mem_chunk {
 	unsigned long addr;
@@ -48,6 +54,8 @@ extern int memory_end_set;
 extern unsigned long memory_end;
 
 void detect_memory_layout(struct mem_chunk chunk[]);
+void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr,
+		     unsigned long size, int type);
 
 #define PRIMARY_SPACE_MODE	0
 #define ACCESS_REGISTER_MODE	1
@@ -106,6 +114,7 @@ extern unsigned int user_mode;
 #endif /* __s390x__ */
 
 #define ZFCPDUMP_HSA_SIZE	(32UL<<20)
+#define ZFCPDUMP_HSA_SIZE_MAX	(64UL<<20)
 
 /*
  * Console mode. Override with conmode=
@@ -134,10 +143,14 @@ extern char kernel_nss_name[];
 #define IPL_DEVICE        0x10404
 #define INITRD_START      0x1040C
 #define INITRD_SIZE       0x10414
+#define OLDMEM_BASE	  0x1041C
+#define OLDMEM_SIZE	  0x10424
 #else /* __s390x__ */
 #define IPL_DEVICE        0x10400
 #define INITRD_START      0x10408
 #define INITRD_SIZE       0x10410
+#define OLDMEM_BASE	  0x10418
+#define OLDMEM_SIZE	  0x10420
 #endif /* __s390x__ */
 #define COMMAND_LINE      0x10480
 
diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h
index 0addc6466d95..ca3f8814e361 100644
--- a/arch/s390/include/asm/sfp-util.h
+++ b/arch/s390/include/asm/sfp-util.h
@@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int,
 
 #define UDIV_NEEDS_NORMALIZATION 0
 
-#define abort() return 0
+#define abort() BUG()
 
 #define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 045e009fc164..ab47a69fdf07 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -33,6 +33,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
 extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *);
 extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp,
 			      int from, int to);
+extern void smp_restart_with_online_cpu(void);
 extern void smp_restart_cpu(void);
 
 /*
@@ -64,6 +65,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data)
 	func(data);
 }
 
+static inline void smp_restart_with_online_cpu(void)
+{
+}
+
 #define smp_vcpu_scheduled	(1)
 
 #endif /* CONFIG_SMP */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 56612fc8186e..fd94dfec8d08 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -13,6 +13,8 @@
 
 #include <linux/smp.h>
 
+extern int spin_retry;
+
 static inline int
 _raw_compare_and_swap(volatile unsigned int *lock,
 		      unsigned int old, unsigned int new)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 5c0246b955d8..b239ff53b189 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -13,6 +13,7 @@
 #define _ASM_SYSCALL_H	1
 
 #include <linux/sched.h>
+#include <linux/err.h>
 #include <asm/ptrace.h>
 
 /*
@@ -25,7 +26,8 @@ extern const unsigned int sys_call_table[];
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
 {
-	return regs->svcnr ? regs->svcnr : -1;
+	return test_tsk_thread_flag(task, TIF_SYSCALL) ?
+		(regs->svc_code & 0xffff) : -1;
 }
 
 static inline void syscall_rollback(struct task_struct *task,
@@ -37,7 +39,7 @@ static inline void syscall_rollback(struct task_struct *task,
 static inline long syscall_get_error(struct task_struct *task,
 				     struct pt_regs *regs)
 {
-	return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0;
+	return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0;
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 6582f69f2389..ef573c1d71a7 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -20,6 +20,8 @@
 
 struct task_struct;
 
+extern int sysctl_userprocess_debug;
+
 extern struct task_struct *__switch_to(void *, void *);
 extern void update_per_regs(struct task_struct *task);
 
@@ -114,6 +116,8 @@ extern void pfault_fini(void);
 extern void cmma_init(void);
 extern int memcpy_real(void *, void *, size_t);
 extern void copy_to_absolute_zero(void *dest, void *src, size_t count);
+extern int copy_to_user_real(void __user *dest, void *src, size_t count);
+extern int copy_from_user_real(void *dest, void __user *src, size_t count);
 
 #define finish_arch_switch(prev) do {					     \
 	set_fs(current->thread.mm_segment);				     \
@@ -210,8 +214,10 @@ __set_psw_mask(unsigned long mask)
 	__load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8)));
 }
 
-#define local_mcck_enable()  __set_psw_mask(psw_kernel_bits)
-#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK)
+#define local_mcck_enable() \
+	__set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK)
+#define local_mcck_disable() \
+	__set_psw_mask(psw_kernel_bits | PSW_MASK_DAT)
 
 #ifdef CONFIG_SMP
 
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 1a5dbb6f1495..a23183423b14 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -48,6 +48,7 @@ struct thread_info {
 	unsigned int		cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	struct restart_block	restart_block;
+	unsigned int		system_call;
 	__u64			user_timer;
 	__u64			system_timer;
 	unsigned long		last_break;	/* last breaking-event-address. */
@@ -84,10 +85,10 @@ static inline struct thread_info *current_thread_info(void)
 /*
  * thread information flags bit numbers
  */
+#define TIF_SYSCALL		0	/* inside a system call */
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_RESTART_SVC		4	/* restart svc with new svc number */
 #define TIF_PER_TRAP		6	/* deliver sigtrap on return to user */
 #define TIF_MCCK_PENDING	7	/* machine check handling is pending */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
@@ -103,11 +104,11 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_SINGLE_STEP		20	/* This task is single stepped */
 #define TIF_FREEZE		21	/* thread is freezing for suspend */
 
+#define _TIF_SYSCALL		(1<<TIF_SYSCALL)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_RESTART_SVC	(1<<TIF_RESTART_SVC)
 #define _TIF_PER_TRAP		(1<<TIF_PER_TRAP)
 #define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -117,7 +118,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SIE		(1<<TIF_SIE)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
-#define _TIF_SINGLE_STEP	(1<<TIF_FREEZE)
+#define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
 #define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #ifdef CONFIG_64BIT
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 88829a40af6f..d610bef9c5e9 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk)
 	asm volatile("stcke %0" : "=Q" (*clk) : : "cc");
 }
 
+static inline unsigned long long get_clock_fast(void)
+{
+	unsigned long long clk;
+
+	if (test_facility(25))
+		asm volatile(".insn	s,0xb27c0000,%0" : "=Q" (clk) : : "cc");
+	else
+		clk = get_clock();
+	return clk;
+}
+
 static inline unsigned long long get_clock_xt(void)
 {
 	unsigned char clk[16];
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 304445382382..1d8648cf2fea 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 }
 #else
 #define __tlb_flush_full(mm)	__tlb_flush_local()
+#define __tlb_flush_global()	__tlb_flush_local()
 #endif
 
 /*
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index df3732249baa..dd4f07640919 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER)	+= $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 2b45591e1582..751318765e2e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -45,8 +45,7 @@ int main(void)
 	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
 	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
 	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
-	DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc));
-	DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr));
+	DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code));
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
 	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
@@ -141,7 +140,6 @@ int main(void)
 	DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
 	DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
 	DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
-	DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64));
 #ifdef CONFIG_32BIT
 	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
 #else /* CONFIG_32BIT */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 255435663bf8..f8828d38fa6e 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -86,6 +86,8 @@ s390_base_pgm_handler_fn:
 ENTRY(diag308_reset)
 	larl	%r4,.Lctlregs		# Save control registers
 	stctg	%c0,%c15,0(%r4)
+	larl	%r4,.Lfpctl		# Floating point control register
+	stfpc	0(%r4)
 	larl	%r4,.Lrestart_psw	# Setup restart PSW at absolute 0
 	lghi	%r3,0
 	lg	%r4,0(%r4)		# Save PSW
@@ -99,6 +101,8 @@ ENTRY(diag308_reset)
 	sam64				# Switch to 64 bit addressing mode
 	larl	%r4,.Lctlregs		# Restore control registers
 	lctlg	%c0,%c15,0(%r4)
+	larl	%r4,.Lfpctl		# Restore floating point ctl register
+	lfpc	0(%r4)
 	br	%r14
 .align 16
 .Lrestart_psw:
@@ -110,6 +114,8 @@ ENTRY(diag308_reset)
 	.rept	16
 	.quad	0
 	.endr
+.Lfpctl:
+	.long	0
 	.previous
 
 #else /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 53acaa86dd94..84a982898448 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -60,12 +60,9 @@
 
 #include "compat_linux.h"
 
-long psw_user32_bits	= (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
-			   PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
-			   PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
-long psw32_user_bits	= (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME |
-			   PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
-			   PSW32_MASK_PSTATE);
+u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT |
+		      PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK |
+		      PSW32_MASK_PSTATE | PSW32_ASC_HOME;
  
 /* For this source file, we want overflow handling. */
 
@@ -365,12 +362,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 	if (set) {
 		if (copy_from_user (&s32, set, sizeof(compat_sigset_t)))
 			return -EFAULT;
-		switch (_NSIG_WORDS) {
-		case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
-		case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
-		case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
-		case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
-		}
+		s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
 	}
 	set_fs (KERNEL_DS);
 	ret = sys_rt_sigprocmask(how,
@@ -380,12 +372,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 	set_fs (old_fs);
 	if (ret) return ret;
 	if (oset) {
-		switch (_NSIG_WORDS) {
-		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
-		case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
-		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
-		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
-		}
+		s32.sig[1] = (s.sig[0] >> 32);
+		s32.sig[0] = s.sig[0];
 		if (copy_to_user (oset, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
@@ -404,12 +392,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
 	ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize);
 	set_fs (old_fs);
 	if (!ret) {
-		switch (_NSIG_WORDS) {
-		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
-		case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
-		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
-		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
-		}
+		s32.sig[1] = (s.sig[0] >> 32);
+		s32.sig[0] = s.sig[0];
 		if (copy_to_user (set, &s32, sizeof(compat_sigset_t)))
 			return -EFAULT;
 	}
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index a9a285b8c4ad..4f68c81d3ffa 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 			break;
 		case __SI_FAULT >> 16:
 			err |= __get_user(tmp, &from->si_addr);
-			to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN);
+			to->si_addr = (void __force __user *)
+				(u64) (tmp & PSW32_ADDR_INSN);
 			break;
 		case __SI_POLL >> 16:
 			err |= __get_user(to->si_band, &from->si_band);
@@ -213,16 +214,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
 		ret = get_user(sa_handler, &act->sa_handler);
 		ret |= __copy_from_user(&set32, &act->sa_mask,
 					sizeof(compat_sigset_t));
-		switch (_NSIG_WORDS) {
-		case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
-				| (((long)set32.sig[7]) << 32);
-		case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
-				| (((long)set32.sig[5]) << 32);
-		case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
-				| (((long)set32.sig[3]) << 32);
-		case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
-				| (((long)set32.sig[1]) << 32);
-		}
+		new_ka.sa.sa_mask.sig[0] =
+			set32.sig[0] | (((long)set32.sig[1]) << 32);
 		ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
 		
 		if (ret)
@@ -233,20 +226,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
 	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
 
 	if (!ret && oact) {
-		switch (_NSIG_WORDS) {
-		case 4:
-			set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
-			set32.sig[6] = old_ka.sa.sa_mask.sig[3];
-		case 3:
-			set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
-			set32.sig[4] = old_ka.sa.sa_mask.sig[2];
-		case 2:
-			set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
-			set32.sig[2] = old_ka.sa.sa_mask.sig[1];
-		case 1:
-			set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
-			set32.sig[0] = old_ka.sa.sa_mask.sig[0];
-		}
+		set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
+		set32.sig[0] = old_ka.sa.sa_mask.sig[0];
 		ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler);
 		ret |= __copy_to_user(&oact->sa_mask, &set32,
 				      sizeof(compat_sigset_t));
@@ -300,9 +281,10 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
 	_s390_regs_common32 regs32;
 	int err, i;
 
-	regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits,
-					   (__u32)(regs->psw.mask >> 32));
-	regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr;
+	regs32.psw.mask = psw32_user_bits |
+		((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER);
+	regs32.psw.addr = (__u32) regs->psw.addr |
+		(__u32)(regs->psw.mask & PSW_MASK_BA);
 	for (i = 0; i < NUM_GPRS; i++)
 		regs32.gprs[i] = (__u32) regs->gprs[i];
 	save_access_regs(current->thread.acrs);
@@ -327,8 +309,9 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	err = __copy_from_user(&regs32, &sregs->regs, sizeof(regs32));
 	if (err)
 		return err;
-	regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask,
-				        (__u64)regs32.psw.mask << 32);
+	regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+		(__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
+		(__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
 	regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
 	for (i = 0; i < NUM_GPRS; i++)
 		regs->gprs[i] = (__u64) regs32.gprs[i];
@@ -342,7 +325,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 		return err;
 
 	restore_fp_regs(&current->thread.fp_regs);
-	regs->svcnr = 0;	/* disable syscall checks */
+	clear_thread_flag(TIF_SYSCALL);	/* No longer in a system call */
 	return 0;
 }
 
@@ -496,11 +479,11 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	if (ka->sa.sa_flags & SA_RESTORER) {
-		regs->gprs[14] = (__u64) ka->sa.sa_restorer;
+		regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
 	} else {
-		regs->gprs[14] = (__u64) frame->retcode;
+		regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE;
 		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
-		               (u16 __user *)(frame->retcode)))
+			       (u16 __force __user *)(frame->retcode)))
 			goto give_sigsegv;
         }
 
@@ -509,11 +492,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->gprs[15] = (__u64) frame;
-	regs->psw.addr = (__u64) ka->sa.sa_handler;
+	regs->gprs[15] = (__force __u64) frame;
+	regs->psw.mask |= PSW_MASK_BA;		/* force amode 31 */
+	regs->psw.addr = (__force __u64) ka->sa.sa_handler;
 
 	regs->gprs[2] = map_signal(sig);
-	regs->gprs[3] = (__u64) &frame->sc;
+	regs->gprs[3] = (__force __u64) &frame->sc;
 
 	/* We forgot to include these in the sigcontext.
 	   To avoid breaking binary compatibility, they are passed as args. */
@@ -521,7 +505,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
 	regs->gprs[5] = current->thread.prot_addr;
 
 	/* Place signal number on stack to allow backtrace from handler.  */
-	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+	if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
 		goto give_sigsegv;
 	return 0;
 
@@ -564,20 +548,21 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
 	} else {
 		regs->gprs[14] = (__u64) frame->retcode;
 		err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-		                  (u16 __user *)(frame->retcode));
+				  (u16 __force __user *)(frame->retcode));
 	}
 
 	/* Set up backchain. */
-	if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+	if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
 		goto give_sigsegv;
 
 	/* Set up registers for signal handler */
-	regs->gprs[15] = (__u64) frame;
+	regs->gprs[15] = (__force __u64) frame;
+	regs->psw.mask |= PSW_MASK_BA;		/* force amode 31 */
 	regs->psw.addr = (__u64) ka->sa.sa_handler;
 
 	regs->gprs[2] = map_signal(sig);
-	regs->gprs[3] = (__u64) &frame->info;
-	regs->gprs[4] = (__u64) &frame->uc;
+	regs->gprs[3] = (__force __u64) &frame->info;
+	regs->gprs[4] = (__force __u64) &frame->uc;
 	return 0;
 
 give_sigsegv:
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 7526db6bf501..5006a1d9f5d0 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1623,8 +1623,7 @@ ENTRY(sys_syncfs_wrapper)
 	lgfr	%r2,%r2			# int
 	jg	sys_syncfs
 
-	.globl	sys_setns_wrapper
-sys_setns_wrapper:
+ENTRY(sys_setns_wrapper)
 	lgfr	%r2,%r2			# int
 	lgfr	%r3,%r3			# int
 	jg	sys_setns
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 000000000000..39f8fd4438fc
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,426 @@
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/crash_dump.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <asm/ipl.h>
+
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+/*
+ * Copy one page from "oldmem"
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ *  - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
+ *  - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset, int userbuf)
+{
+	unsigned long src;
+
+	if (!csize)
+		return 0;
+
+	src = (pfn << PAGE_SHIFT) + offset;
+	if (src < OLDMEM_SIZE)
+		src += OLDMEM_BASE;
+	else if (src > OLDMEM_BASE &&
+		 src < OLDMEM_BASE + OLDMEM_SIZE)
+		src -= OLDMEM_BASE;
+	if (userbuf)
+		copy_to_user_real((void __force __user *) buf, (void *) src,
+				  csize);
+	else
+		memcpy_real(buf, (void *) src, csize);
+	return csize;
+}
+
+/*
+ * Copy memory from old kernel
+ */
+static int copy_from_oldmem(void *dest, void *src, size_t count)
+{
+	unsigned long copied = 0;
+	int rc;
+
+	if ((unsigned long) src < OLDMEM_SIZE) {
+		copied = min(count, OLDMEM_SIZE - (unsigned long) src);
+		rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
+		if (rc)
+			return rc;
+	}
+	return memcpy_real(dest + copied, src + copied, count - copied);
+}
+
+/*
+ * Alloc memory and panic in case of ENOMEM
+ */
+static void *kzalloc_panic(int len)
+{
+	void *rc;
+
+	rc = kzalloc(len, GFP_KERNEL);
+	if (!rc)
+		panic("s390 kdump kzalloc (%d) failed", len);
+	return rc;
+}
+
+/*
+ * Get memory layout and create hole for oldmem
+ */
+static struct mem_chunk *get_memory_layout(void)
+{
+	struct mem_chunk *chunk_array;
+
+	chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk));
+	detect_memory_layout(chunk_array);
+	create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK);
+	return chunk_array;
+}
+
+/*
+ * Initialize ELF note
+ */
+static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
+		     const char *name)
+{
+	Elf64_Nhdr *note;
+	u64 len;
+
+	note = (Elf64_Nhdr *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = d_len;
+	note->n_type = type;
+	len = sizeof(Elf64_Nhdr);
+
+	memcpy(buf + len, name, note->n_namesz);
+	len = roundup(len + note->n_namesz, 4);
+
+	memcpy(buf + len, desc, note->n_descsz);
+	len = roundup(len + note->n_descsz, 4);
+
+	return PTR_ADD(buf, len);
+}
+
+/*
+ * Initialize prstatus note
+ */
+static void *nt_prstatus(void *ptr, struct save_area *sa)
+{
+	struct elf_prstatus nt_prstatus;
+	static int cpu_nr = 1;
+
+	memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+	memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
+	memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+	memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
+	nt_prstatus.pr_pid = cpu_nr;
+	cpu_nr++;
+
+	return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus),
+			 "CORE");
+}
+
+/*
+ * Initialize fpregset (floating point) note
+ */
+static void *nt_fpregset(void *ptr, struct save_area *sa)
+{
+	elf_fpregset_t nt_fpregset;
+
+	memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+	memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
+	memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+
+	return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset),
+		       "CORE");
+}
+
+/*
+ * Initialize timer note
+ */
+static void *nt_s390_timer(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
+			 KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD clock comparator note
+ */
+static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
+		       sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD programmable register note
+ */
+static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
+		       sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize control register note
+ */
+static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
+		       sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize prefix register note
+ */
+static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
+			 sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Fill ELF notes for one CPU with save area registers
+ */
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
+{
+	ptr = nt_prstatus(ptr, sa);
+	ptr = nt_fpregset(ptr, sa);
+	ptr = nt_s390_timer(ptr, sa);
+	ptr = nt_s390_tod_cmp(ptr, sa);
+	ptr = nt_s390_tod_preg(ptr, sa);
+	ptr = nt_s390_ctrs(ptr, sa);
+	ptr = nt_s390_prefix(ptr, sa);
+	return ptr;
+}
+
+/*
+ * Initialize prpsinfo note (new kernel)
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+	struct elf_prpsinfo prpsinfo;
+
+	memset(&prpsinfo, 0, sizeof(prpsinfo));
+	prpsinfo.pr_sname = 'R';
+	strcpy(prpsinfo.pr_fname, "vmlinux");
+	return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo),
+		       KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+	char nt_name[11], *vmcoreinfo;
+	Elf64_Nhdr note;
+	void *addr;
+
+	if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+		return ptr;
+	memset(nt_name, 0, sizeof(nt_name));
+	if (copy_from_oldmem(&note, addr, sizeof(note)))
+		return ptr;
+	if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
+		return ptr;
+	if (strcmp(nt_name, "VMCOREINFO") != 0)
+		return ptr;
+	vmcoreinfo = kzalloc_panic(note.n_descsz + 1);
+	if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz))
+		return ptr;
+	vmcoreinfo[note.n_descsz + 1] = 0;
+	return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO");
+}
+
+/*
+ * Initialize ELF header (new kernel)
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_S390;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	ehdr->e_phnum = mem_chunk_cnt + 1;
+	return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel)
+ */
+static int get_cpu_cnt(void)
+{
+	int i, cpus = 0;
+
+	for (i = 0; zfcpdump_save_areas[i]; i++) {
+		if (zfcpdump_save_areas[i]->pref_reg == 0)
+			continue;
+		cpus++;
+	}
+	return cpus;
+}
+
+/*
+ * Return memory chunk count for ELF header (new kernel)
+ */
+static int get_mem_chunk_cnt(void)
+{
+	struct mem_chunk *chunk_array, *mem_chunk;
+	int i, cnt = 0;
+
+	chunk_array = get_memory_layout();
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		mem_chunk = &chunk_array[i];
+		if (chunk_array[i].type != CHUNK_READ_WRITE &&
+		    chunk_array[i].type != CHUNK_READ_ONLY)
+			continue;
+		if (mem_chunk->size == 0)
+			continue;
+		cnt++;
+	}
+	kfree(chunk_array);
+	return cnt;
+}
+
+/*
+ * Relocate pointer in order to allow vmcore code access the data
+ */
+static inline unsigned long relocate(unsigned long addr)
+{
+	return OLDMEM_BASE + addr;
+}
+
+/*
+ * Initialize ELF loads (new kernel)
+ */
+static int loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+	struct mem_chunk *chunk_array, *mem_chunk;
+	int i;
+
+	chunk_array = get_memory_layout();
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		mem_chunk = &chunk_array[i];
+		if (mem_chunk->size == 0)
+			break;
+		if (chunk_array[i].type != CHUNK_READ_WRITE &&
+		    chunk_array[i].type != CHUNK_READ_ONLY)
+			continue;
+		else
+			phdr->p_filesz = mem_chunk->size;
+		phdr->p_type = PT_LOAD;
+		phdr->p_offset = mem_chunk->addr;
+		phdr->p_vaddr = mem_chunk->addr;
+		phdr->p_paddr = mem_chunk->addr;
+		phdr->p_memsz = mem_chunk->size;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_align = PAGE_SIZE;
+		phdr++;
+	}
+	kfree(chunk_array);
+	return i;
+}
+
+/*
+ * Initialize notes (new kernel)
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+	struct save_area *sa;
+	void *ptr_start = ptr;
+	int i;
+
+	ptr = nt_prpsinfo(ptr);
+
+	for (i = 0; zfcpdump_save_areas[i]; i++) {
+		sa = zfcpdump_save_areas[i];
+		if (sa->pref_reg == 0)
+			continue;
+		ptr = fill_cpu_elf_notes(ptr, sa);
+	}
+	ptr = nt_vmcoreinfo(ptr);
+	memset(phdr, 0, sizeof(*phdr));
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = relocate(notes_offset);
+	phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
+	phdr->p_memsz = phdr->p_filesz;
+	return ptr;
+}
+
+/*
+ * Create ELF core header (new kernel)
+ */
+static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
+{
+	Elf64_Phdr *phdr_notes, *phdr_loads;
+	int mem_chunk_cnt;
+	void *ptr, *hdr;
+	u32 alloc_size;
+	u64 hdr_off;
+
+	mem_chunk_cnt = get_mem_chunk_cnt();
+
+	alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
+		mem_chunk_cnt * sizeof(Elf64_Phdr);
+	hdr = kzalloc_panic(alloc_size);
+	/* Init elf header */
+	ptr = ehdr_init(hdr, mem_chunk_cnt);
+	/* Init program headers */
+	phdr_notes = ptr;
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+	phdr_loads = ptr;
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+	/* Init notes */
+	hdr_off = PTR_DIFF(ptr, hdr);
+	ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+	/* Init loads */
+	hdr_off = PTR_DIFF(ptr, hdr);
+	loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off);
+	*elfcorebuf_sz = hdr_off;
+	*elfcorebuf = (void *) relocate((unsigned long) hdr);
+	BUG_ON(*elfcorebuf_sz > alloc_size);
+}
+
+/*
+ * Create kdump ELF core header in new kernel, if it has not been passed via
+ * the "elfcorehdr" kernel parameter
+ */
+static int setup_kdump_elfcorehdr(void)
+{
+	size_t elfcorebuf_sz;
+	char *elfcorebuf;
+
+	if (!OLDMEM_BASE || is_kdump_kernel())
+		return -EINVAL;
+	s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz);
+	elfcorehdr_addr = (unsigned long long) elfcorebuf;
+	elfcorehdr_size = elfcorebuf_sz;
+	return 0;
+}
+
+subsys_initcall(setup_kdump_elfcorehdr);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index f297456dba7a..37394b3413e2 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void)
 {
 	psw_t psw;
 
-	psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
 	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
 	S390_lowcore.external_new_psw = psw;
 	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 02ec8fe7d03f..b13157057e02 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -43,16 +43,15 @@ SP_R13	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 52
 SP_R14	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 56
 SP_R15	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 60
 SP_ORIG_R2   =	STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2
-SP_ILC	     =	STACK_FRAME_OVERHEAD + __PT_ILC
-SP_SVCNR     =	STACK_FRAME_OVERHEAD + __PT_SVCNR
+SP_SVC_CODE  =	STACK_FRAME_OVERHEAD + __PT_SVC_CODE
 SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 
 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP )
+		 _TIF_MCCK_PENDING | _TIF_PER_TRAP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
-_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
-		_TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+_TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+		 _TIF_SYSCALL_TRACEPOINT)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
@@ -228,9 +227,10 @@ ENTRY(system_call)
 sysc_saveall:
 	SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	l	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+3(%r12),_TIF_SYSCALL
 sysc_vtime:
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 sysc_stime:
@@ -239,17 +239,17 @@ sysc_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 sysc_do_svc:
 	xr	%r7,%r7
-	icm	%r7,3,SP_SVCNR(%r15)	# load svc number and test for svc 0
+	icm	%r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0
 	bnz	BASED(sysc_nr_ok)	# svc number > 0
 	# svc 0: system call number in %r1
 	cl	%r1,BASED(.Lnr_syscalls)
 	bnl	BASED(sysc_nr_ok)
-	sth	%r1,SP_SVCNR(%r15)
+	sth	%r1,SP_SVC_CODE+2(%r15)
 	lr	%r7,%r1 	  # copy svc number to %r7
 sysc_nr_ok:
 	sll	%r7,2		  # svc number *4
 	l	%r10,BASED(.Lsysc_table)
-	tm	__TI_flags+2(%r12),_TIF_SYSCALL
+	tm	__TI_flags+2(%r12),_TIF_TRACE >> 8
 	mvc	SP_ARGS(4,%r15),SP_R7(%r15)
 	l	%r8,0(%r7,%r10)	  # get system call addr.
 	bnz	BASED(sysc_tracesys)
@@ -259,23 +259,19 @@ sysc_nr_ok:
 sysc_return:
 	LOCKDEP_SYS_EXIT
 sysc_tif:
+	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+	bno	BASED(sysc_restore)
 	tm	__TI_flags+3(%r12),_TIF_WORK_SVC
 	bnz	BASED(sysc_work)  # there is work to do (signals etc.)
+	ni	__TI_flags+3(%r12),255-_TIF_SYSCALL
 sysc_restore:
 	RESTORE_ALL __LC_RETURN_PSW,1
 sysc_done:
 
 #
-# There is work to do, but first we need to check if we return to userspace.
-#
-sysc_work:
-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
-	bno	BASED(sysc_restore)
-
-#
 # One of the work bits is on. Find out which one.
 #
-sysc_work_tif:
+sysc_work:
 	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
 	bo	BASED(sysc_mcck_pending)
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
@@ -284,8 +280,6 @@ sysc_work_tif:
 	bo	BASED(sysc_sigpending)
 	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
 	bo	BASED(sysc_notify_resume)
-	tm	__TI_flags+3(%r12),_TIF_RESTART_SVC
-	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r12),_TIF_PER_TRAP
 	bo	BASED(sysc_singlestep)
 	b	BASED(sysc_return)	# beware of critical section cleanup
@@ -314,11 +308,14 @@ sysc_sigpending:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	l	%r1,BASED(.Ldo_signal)
 	basr	%r14,%r1		# call do_signal
-	tm	__TI_flags+3(%r12),_TIF_RESTART_SVC
-	bo	BASED(sysc_restart)
-	tm	__TI_flags+3(%r12),_TIF_PER_TRAP
-	bo	BASED(sysc_singlestep)
-	b	BASED(sysc_return)
+	tm	__TI_flags+3(%r12),_TIF_SYSCALL
+	bno	BASED(sysc_return)
+	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
+	xr	%r7,%r7			# svc 0 returns -ENOSYS
+	clc	SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2)
+	bnl	BASED(sysc_nr_ok)	# invalid svc number -> do svc 0
+	icm	%r7,3,SP_SVC_CODE+2(%r15)# load new svc number
+	b	BASED(sysc_nr_ok)	# restart svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
@@ -329,24 +326,11 @@ sysc_notify_resume:
 	la	%r14,BASED(sysc_return)
 	br	%r1			# call do_notify_resume
 
-
-#
-# _TIF_RESTART_SVC is set, set up registers and restart svc
-#
-sysc_restart:
-	ni	__TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
-	l	%r7,SP_R2(%r15) 	# load new svc number
-	mvc	SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument
-	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
-	sth	%r7,SP_SVCNR(%r15)
-	b	BASED(sysc_nr_ok)	# restart svc
-
 #
 # _TIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP
-	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
+	ni	__TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP)
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
 	la	%r14,BASED(sysc_return)	# load adr. of system return
@@ -361,7 +345,7 @@ sysc_tracesys:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
 	xr	%r0,%r0
-	icm	%r0,3,SP_SVCNR(%r15)
+	icm	%r0,3,SP_SVC_CODE(%r15)
 	st	%r0,SP_R2(%r15)
 	basr	%r14,%r1
 	cl	%r2,BASED(.Lnr_syscalls)
@@ -376,7 +360,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	st	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+2(%r12),_TIF_SYSCALL
+	tm	__TI_flags+2(%r12),_TIF_TRACE >> 8
 	bz	BASED(sysc_return)
 	l	%r1,BASED(.Ltrace_exit)
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -454,7 +438,6 @@ ENTRY(pgm_check_handler)
 	bnz	BASED(pgm_per)		# got per exception -> special case
 	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	xc	SP_ILC(4,%r15),SP_ILC(%r15)
 	mvc	SP_PSW(8,%r15),__LC_PGM_OLD_PSW
 	l	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
@@ -530,9 +513,10 @@ pgm_exit2:
 pgm_svcper:
 	SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	l	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP)
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
@@ -540,7 +524,6 @@ pgm_svcper:
 	mvc	__THREAD_per_cause(2,%r8),__LC_PER_CAUSE
 	mvc	__THREAD_per_address(4,%r8),__LC_PER_ADDRESS
 	mvc	__THREAD_per_paid(1,%r8),__LC_PER_PAID
-	oi	__TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	b	BASED(sysc_do_svc)
@@ -550,7 +533,6 @@ pgm_svcper:
 #
 kernel_per:
 	REENABLE_IRQS
-	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
 	basr	%r14,%r1		# branch to do_single_step
@@ -853,13 +835,13 @@ restart_go:
 # PSW restart interrupt handler
 #
 ENTRY(psw_restart_int_handler)
-	st	%r15,__LC_SAVE_AREA_64(%r0)	# save r15
+	st	%r15,__LC_SAVE_AREA+48(%r0)	# save r15
 	basr	%r15,0
 0:	l	%r15,.Lrestart_stack-0b(%r15)	# load restart stack
 	l	%r15,0(%r15)
 	ahi	%r15,-SP_SIZE			# make room for pt_regs
 	stm	%r0,%r14,SP_R0(%r15)		# store gprs %r0-%r14 to stack
-	mvc	SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
+	mvc	SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack
 	mvc	SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
 	basr	%r14,0
@@ -965,9 +947,11 @@ cleanup_system_call:
 	s	%r15,BASED(.Lc_spsize)	# make room for registers & psw
 	st	%r15,12(%r12)
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	mvc	0(4,%r12),__LC_THREAD_INFO
+	l	%r12,__LC_THREAD_INFO
+	mvc	SP_PSW(8,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+3(%r12),_TIF_SYSCALL
 cleanup_vtime:
 	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12)
 	bhe	BASED(cleanup_stime)
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 66729eb7bbc5..ef8fb1d6e8d7 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -5,24 +5,33 @@
 #include <linux/signal.h>
 #include <asm/ptrace.h>
 
+
+extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long);
+extern void *restart_stack;
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+
 void do_protection_exception(struct pt_regs *, long, unsigned long);
 void do_dat_exception(struct pt_regs *, long, unsigned long);
 void do_asce_exception(struct pt_regs *, long, unsigned long);
 
-extern int sysctl_userprocess_debug;
-
 void do_per_trap(struct pt_regs *regs);
 void syscall_trace(struct pt_regs *regs, int entryexit);
 void kernel_stack_overflow(struct pt_regs * regs);
 void do_signal(struct pt_regs *regs);
 int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 		    siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs);
 
 void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long);
+void do_restart(void);
 int __cpuinit start_secondary(void *cpuvoid);
 void __init startup_init(void);
 void die(const char * str, struct pt_regs * regs, long err);
 
+void __init time_init(void);
+
 struct s390_mmap_arg_struct;
 struct fadvise64_64_args;
 struct old_sigaction;
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 713da0760538..83a93747e2fd 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -43,19 +43,18 @@ SP_R13	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 104
 SP_R14	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 112
 SP_R15	     =	STACK_FRAME_OVERHEAD + __PT_GPRS + 120
 SP_ORIG_R2   =	STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2
-SP_ILC	     =	STACK_FRAME_OVERHEAD + __PT_ILC
-SP_SVCNR      =	STACK_FRAME_OVERHEAD + __PT_SVCNR
+SP_SVC_CODE  =	STACK_FRAME_OVERHEAD + __PT_SVC_CODE
 SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 
 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP )
+		 _TIF_MCCK_PENDING | _TIF_PER_TRAP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
-_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
-		_TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+_TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+		 _TIF_SYSCALL_TRACEPOINT)
 _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
 
 #define BASED(name) name-system_call(%r13)
@@ -249,9 +248,10 @@ ENTRY(system_call)
 sysc_saveall:
 	SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+7(%r12),_TIF_SYSCALL
 sysc_vtime:
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 sysc_stime:
@@ -260,14 +260,14 @@ sysc_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 	LAST_BREAK
 sysc_do_svc:
-	llgh	%r7,SP_SVCNR(%r15)
+	llgh	%r7,SP_SVC_CODE+2(%r15)
 	slag	%r7,%r7,2	# shift and test for svc 0
 	jnz	sysc_nr_ok
 	# svc 0: system call number in %r1
 	llgfr	%r1,%r1		# clear high word in r1
 	cghi	%r1,NR_syscalls
 	jnl	sysc_nr_ok
-	sth	%r1,SP_SVCNR(%r15)
+	sth	%r1,SP_SVC_CODE+2(%r15)
 	slag	%r7,%r1,2	# shift and test for svc 0
 sysc_nr_ok:
 	larl	%r10,sys_call_table
@@ -277,7 +277,7 @@ sysc_nr_ok:
 	larl	%r10,sys_call_table_emu  # use 31 bit emulation system calls
 sysc_noemu:
 #endif
-	tm	__TI_flags+6(%r12),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_TRACE >> 8
 	mvc	SP_ARGS(8,%r15),SP_R7(%r15)
 	lgf	%r8,0(%r7,%r10) # load address of system call routine
 	jnz	sysc_tracesys
@@ -287,23 +287,19 @@ sysc_noemu:
 sysc_return:
 	LOCKDEP_SYS_EXIT
 sysc_tif:
+	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+	jno	sysc_restore
 	tm	__TI_flags+7(%r12),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
+	ni	__TI_flags+7(%r12),255-_TIF_SYSCALL
 sysc_restore:
 	RESTORE_ALL __LC_RETURN_PSW,1
 sysc_done:
 
 #
-# There is work to do, but first we need to check if we return to userspace.
-#
-sysc_work:
-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
-	jno	sysc_restore
-
-#
 # One of the work bits is on. Find out which one.
 #
-sysc_work_tif:
+sysc_work:
 	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jo	sysc_mcck_pending
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
@@ -312,8 +308,6 @@ sysc_work_tif:
 	jo	sysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	sysc_notify_resume
-	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
-	jo	sysc_restart
 	tm	__TI_flags+7(%r12),_TIF_PER_TRAP
 	jo	sysc_singlestep
 	j	sysc_return		# beware of critical section cleanup
@@ -339,11 +333,15 @@ sysc_sigpending:
 	ni	__TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
-	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
-	jo	sysc_restart
-	tm	__TI_flags+7(%r12),_TIF_PER_TRAP
-	jo	sysc_singlestep
-	j	sysc_return
+	tm	__TI_flags+7(%r12),_TIF_SYSCALL
+	jno	sysc_return
+	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
+	lghi	%r7,0			# svc 0 returns -ENOSYS
+	lh	%r1,SP_SVC_CODE+2(%r15)	# load new svc number
+	cghi	%r1,NR_syscalls
+	jnl	sysc_nr_ok		# invalid svc number -> do svc 0
+	slag	%r7,%r1,2
+	j	sysc_nr_ok		# restart svc
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
@@ -354,23 +352,10 @@ sysc_notify_resume:
 	jg	do_notify_resume	# call do_notify_resume
 
 #
-# _TIF_RESTART_SVC is set, set up registers and restart svc
-#
-sysc_restart:
-	ni	__TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
-	lg	%r7,SP_R2(%r15)		# load new svc number
-	mvc	SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument
-	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
-	sth	%r7,SP_SVCNR(%r15)
-	slag	%r7,%r7,2
-	j	sysc_nr_ok		# restart svc
-
-#
 # _TIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+7(%r12),255-_TIF_PER_TRAP	# clear TIF_PER_TRAP
-	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
+	ni	__TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP)
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	larl	%r14,sysc_return	# load adr. of system return
 	jg	do_per_trap
@@ -382,7 +367,7 @@ sysc_singlestep:
 sysc_tracesys:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
-	llgh	%r0,SP_SVCNR(%r15)
+	llgh	%r0,SP_SVC_CODE+2(%r15)
 	stg	%r0,SP_R2(%r15)
 	brasl	%r14,do_syscall_trace_enter
 	lghi	%r0,NR_syscalls
@@ -397,7 +382,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	stg	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+6(%r12),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_TRACE >> 8
 	jz	sysc_return
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	larl	%r14,sysc_return	# return point is sysc_return
@@ -470,7 +455,6 @@ ENTRY(pgm_check_handler)
 	jnz	pgm_per 		 # got per exception -> special case
 	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	xc	SP_ILC(4,%r15),SP_ILC(%r15)
 	mvc	SP_PSW(16,%r15),__LC_PGM_OLD_PSW
 	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	HANDLE_SIE_INTERCEPT
@@ -550,9 +534,10 @@ pgm_exit2:
 pgm_svcper:
 	SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP)
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
@@ -561,7 +546,6 @@ pgm_svcper:
 	mvc	__THREAD_per_cause(2,%r8),__LC_PER_CAUSE
 	mvc	__THREAD_per_address(8,%r8),__LC_PER_ADDRESS
 	mvc	__THREAD_per_paid(1,%r8),__LC_PER_PAID
-	oi	__TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	j	sysc_do_svc
@@ -571,7 +555,6 @@ pgm_svcper:
 #
 kernel_per:
 	REENABLE_IRQS
-	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)	# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	brasl	%r14,do_per_trap
 	j	pgm_exit
@@ -869,12 +852,12 @@ restart_go:
 # PSW restart interrupt handler
 #
 ENTRY(psw_restart_int_handler)
-	stg	%r15,__LC_SAVE_AREA_64(%r0)	# save r15
+	stg	%r15,__LC_SAVE_AREA+120(%r0)	# save r15
 	larl	%r15,restart_stack		# load restart stack
 	lg	%r15,0(%r15)
 	aghi	%r15,-SP_SIZE			# make room for pt_regs
 	stmg	%r0,%r14,SP_R0(%r15)		# store gprs %r0-%r14 to stack
-	mvc	SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack
+	mvc	SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack
 	mvc	SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0
 	brasl	%r14,do_restart
@@ -972,9 +955,11 @@ cleanup_system_call:
 	stg	%r15,32(%r12)
 	stg	%r11,0(%r12)
 	CREATE_STACK_FRAME __LC_SAVE_AREA
-	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
-	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
 	mvc	8(8,%r12),__LC_THREAD_INFO
+	lg	%r12,__LC_THREAD_INFO
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_SVC_CODE(4,%r15),__LC_SVC_ILC
+	oi	__TI_flags+7(%r12),_TIF_SYSCALL
 cleanup_vtime:
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24)
 	jhe	cleanup_stime
@@ -1096,6 +1081,7 @@ sie_exit:
 	lghi	%r2,0
 	br	%r14
 sie_fault:
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	lg	%r14,__LC_THREAD_INFO		# pointer thread_info struct
 	ni	__TI_flags+6(%r14),255-(_TIF_SIE>>8)
 	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 2d781bab37bb..900068d2bf92 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -449,10 +449,28 @@ ENTRY(start)
 #
 	.org	0x10000
 ENTRY(startup)
+	j	.Lep_startup_normal
+	.org	0x10008
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+	.ascii	"S390EP"
+	.byte	0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+	.org	0x10010
+ENTRY(startup_kdump)
+	j	.Lep_startup_kdump
+.Lep_startup_normal:
 	basr	%r13,0			# get base
 .LPG0:
 	xc	0x200(256),0x200	# partially clear lowcore
 	xc	0x300(256),0x300
+	xc	0xe00(256),0xe00
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	5f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13)
@@ -534,6 +552,8 @@ ENTRY(startup)
 	.align	8
 5:	.long	0x7fffffff,0xffffffff
 
+#include "head_kdump.S"
+
 #
 # params at 10400 (setup.h)
 #
@@ -541,6 +561,8 @@ ENTRY(startup)
 	.long	0,0			# IPL_DEVICE
 	.long	0,0			# INITRD_START
 	.long	0,0			# INITRD_SIZE
+	.long	0,0			# OLDMEM_BASE
+	.long	0,0			# OLDMEM_SIZE
 
 	.org	COMMAND_LINE
 	.byte	"root=/dev/ram0 ro"
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index f21954b44dc1..d3f1ab7d90ad 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -92,7 +92,7 @@ ENTRY(_stext)
 .LPG3:
 # check control registers
 	stctl	%c0,%c15,0(%r15)
-	oi	2(%r15),0x40		# enable sigp emergency signal
+	oi	2(%r15),0x60		# enable sigp emergency & external call
 	oi	0(%r15),0x10		# switch on low address protection
 	lctl	%c0,%c15,0(%r15)
 
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index ae5d492b069e..99348c0eaa41 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -90,7 +90,7 @@ ENTRY(_stext)
 .LPG3:
 # check control registers
 	stctg	%c0,%c15,0(%r15)
-	oi	6(%r15),0x40		# enable sigp emergency signal
+	oi	6(%r15),0x60		# enable sigp emergency & external call
 	oi	4(%r15),0x10		# switch on low address proctection
 	lctlg	%c0,%c15,0(%r15)
 
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
new file mode 100644
index 000000000000..e1ac3893e972
--- /dev/null
+++ b/arch/s390/kernel/head_kdump.S
@@ -0,0 +1,119 @@
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define DATAMOVER_ADDR	0x4000
+#define COPY_PAGE_ADDR	0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+	lhi	%r1,2				# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12			# Switch to esame mode
+	sam64					# Switch to 64 bit addressing
+	basr	%r13,0
+.Lbase:
+	larl	%r2,.Lbase_addr			# Check, if we have been
+	lg	%r2,0(%r2)			# already relocated:
+	clgr	%r2,%r13			#
+	jne	.Lrelocate			# No : Start data mover
+	lghi	%r2,0				# Yes: Start kdump kernel
+	brasl	%r14,startup_kdump_relocated
+
+.Lrelocate:
+	larl	%r4,startup
+	lg	%r2,0x418(%r4)			# Get kdump base
+	lg	%r3,0x420(%r4)			# Get kdump size
+
+	larl	%r10,.Lcopy_start		# Source of data mover
+	lghi	%r8,DATAMOVER_ADDR		# Target of data mover
+	mvc	0(256,%r8),0(%r10)		# Copy data mover code
+
+	agr	%r8,%r2				# Copy data mover to
+	mvc	0(256,%r8),0(%r10)		# reserved mem
+
+	lghi	%r14,DATAMOVER_ADDR		# Jump to copied data mover
+	basr	%r14,%r14
+.Lbase_addr:
+	.quad	.Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+	basr	%r13,0				# Base
+0:
+	lgr	%r11,%r2			# Save kdump base address
+	lgr	%r12,%r2
+	agr	%r12,%r3			# Compute kdump end address
+
+	lghi	%r5,0
+	lghi	%r10,COPY_PAGE_ADDR		# Load copy page address
+1:
+	mvc	0(256,%r10),0(%r5)		# Copy old kernel to tmp
+	mvc	0(256,%r5),0(%r11)		# Copy new kernel to old
+	mvc	0(256,%r11),0(%r10)		# Copy tmp to new
+	aghi	%r11,256
+	aghi	%r5,256
+	clgr	%r11,%r12
+	jl	1b
+
+	lg	%r14,.Lstartup_kdump-0b(%r13)
+	basr	%r14,%r14			# Start relocated kernel
+.Lstartup_kdump:
+	.long	0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+	basr	%r13,0
+0:
+	mvc	0(8,%r0),.Lrestart_psw-0b(%r13)	# Setup restart PSW
+	mvc	464(16,%r0),.Lpgm_psw-0b(%r13)	# Setup pgm check PSW
+	lhi	%r1,1				# Start new kernel
+	diag	%r1,%r1,0x308			# with diag 308
+
+.Lno_diag308:					# No diag 308
+	sam31					# Switch to 31 bit addr mode
+	sr	%r1,%r1				# Erase register r1
+	sr	%r2,%r2				# Erase register r2
+	sigp	%r1,%r2,0x12			# Switch to 31 bit arch mode
+	lpsw	0				# Start new kernel...
+.align	8
+.Lrestart_psw:
+	.long	0x00080000,0x80000000 + startup
+.Lpgm_psw:
+	.quad	0x0000000180000000,0x0000000000000000 + .Lno_diag308
+#else
+.align 2
+.Lep_startup_kdump:
+#ifdef CONFIG_64BIT
+	larl	%r13,startup_kdump_crash
+	lpswe	0(%r13)
+.align 8
+startup_kdump_crash:
+	.quad	0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#else
+	basr	%r13,0
+0:	lpsw	startup_kdump_crash-0b(%r13)
+.align 8
+startup_kdump_crash:
+	.long	0x000a0000,0x00000000 + startup_kdump_crash
+#endif /* CONFIG_64BIT */
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 48c710206366..affa8e68124a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include <linux/fs.h>
 #include <linux/gfp.h>
+#include <linux/crash_dump.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -26,6 +27,7 @@
 #include <asm/sclp.h>
 #include <asm/sigp.h>
 #include <asm/checksum.h>
+#include "entry.h"
 
 #define IPL_PARM_BLOCK_VERSION 0
 
@@ -275,8 +277,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
 static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
 
 /* VM IPL PARM routines */
-size_t reipl_get_ascii_vmparm(char *dest, size_t size,
-				   const struct ipl_parameter_block *ipb)
+static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
+				     const struct ipl_parameter_block *ipb)
 {
 	int i;
 	size_t len;
@@ -338,8 +340,8 @@ static size_t scpdata_length(const char* buf, size_t count)
 	return count;
 }
 
-size_t reipl_append_ascii_scpdata(char *dest, size_t size,
-				  const struct ipl_parameter_block *ipb)
+static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+					 const struct ipl_parameter_block *ipb)
 {
 	size_t count;
 	size_t i;
@@ -1738,7 +1740,11 @@ static struct kobj_attribute on_restart_attr =
 
 void do_restart(void)
 {
+	smp_restart_with_online_cpu();
 	smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+	crash_kexec(NULL);
+#endif
 	on_restart_trigger.action->fn(&on_restart_trigger);
 	stop_run(&on_restart_trigger);
 }
@@ -2009,7 +2015,7 @@ static void do_reset_calls(void)
 
 u32 dump_prefix_page;
 
-void s390_reset_system(void)
+void s390_reset_system(void (*func)(void *), void *data)
 {
 	struct _lowcore *lc;
 
@@ -2028,15 +2034,19 @@ void s390_reset_system(void)
 	__ctl_clear_bit(0,28);
 
 	/* Set new machine check handler */
-	S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
+	S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
 	S390_lowcore.mcck_new_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
 
 	/* Set new program check handler */
-	S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT;
 	S390_lowcore.program_new_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
 
+	/* Store status at absolute zero */
+	store_status();
+
 	do_reset_calls();
+	if (func)
+		func(data);
 }
-
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 1f4050d45f78..b9a7fdd9c814 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = {
 	{.name = "EXT" },
 	{.name = "I/O" },
 	{.name = "CLK", .desc = "[EXT] Clock Comparator" },
-	{.name = "IPI", .desc = "[EXT] Signal Processor" },
+	{.name = "EXC", .desc = "[EXT] External Call" },
+	{.name = "EMS", .desc = "[EXT] Emergency Signal" },
 	{.name = "TMR", .desc = "[EXT] CPU Timer" },
 	{.name = "TAL", .desc = "[EXT] Timing Alert" },
 	{.name = "PFL", .desc = "[EXT] Pseudo Page Fault" },
@@ -42,8 +43,8 @@ static const struct irq_class intrclass_names[] = {
 	{.name = "SCP", .desc = "[EXT] Service Call" },
 	{.name = "IUC", .desc = "[EXT] IUCV" },
 	{.name = "CPM", .desc = "[EXT] CPU Measurement" },
+	{.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" },
 	{.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" },
-	{.name = "QDI", .desc = "[I/O] QDIO Interrupt" },
 	{.name = "DAS", .desc = "[I/O] DASD" },
 	{.name = "C15", .desc = "[I/O] 3215" },
 	{.name = "C70", .desc = "[I/O] 3270" },
@@ -53,6 +54,7 @@ static const struct irq_class intrclass_names[] = {
 	{.name = "CLW", .desc = "[I/O] CLAW" },
 	{.name = "CTC", .desc = "[I/O] CTC" },
 	{.name = "APB", .desc = "[I/O] AP Bus" },
+	{.name = "CSC", .desc = "[I/O] CHSC Subchannel" },
 	{.name = "NMI", .desc = "[NMI] Machine Check" },
 };
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 1d05d669107c..64b761aef004 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -635,7 +635,7 @@ void __kprobes jprobe_return(void)
 	asm volatile(".word 0x0002");
 }
 
-void __kprobes jprobe_return_end(void)
+static void __used __kprobes jprobe_return_end(void)
 {
 	asm volatile("bcr 0,0");
 }
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index b09b9c62573e..3cd0f25ab015 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -1,10 +1,11 @@
 /*
  * arch/s390/kernel/machine_kexec.c
  *
- * Copyright IBM Corp. 2005,2006
+ * Copyright IBM Corp. 2005,2011
  *
  * Author(s): Rolf Adelsberger,
  *	      Heiko Carstens <heiko.carstens@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */
 
 #include <linux/device.h>
@@ -21,12 +22,162 @@
 #include <asm/smp.h>
 #include <asm/reset.h>
 #include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/asm-offsets.h>
 
 typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
 
 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
 
+#ifdef CONFIG_CRASH_DUMP
+
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
+
+/*
+ * Create ELF notes for one CPU
+ */
+static void add_elf_notes(int cpu)
+{
+	struct save_area *sa = (void *) 4608 + store_prefix();
+	void *ptr;
+
+	memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
+	ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	ptr = fill_cpu_elf_notes(ptr, sa);
+	memset(ptr, 0, sizeof(struct elf_note));
+}
+
+/*
+ * Store status of next available physical CPU
+ */
+static int store_status_next(int start_cpu, int this_cpu)
+{
+	struct save_area *sa = (void *) 4608 + store_prefix();
+	int cpu, rc;
+
+	for (cpu = start_cpu; cpu < 65536; cpu++) {
+		if (cpu == this_cpu)
+			continue;
+		do {
+			rc = raw_sigp(cpu, sigp_stop_and_store_status);
+		} while (rc == sigp_busy);
+		if (rc != sigp_order_code_accepted)
+			continue;
+		if (sa->pref_reg)
+			return cpu;
+	}
+	return -1;
+}
+
+/*
+ * Initialize CPU ELF notes
+ */
+void setup_regs(void)
+{
+	unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+	int cpu, this_cpu, phys_cpu = 0, first = 1;
+
+	this_cpu = stap();
+
+	if (!S390_lowcore.prefixreg_save_area)
+		first = 0;
+	for_each_online_cpu(cpu) {
+		if (first) {
+			add_elf_notes(cpu);
+			first = 0;
+			continue;
+		}
+		phys_cpu = store_status_next(phys_cpu, this_cpu);
+		if (phys_cpu == -1)
+			break;
+		add_elf_notes(cpu);
+		phys_cpu++;
+	}
+	/* Copy dump CPU store status info to absolute zero */
+	memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
+}
+
+#endif
+
+/*
+ * Start kdump: We expect here that a store status has been done on our CPU
+ */
+static void __do_machine_kdump(void *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+	int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+
+	__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+	setup_regs();
+	start_kdump(1);
+#endif
+}
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ */
+static int kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+	int (*start_kdump)(int) = (void *)image->start;
+	int rc;
+
+	__arch_local_irq_stnsm(0xfb); /* disable DAT */
+	rc = start_kdump(0);
+	__arch_local_irq_stosm(0x04); /* enable DAT */
+	return rc ? 0 : -EINVAL;
+#else
+	return -EINVAL;
+#endif
+}
+
+/*
+ * Map or unmap crashkernel memory
+ */
+static void crash_map_pages(int enable)
+{
+	unsigned long size = resource_size(&crashk_res);
+
+	BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
+	       size % KEXEC_CRASH_MEM_ALIGN);
+	if (enable)
+		vmem_add_mapping(crashk_res.start, size);
+	else
+		vmem_remove_mapping(crashk_res.start, size);
+}
+
+/*
+ * Map crashkernel memory
+ */
+void crash_map_reserved_pages(void)
+{
+	crash_map_pages(1);
+}
+
+/*
+ * Unmap crashkernel memory
+ */
+void crash_unmap_reserved_pages(void)
+{
+	crash_map_pages(0);
+}
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crashk_res.start),
+			     PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
 int machine_kexec_prepare(struct kimage *image)
 {
 	void *reboot_code_buffer;
@@ -35,6 +186,9 @@ int machine_kexec_prepare(struct kimage *image)
 	if (ipl_flags & IPL_NSS_VALID)
 		return -ENOSYS;
 
+	if (image->type == KEXEC_TYPE_CRASH)
+		return machine_kexec_prepare_kdump();
+
 	/* We don't support anything but the default image type for now. */
 	if (image->type != KEXEC_TYPE_DEFAULT)
 		return -EINVAL;
@@ -51,27 +205,53 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
+void arch_crash_save_vmcoreinfo(void)
+{
+	VMCOREINFO_SYMBOL(lowcore_ptr);
+	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+}
+
 void machine_shutdown(void)
 {
 }
 
-static void __machine_kexec(void *data)
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
 {
 	relocate_kernel_t data_mover;
 	struct kimage *image = data;
 
-	pfault_fini();
-	s390_reset_system();
-
 	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
 
 	/* Call the moving routine */
 	(*data_mover)(&image->head, image->start);
-	for (;;);
 }
 
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+	struct kimage *image = data;
+
+	pfault_fini();
+	if (image->type == KEXEC_TYPE_CRASH)
+		s390_reset_system(__do_machine_kdump, data);
+	else
+		s390_reset_system(__do_machine_kexec, data);
+	disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
 void machine_kexec(struct kimage *image)
 {
+	if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+		return;
 	tracer_disable();
 	smp_send_stop();
 	smp_switch_to_ipl_cpu(__machine_kexec, image);
diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c
index 0fbe4e32f7ba..19b4568f4cee 100644
--- a/arch/s390/kernel/mem_detect.c
+++ b/arch/s390/kernel/mem_detect.c
@@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[])
 	arch_local_irq_restore(flags);
 }
 EXPORT_SYMBOL(detect_memory_layout);
+
+/*
+ * Create memory hole with given address, size, and type
+ */
+void create_mem_hole(struct mem_chunk chunks[], unsigned long addr,
+		     unsigned long size, int type)
+{
+	unsigned long start, end, new_size;
+	int i;
+
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		if (chunks[i].size == 0)
+			continue;
+		if (addr + size < chunks[i].addr)
+			continue;
+		if (addr >= chunks[i].addr + chunks[i].size)
+			continue;
+		start = max(addr, chunks[i].addr);
+		end = min(addr + size, chunks[i].addr + chunks[i].size);
+		new_size = end - start;
+		if (new_size == 0)
+			continue;
+		if (start == chunks[i].addr &&
+		    end == chunks[i].addr + chunks[i].size) {
+			/* Remove chunk */
+			chunks[i].type = type;
+		} else if (start == chunks[i].addr) {
+			/* Make chunk smaller at start */
+			if (i >= MEMORY_CHUNKS - 1)
+				panic("Unable to create memory hole");
+			memmove(&chunks[i + 1], &chunks[i],
+				sizeof(struct mem_chunk) *
+				(MEMORY_CHUNKS - (i + 1)));
+			chunks[i + 1].addr = chunks[i].addr + new_size;
+			chunks[i + 1].size = chunks[i].size - new_size;
+			chunks[i].size = new_size;
+			chunks[i].type = type;
+			i += 1;
+		} else if (end == chunks[i].addr + chunks[i].size) {
+			/* Make chunk smaller at end */
+			if (i >= MEMORY_CHUNKS - 1)
+				panic("Unable to create memory hole");
+			memmove(&chunks[i + 1], &chunks[i],
+				sizeof(struct mem_chunk) *
+				(MEMORY_CHUNKS - (i + 1)));
+			chunks[i + 1].addr = start;
+			chunks[i + 1].size = new_size;
+			chunks[i + 1].type = type;
+			chunks[i].size -= new_size;
+			i += 1;
+		} else {
+			/* Create memory hole */
+			if (i >= MEMORY_CHUNKS - 2)
+				panic("Unable to create memory hole");
+			memmove(&chunks[i + 2], &chunks[i],
+				sizeof(struct mem_chunk) *
+				(MEMORY_CHUNKS - (i + 2)));
+			chunks[i + 1].addr = addr;
+			chunks[i + 1].size = size;
+			chunks[i + 1].type = type;
+			chunks[i + 2].addr = addr + size;
+			chunks[i + 2].size =
+				chunks[i].addr + chunks[i].size - (addr + size);
+			chunks[i + 2].type = chunks[i].type;
+			chunks[i].size = addr - chunks[i].addr;
+			i += 2;
+		}
+	}
+}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 541a7509faeb..9451b210a1b4 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/elfcore.h>
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
@@ -117,7 +118,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	struct pt_regs regs;
 
 	memset(&regs, 0, sizeof(regs));
-	regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
+	regs.psw.mask = psw_kernel_bits |
+		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
 	regs.gprs[9] = (unsigned long) fn;
 	regs.gprs[10] = (unsigned long) arg;
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 311e9d712888..6e0073e43f54 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL;
+	return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ef86ad243986..450931a45b68 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -42,34 +42,37 @@ enum s390_regset {
 	REGSET_GENERAL,
 	REGSET_FP,
 	REGSET_LAST_BREAK,
+	REGSET_SYSTEM_CALL,
 	REGSET_GENERAL_EXTENDED,
 };
 
 void update_per_regs(struct task_struct *task)
 {
-	static const struct per_regs per_single_step = {
-		.control = PER_EVENT_IFETCH,
-		.start = 0,
-		.end = PSW_ADDR_INSN,
-	};
 	struct pt_regs *regs = task_pt_regs(task);
 	struct thread_struct *thread = &task->thread;
-	const struct per_regs *new;
-	struct per_regs old;
-
-	/* TIF_SINGLE_STEP overrides the user specified PER registers. */
-	new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ?
-		&per_single_step : &thread->per_user;
+	struct per_regs old, new;
+
+	/* Copy user specified PER registers */
+	new.control = thread->per_user.control;
+	new.start = thread->per_user.start;
+	new.end = thread->per_user.end;
+
+	/* merge TIF_SINGLE_STEP into user specified PER registers. */
+	if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
+		new.control |= PER_EVENT_IFETCH;
+		new.start = 0;
+		new.end = PSW_ADDR_INSN;
+	}
 
 	/* Take care of the PER enablement bit in the PSW. */
-	if (!(new->control & PER_EVENT_MASK)) {
+	if (!(new.control & PER_EVENT_MASK)) {
 		regs->psw.mask &= ~PSW_MASK_PER;
 		return;
 	}
 	regs->psw.mask |= PSW_MASK_PER;
 	__ctl_store(old, 9, 11);
-	if (memcmp(new, &old, sizeof(struct per_regs)) != 0)
-		__ctl_load(*new, 9, 11);
+	if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+		__ctl_load(new, 9, 11);
 }
 
 void user_enable_single_step(struct task_struct *task)
@@ -166,8 +169,8 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 		 */
 		tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
 		if (addr == (addr_t) &dummy->regs.psw.mask)
-			/* Remove per bit from user psw. */
-			tmp &= ~PSW_MASK_PER;
+			/* Return a clean psw mask. */
+			tmp = psw_user_bits | (tmp & PSW_MASK_USER);
 
 	} else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
 		/*
@@ -289,18 +292,17 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * psw and gprs are stored on the stack
 		 */
 		if (addr == (addr_t) &dummy->regs.psw.mask &&
-#ifdef CONFIG_COMPAT
-		    data != PSW_MASK_MERGE(psw_user32_bits, data) &&
-#endif
-		    data != PSW_MASK_MERGE(psw_user_bits, data))
+		    ((data & ~PSW_MASK_USER) != psw_user_bits ||
+		     ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))))
 			/* Invalid psw mask. */
 			return -EINVAL;
-#ifndef CONFIG_64BIT
 		if (addr == (addr_t) &dummy->regs.psw.addr)
-			/* I'd like to reject addresses without the
-			   high order bit but older gdb's rely on it */
-			data |= PSW_ADDR_AMODE;
-#endif
+			/*
+			 * The debugger changed the instruction address,
+			 * reset system call restart, see signal.c:do_signal
+			 */
+			task_thread_info(child)->system_call = 0;
+
 		*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
@@ -495,21 +497,21 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 	__u32 tmp;
 
 	if (addr < (addr_t) &dummy32->regs.acrs) {
+		struct pt_regs *regs = task_pt_regs(child);
 		/*
 		 * psw and gprs are stored on the stack
 		 */
 		if (addr == (addr_t) &dummy32->regs.psw.mask) {
 			/* Fake a 31 bit psw mask. */
-			tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32);
-			tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp);
+			tmp = (__u32)(regs->psw.mask >> 32);
+			tmp = psw32_user_bits | (tmp & PSW32_MASK_USER);
 		} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
 			/* Fake a 31 bit psw address. */
-			tmp = (__u32) task_pt_regs(child)->psw.addr |
-				PSW32_ADDR_AMODE31;
+			tmp = (__u32) regs->psw.addr |
+				(__u32)(regs->psw.mask & PSW_MASK_BA);
 		} else {
 			/* gpr 0-15 */
-			tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw +
-					 addr*2 + 4);
+			tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
 		}
 	} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
 		/*
@@ -594,24 +596,32 @@ static int __poke_user_compat(struct task_struct *child,
 	addr_t offset;
 
 	if (addr < (addr_t) &dummy32->regs.acrs) {
+		struct pt_regs *regs = task_pt_regs(child);
 		/*
 		 * psw, gprs, acrs and orig_gpr2 are stored on the stack
 		 */
 		if (addr == (addr_t) &dummy32->regs.psw.mask) {
 			/* Build a 64 bit psw mask from 31 bit mask. */
-			if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp))
+			if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits)
 				/* Invalid psw mask. */
 				return -EINVAL;
-			task_pt_regs(child)->psw.mask =
-				PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32);
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+				(regs->psw.mask & PSW_MASK_BA) |
+				(__u64)(tmp & PSW32_MASK_USER) << 32;
 		} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
 			/* Build a 64 bit psw address from 31 bit address. */
-			task_pt_regs(child)->psw.addr =
-				(__u64) tmp & PSW32_ADDR_INSN;
+			regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+			/* Transfer 31 bit amode bit to psw mask. */
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+				(__u64)(tmp & PSW32_ADDR_AMODE);
+			/*
+			 * The debugger changed the instruction address,
+			 * reset system call restart, see signal.c:do_signal
+			 */
+			task_thread_info(child)->system_call = 0;
 		} else {
 			/* gpr 0-15 */
-			*(__u32*)((addr_t) &task_pt_regs(child)->psw
-				  + addr*2 + 4) = tmp;
+			*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
 		}
 	} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
 		/*
@@ -735,7 +745,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 * debugger stored an invalid system call number. Skip
 		 * the system call and the system call restart handling.
 		 */
-		regs->svcnr = 0;
+		clear_thread_flag(TIF_SYSCALL);
 		ret = -1;
 	}
 
@@ -897,6 +907,26 @@ static int s390_last_break_get(struct task_struct *target,
 
 #endif
 
+static int s390_system_call_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	unsigned int *data = &task_thread_info(target)->system_call;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   data, 0, sizeof(unsigned int));
+}
+
+static int s390_system_call_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	unsigned int *data = &task_thread_info(target)->system_call;
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  data, 0, sizeof(unsigned int));
+}
+
 static const struct user_regset s390_regsets[] = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
@@ -923,6 +953,14 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_last_break_get,
 	},
 #endif
+	[REGSET_SYSTEM_CALL] = {
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(unsigned int),
+		.align = sizeof(unsigned int),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -1102,6 +1140,14 @@ static const struct user_regset s390_compat_regsets[] = {
 		.align = sizeof(long),
 		.get = s390_compat_last_break_get,
 	},
+	[REGSET_SYSTEM_CALL] = {
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(compat_uint_t),
+		.align = sizeof(compat_uint_t),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
 	[REGSET_GENERAL_EXTENDED] = {
 		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 303d961c3bb5..ad67c214be04 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -10,6 +10,12 @@
 #include <asm/asm-offsets.h>
 
 #
+# store_status: Empty implementation until kdump is supported on 31 bit
+#
+ENTRY(store_status)
+		br	%r14
+
+#
 # do_reipl_asm
 # Parameter: r2 = schid of reipl device
 #
diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S
index e690975403f4..732a793ec53a 100644
--- a/arch/s390/kernel/reipl64.S
+++ b/arch/s390/kernel/reipl64.S
@@ -17,11 +17,11 @@
 #
 ENTRY(store_status)
 	/* Save register one and load save area base */
-	stg	%r1,__LC_SAVE_AREA_64(%r0)
+	stg	%r1,__LC_SAVE_AREA+120(%r0)
 	lghi	%r1,SAVE_AREA_BASE
 	/* General purpose registers */
 	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
-	lg	%r2,__LC_SAVE_AREA_64(%r0)
+	lg	%r2,__LC_SAVE_AREA+120(%r0)
 	stg	%r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
 	/* Control registers */
 	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
@@ -62,8 +62,11 @@ ENTRY(store_status)
 	larl	%r2,store_status
 	stg	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
 	br	%r14
-.align	8
+
+	.section .bss
+	.align	8
 .Lclkcmp:	.quad	0x0000000000000000
+	.previous
 
 #
 # do_reipl_asm
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7b371c37061d..8ac6bfa2786c 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -42,6 +42,9 @@
 #include <linux/reboot.h>
 #include <linux/topology.h>
 #include <linux/ftrace.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -57,12 +60,13 @@
 #include <asm/ebcdic.h>
 #include <asm/compat.h>
 #include <asm/kvm_virtio.h>
+#include <asm/diag.h>
 
-long psw_kernel_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
-			   PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
-long psw_user_bits	= (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
-			   PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
-			   PSW_MASK_PSTATE | PSW_DEFAULT_KEY);
+long psw_kernel_bits	= PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
+			  PSW_MASK_EA | PSW_MASK_BA;
+long psw_user_bits	= PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT |
+			  PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK |
+			  PSW_MASK_PSTATE | PSW_ASC_HOME;
 
 /*
  * User copy operations.
@@ -274,22 +278,14 @@ early_param("mem", early_parse_mem);
 unsigned int user_mode = HOME_SPACE_MODE;
 EXPORT_SYMBOL_GPL(user_mode);
 
-static int set_amode_and_uaccess(unsigned long user_amode,
-				 unsigned long user32_amode)
+static int set_amode_primary(void)
 {
-	psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode |
-			PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
-			PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
+	psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME;
+	psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY;
 #ifdef CONFIG_COMPAT
-	psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode |
-			  PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK |
-			  PSW_MASK_PSTATE | PSW_DEFAULT_KEY;
-	psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode |
-			  PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK |
-			  PSW32_MASK_PSTATE;
+	psw32_user_bits =
+		(psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY;
 #endif
-	psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME |
-			  PSW_MASK_MCHECK | PSW_DEFAULT_KEY;
 
 	if (MACHINE_HAS_MVCOS) {
 		memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess));
@@ -325,7 +321,7 @@ early_param("user_mode", early_parse_user_mode);
 static void setup_addressing_mode(void)
 {
 	if (user_mode == PRIMARY_SPACE_MODE) {
-		if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY))
+		if (set_amode_primary())
 			pr_info("Address spaces switched, "
 				"mvcos available\n");
 		else
@@ -344,24 +340,25 @@ setup_lowcore(void)
 	 */
 	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
 	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
-	lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	lc->restart_psw.mask = psw_kernel_bits;
 	lc->restart_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
-	if (user_mode != HOME_SPACE_MODE)
-		lc->restart_psw.mask |= PSW_ASC_HOME;
-	lc->external_new_psw.mask = psw_kernel_bits;
+	lc->external_new_psw.mask = psw_kernel_bits |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
 	lc->external_new_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
-	lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT;
+	lc->svc_new_psw.mask = psw_kernel_bits |
+		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
-	lc->program_new_psw.mask = psw_kernel_bits;
+	lc->program_new_psw.mask = psw_kernel_bits |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
 	lc->program_new_psw.addr =
-		PSW_ADDR_AMODE | (unsigned long)pgm_check_handler;
-	lc->mcck_new_psw.mask =
-		psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT;
+		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+	lc->mcck_new_psw.mask = psw_kernel_bits;
 	lc->mcck_new_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
-	lc->io_new_psw.mask = psw_kernel_bits;
+	lc->io_new_psw.mask = psw_kernel_bits |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
 	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
 	lc->clock_comparator = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
@@ -435,10 +432,14 @@ static void __init setup_resources(void)
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
+		if (memory_chunk[i].type == CHUNK_OLDMEM ||
+		    memory_chunk[i].type == CHUNK_CRASHK)
+			continue;
 		res = alloc_bootmem_low(sizeof(*res));
 		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
 		switch (memory_chunk[i].type) {
 		case CHUNK_READ_WRITE:
+		case CHUNK_CRASHK:
 			res->name = "System RAM";
 			break;
 		case CHUNK_READ_ONLY:
@@ -479,6 +480,7 @@ static void __init setup_memory_end(void)
 	unsigned long max_mem;
 	int i;
 
+
 #ifdef CONFIG_ZFCPDUMP
 	if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
 		memory_end = ZFCPDUMP_HSA_SIZE;
@@ -545,11 +547,201 @@ static void __init setup_restart_psw(void)
 	 * Setup restart PSW for absolute zero lowcore. This is necesary
 	 * if PSW restart is done on an offline CPU that has lowcore zero
 	 */
-	psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
 	psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
 	copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
 }
 
+static void __init setup_vmcoreinfo(void)
+{
+#ifdef CONFIG_KEXEC
+	unsigned long ptr = paddr_vmcoreinfo_note();
+
+	copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr));
+#endif
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Find suitable location for crashkernel memory
+ */
+static unsigned long __init find_crash_base(unsigned long crash_size,
+					    char **msg)
+{
+	unsigned long crash_base;
+	struct mem_chunk *chunk;
+	int i;
+
+	if (memory_chunk[0].size < crash_size) {
+		*msg = "first memory chunk must be at least crashkernel size";
+		return 0;
+	}
+	if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
+		return OLDMEM_BASE;
+
+	for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
+		chunk = &memory_chunk[i];
+		if (chunk->size == 0)
+			continue;
+		if (chunk->type != CHUNK_READ_WRITE)
+			continue;
+		if (chunk->size < crash_size)
+			continue;
+		crash_base = (chunk->addr + chunk->size) - crash_size;
+		if (crash_base < crash_size)
+			continue;
+		if (crash_base < ZFCPDUMP_HSA_SIZE_MAX)
+			continue;
+		if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE)
+			continue;
+		return crash_base;
+	}
+	*msg = "no suitable area found";
+	return 0;
+}
+
+/*
+ * Check if crash_base and crash_size is valid
+ */
+static int __init verify_crash_base(unsigned long crash_base,
+				    unsigned long crash_size,
+				    char **msg)
+{
+	struct mem_chunk *chunk;
+	int i;
+
+	/*
+	 * Because we do the swap to zero, we must have at least 'crash_size'
+	 * bytes free space before crash_base
+	 */
+	if (crash_size > crash_base) {
+		*msg = "crashkernel offset must be greater than size";
+		return -EINVAL;
+	}
+
+	/* First memory chunk must be at least crash_size */
+	if (memory_chunk[0].size < crash_size) {
+		*msg = "first memory chunk must be at least crashkernel size";
+		return -EINVAL;
+	}
+	/* Check if we fit into the respective memory chunk */
+	for (i = 0; i < MEMORY_CHUNKS; i++) {
+		chunk = &memory_chunk[i];
+		if (chunk->size == 0)
+			continue;
+		if (crash_base < chunk->addr)
+			continue;
+		if (crash_base >= chunk->addr + chunk->size)
+			continue;
+		/* we have found the memory chunk */
+		if (crash_base + crash_size > chunk->addr + chunk->size) {
+			*msg = "selected memory chunk is too small for "
+				"crashkernel memory";
+			return -EINVAL;
+		}
+		return 0;
+	}
+	*msg = "invalid memory range specified";
+	return -EINVAL;
+}
+
+/*
+ * Reserve kdump memory by creating a memory hole in the mem_chunk array
+ */
+static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
+					 int type)
+{
+
+	create_mem_hole(memory_chunk, addr, size, type);
+}
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
+{
+	struct memory_notify *arg = data;
+
+	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+		return NOTIFY_BAD;
+	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
+		return NOTIFY_OK;
+	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
+		return NOTIFY_OK;
+	return NOTIFY_BAD;
+}
+
+static struct notifier_block kdump_mem_nb = {
+	.notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void reserve_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (!OLDMEM_BASE)
+		return;
+
+	reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
+	reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
+			      CHUNK_OLDMEM);
+	if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
+		saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
+	else
+		saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
+#endif
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	unsigned long long crash_base, crash_size;
+	char *msg;
+	int rc;
+
+	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+			       &crash_base);
+	if (rc || crash_size == 0)
+		return;
+	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+	if (register_memory_notifier(&kdump_mem_nb))
+		return;
+	if (!crash_base)
+		crash_base = find_crash_base(crash_size, &msg);
+	if (!crash_base) {
+		pr_info("crashkernel reservation failed: %s\n", msg);
+		unregister_memory_notifier(&kdump_mem_nb);
+		return;
+	}
+	if (verify_crash_base(crash_base, crash_size, &msg)) {
+		pr_info("crashkernel reservation failed: %s\n", msg);
+		unregister_memory_notifier(&kdump_mem_nb);
+		return;
+	}
+	if (!OLDMEM_BASE && MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+	insert_resource(&iomem_resource, &crashk_res);
+	reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK);
+	pr_info("Reserving %lluMB of memory at %lluMB "
+		"for crashkernel (System RAM: %luMB)\n",
+		crash_size >> 20, crash_base >> 20, memory_end >> 20);
+#endif
+}
+
 static void __init
 setup_memory(void)
 {
@@ -580,6 +772,14 @@ setup_memory(void)
 		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
 			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
 
+#ifdef CONFIG_CRASH_DUMP
+			if (OLDMEM_BASE) {
+				/* Move initrd behind kdump oldmem */
+				if (start + INITRD_SIZE > OLDMEM_BASE &&
+				    start < OLDMEM_BASE + OLDMEM_SIZE)
+					start = OLDMEM_BASE + OLDMEM_SIZE;
+			}
+#endif
 			if (start + INITRD_SIZE > memory_end) {
 				pr_err("initrd extends beyond end of "
 				       "memory (0x%08lx > 0x%08lx) "
@@ -610,7 +810,8 @@ setup_memory(void)
 	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
 		unsigned long start_chunk, end_chunk, pfn;
 
-		if (memory_chunk[i].type != CHUNK_READ_WRITE)
+		if (memory_chunk[i].type != CHUNK_READ_WRITE &&
+		    memory_chunk[i].type != CHUNK_CRASHK)
 			continue;
 		start_chunk = PFN_DOWN(memory_chunk[i].addr);
 		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
@@ -644,6 +845,15 @@ setup_memory(void)
 	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
 			BOOTMEM_DEFAULT);
 
+#ifdef CONFIG_CRASH_DUMP
+	if (crashk_res.start)
+		reserve_bootmem(crashk_res.start,
+				crashk_res.end - crashk_res.start + 1,
+				BOOTMEM_DEFAULT);
+	if (is_kdump_kernel())
+		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
+				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
+#endif
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START && INITRD_SIZE) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
@@ -812,8 +1022,11 @@ setup_arch(char **cmdline_p)
 	setup_ipl();
 	setup_memory_end();
 	setup_addressing_mode();
+	reserve_oldmem();
+	reserve_crashkernel();
 	setup_memory();
 	setup_resources();
+	setup_vmcoreinfo();
 	setup_restart_psw();
 	setup_lowcore();
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 9a40e1cc5ec3..05a85bc14c98 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -30,6 +30,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/lowcore.h>
+#include <asm/compat.h>
 #include "entry.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -116,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 
 	/* Copy a 'clean' PSW mask to the user to avoid leaking
 	   information about whether PER is currently on.  */
-	user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask);
+	user_sregs.regs.psw.mask = psw_user_bits |
+		(regs->psw.mask & PSW_MASK_USER);
 	user_sregs.regs.psw.addr = regs->psw.addr;
 	memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
 	memcpy(&user_sregs.regs.acrs, current->thread.acrs,
@@ -143,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs));
 	if (err)
 		return err;
-	regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask,
-					user_sregs.regs.psw.mask);
-	regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr;
+	/* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
+	regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+		(user_sregs.regs.psw.mask & PSW_MASK_USER);
+	/* Check for invalid amode */
+	if (regs->psw.mask & PSW_MASK_EA)
+		regs->psw.mask |= PSW_MASK_BA;
+	regs->psw.addr = user_sregs.regs.psw.addr;
 	memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
 	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
 	       sizeof(sregs->regs.acrs));
@@ -156,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	current->thread.fp_regs.fpc &= FPC_VALID_MASK;
 
 	restore_fp_regs(&current->thread.fp_regs);
-	regs->svcnr = 0;	/* disable syscall checks */
+	clear_thread_flag(TIF_SYSCALL);	/* No longer in a system call */
 	return 0;
 }
 
@@ -288,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 
 	/* Set up registers for signal handler */
 	regs->gprs[15] = (unsigned long) frame;
+	regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA;	/* 64 bit amode */
 	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
 
 	regs->gprs[2] = map_signal(sig);
@@ -356,6 +363,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 
 	/* Set up registers for signal handler */
 	regs->gprs[15] = (unsigned long) frame;
+	regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA;	/* 64 bit amode */
 	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
 
 	regs->gprs[2] = map_signal(sig);
@@ -401,7 +409,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
  */
 void do_signal(struct pt_regs *regs)
 {
-	unsigned long retval = 0, continue_addr = 0, restart_addr = 0;
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
@@ -421,54 +428,45 @@ void do_signal(struct pt_regs *regs)
 	else
 		oldset = &current->blocked;
 
-	/* Are we from a system call? */
-	if (regs->svcnr) {
-		continue_addr = regs->psw.addr;
-		restart_addr = continue_addr - regs->ilc;
-		retval = regs->gprs[2];
-
-		/* Prepare for system call restart.  We do this here so that a
-		   debugger will see the already changed PSW. */
-		switch (retval) {
-		case -ERESTARTNOHAND:
-		case -ERESTARTSYS:
-		case -ERESTARTNOINTR:
-			regs->gprs[2] = regs->orig_gpr2;
-			regs->psw.addr = restart_addr;
-			break;
-		case -ERESTART_RESTARTBLOCK:
-			regs->gprs[2] = -EINTR;
-		}
-		regs->svcnr = 0;	/* Don't deal with this again. */
-	}
-
-	/* Get signal to deliver.  When running under ptrace, at this point
-	   the debugger may change all our registers ... */
+	/*
+	 * Get signal to deliver. When running under ptrace, at this point
+	 * the debugger may change all our registers, including the system
+	 * call information.
+	 */
+	current_thread_info()->system_call =
+		test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0;
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
-	/* Depending on the signal settings we may need to revert the
-	   decision to restart the system call. */
-	if (signr > 0 && regs->psw.addr == restart_addr) {
-		if (retval == -ERESTARTNOHAND
-		    || (retval == -ERESTARTSYS
-			 && !(current->sighand->action[signr-1].sa.sa_flags
-			      & SA_RESTART))) {
-			regs->gprs[2] = -EINTR;
-			regs->psw.addr = continue_addr;
-		}
-	}
-
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-		int ret;
-#ifdef CONFIG_COMPAT
-		if (is_compat_task()) {
-			ret = handle_signal32(signr, &ka, &info, oldset, regs);
-	        }
-		else
-#endif
-			ret = handle_signal(signr, &ka, &info, oldset, regs);
-		if (!ret) {
+		if (current_thread_info()->system_call) {
+			regs->svc_code = current_thread_info()->system_call;
+			/* Check for system call restarting. */
+			switch (regs->gprs[2]) {
+			case -ERESTART_RESTARTBLOCK:
+			case -ERESTARTNOHAND:
+				regs->gprs[2] = -EINTR;
+				break;
+			case -ERESTARTSYS:
+				if (!(ka.sa.sa_flags & SA_RESTART)) {
+					regs->gprs[2] = -EINTR;
+					break;
+				}
+			/* fallthrough */
+			case -ERESTARTNOINTR:
+				regs->gprs[2] = regs->orig_gpr2;
+				regs->psw.addr =
+					__rewind_psw(regs->psw,
+						     regs->svc_code >> 16);
+				break;
+			}
+			/* No longer in a system call */
+			clear_thread_flag(TIF_SYSCALL);
+		}
+
+		if ((is_compat_task() ?
+		     handle_signal32(signr, &ka, &info, oldset, regs) :
+		     handle_signal(signr, &ka, &info, oldset, regs)) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
@@ -482,11 +480,32 @@ void do_signal(struct pt_regs *regs)
 			 * Let tracing know that we've done the handler setup.
 			 */
 			tracehook_signal_handler(signr, &info, &ka, regs,
-					test_thread_flag(TIF_SINGLE_STEP));
+					 test_thread_flag(TIF_SINGLE_STEP));
 		}
 		return;
 	}
 
+	/* No handlers present - check for system call restart */
+	if (current_thread_info()->system_call) {
+		regs->svc_code = current_thread_info()->system_call;
+		switch (regs->gprs[2]) {
+		case -ERESTART_RESTARTBLOCK:
+			/* Restart with sys_restart_syscall */
+			regs->svc_code = __NR_restart_syscall;
+		/* fallthrough */
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			/* Restart system call with magic TIF bit. */
+			regs->gprs[2] = regs->orig_gpr2;
+			set_thread_flag(TIF_SYSCALL);
+			break;
+		default:
+			clear_thread_flag(TIF_SYSCALL);
+			break;
+		}
+	}
+
 	/*
 	 * If there's no signal to deliver, we just put the saved sigmask back.
 	 */
@@ -494,13 +513,6 @@ void do_signal(struct pt_regs *regs)
 		clear_thread_flag(TIF_RESTORE_SIGMASK);
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
-
-	/* Restart a different system call. */
-	if (retval == -ERESTART_RESTARTBLOCK
-	    && regs->psw.addr == continue_addr) {
-		regs->gprs[2] = __NR_restart_syscall;
-		set_thread_flag(TIF_RESTART_SVC);
-	}
 }
 
 void do_notify_resume(struct pt_regs *regs)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6ab16ac64d29..3ea872890da2 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -38,6 +38,7 @@
 #include <linux/timex.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
+#include <linux/crash_dump.h>
 #include <asm/asm-offsets.h>
 #include <asm/ipl.h>
 #include <asm/setup.h>
@@ -97,6 +98,29 @@ static inline int cpu_stopped(int cpu)
 	return raw_cpu_stopped(cpu_logical_map(cpu));
 }
 
+/*
+ * Ensure that PSW restart is done on an online CPU
+ */
+void smp_restart_with_online_cpu(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (stap() == __cpu_logical_map[cpu]) {
+			/* We are online: Enable DAT again and return */
+			__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
+			return;
+		}
+	}
+	/* We are not online: Do PSW restart on an online CPU */
+	while (sigp(cpu, sigp_restart) == sigp_busy)
+		cpu_relax();
+	/* And stop ourself */
+	while (raw_sigp(stap(), sigp_stop) == sigp_busy)
+		cpu_relax();
+	for (;;);
+}
+
 void smp_switch_to_ipl_cpu(void (*func)(void *), void *data)
 {
 	struct _lowcore *lc, *current_lc;
@@ -106,14 +130,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data)
 
 	if (smp_processor_id() == 0)
 		func(data);
-	__load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY);
+	__load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE |
+			PSW_MASK_EA | PSW_MASK_BA);
 	/* Disable lowcore protection */
 	__ctl_clear_bit(0, 28);
 	current_lc = lowcore_ptr[smp_processor_id()];
 	lc = lowcore_ptr[0];
 	if (!lc)
 		lc = current_lc;
-	lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	lc->restart_psw.mask =
+		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
 	lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu;
 	if (!cpu_online(0))
 		smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]);
@@ -135,7 +161,7 @@ void smp_send_stop(void)
 	int cpu, rc;
 
 	/* Disable all interrupts/machine checks */
-	__load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
+	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
 	trace_hardirqs_off();
 
 	/* stop all processors */
@@ -161,7 +187,10 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 {
 	unsigned long bits;
 
-	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
+	if (ext_int_code == 0x1202)
+		kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++;
+	else
+		kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++;
 	/*
 	 * handle bit signal external calls
 	 */
@@ -183,12 +212,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
  */
 static void smp_ext_bitcall(int cpu, int sig)
 {
+	int order;
+
 	/*
 	 * Set signaling bit in lowcore of target cpu and kick it
 	 */
 	set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast);
-	while (sigp(cpu, sigp_emergency_signal) == sigp_busy)
+	while (1) {
+		order = smp_vcpu_scheduled(cpu) ?
+			sigp_external_call : sigp_emergency_signal;
+		if (sigp(cpu, order) != sigp_busy)
+			break;
 		udelay(10);
+	}
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -281,11 +317,13 @@ void smp_ctl_clear_bit(int cr, int bit)
 }
 EXPORT_SYMBOL(smp_ctl_clear_bit);
 
-#ifdef CONFIG_ZFCPDUMP
+#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
 
 static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
 {
-	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+	if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE)
+		return;
+	if (is_kdump_kernel())
 		return;
 	if (cpu >= NR_CPUS) {
 		pr_warning("CPU %i exceeds the maximum %i and is excluded from "
@@ -403,6 +441,18 @@ static void __init smp_detect_cpus(void)
 	info = kmalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		panic("smp_detect_cpus failed to allocate memory\n");
+#ifdef CONFIG_CRASH_DUMP
+	if (OLDMEM_BASE && !is_kdump_kernel()) {
+		struct save_area *save_area;
+
+		save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
+		if (!save_area)
+			panic("could not allocate memory for save area\n");
+		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
+				 0x200, 0);
+		zfcpdump_save_areas[0] = save_area;
+	}
+#endif
 	/* Use sigp detection algorithm if sclp doesn't work. */
 	if (sclp_get_cpu_info(info)) {
 		smp_use_sigp_detection = 1;
@@ -463,7 +513,8 @@ int __cpuinit start_secondary(void *cpuvoid)
 	set_cpu_online(smp_processor_id(), true);
 	ipi_call_unlock();
 	__ctl_clear_bit(0, 28); /* Disable lowcore protection */
-	S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	S390_lowcore.restart_psw.mask =
+		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
 	S390_lowcore.restart_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
 	__ctl_set_bit(0, 28); /* Enable lowcore protection */
@@ -511,7 +562,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 	memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512);
 	lowcore->async_stack = async_stack + ASYNC_SIZE;
 	lowcore->panic_stack = panic_stack + PAGE_SIZE;
-	lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY;
+	lowcore->restart_psw.mask =
+		PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA;
 	lowcore->restart_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
 	if (user_mode != HOME_SPACE_MODE)
@@ -712,6 +764,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	/* request the 0x1201 emergency signal external interrupt */
 	if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
 		panic("Couldn't request external interrupt 0x1201");
+	/* request the 0x1202 external call external interrupt */
+	if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
+		panic("Couldn't request external interrupt 0x1202");
 
 	/* Reallocate current lowcore, but keep its contents. */
 	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index cf9e5c6d5527..47df775c844d 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -7,6 +7,8 @@
  */
 
 #include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
 #include <asm/system.h>
 
 /*
@@ -14,6 +16,123 @@
  */
 extern const void __nosave_begin, __nosave_end;
 
+/*
+ * The restore of the saved pages in an hibernation image will set
+ * the change and referenced bits in the storage key for each page.
+ * Overindication of the referenced bits after an hibernation cycle
+ * does not cause any harm but the overindication of the change bits
+ * would cause trouble.
+ * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
+ * page to the most significant byte of the associated page frame
+ * number in the hibernation image.
+ */
+
+/*
+ * Key storage is allocated as a linked list of pages.
+ * The size of the keys array is (PAGE_SIZE - sizeof(long))
+ */
+struct page_key_data {
+	struct page_key_data *next;
+	unsigned char data[];
+};
+
+#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))
+
+static struct page_key_data *page_key_data;
+static struct page_key_data *page_key_rp, *page_key_wp;
+static unsigned long page_key_rx, page_key_wx;
+
+/*
+ * For each page in the hibernation image one additional byte is
+ * stored in the most significant byte of the page frame number.
+ * On suspend no additional memory is required but on resume the
+ * keys need to be memorized until the page data has been restored.
+ * Only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+	return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+}
+
+/*
+ * Free page_key_data list of arrays.
+ */
+void page_key_free(void)
+{
+	struct page_key_data *pkd;
+
+	while (page_key_data) {
+		pkd = page_key_data;
+		page_key_data = pkd->next;
+		free_page((unsigned long) pkd);
+	}
+}
+
+/*
+ * Allocate page_key_data list of arrays with enough room to store
+ * one byte for each page in the hibernation image.
+ */
+int page_key_alloc(unsigned long pages)
+{
+	struct page_key_data *pk;
+	unsigned long size;
+
+	size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+	while (size--) {
+		pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+		if (!pk) {
+			page_key_free();
+			return -ENOMEM;
+		}
+		pk->next = page_key_data;
+		page_key_data = pk;
+	}
+	page_key_rp = page_key_wp = page_key_data;
+	page_key_rx = page_key_wx = 0;
+	return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+	unsigned long addr;
+
+	addr = (unsigned long) page_address(pfn_to_page(*pfn));
+	*(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+	page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
+	*(unsigned char *) pfn = 0;
+	if (++page_key_wx < PAGE_KEY_DATA_SIZE)
+		return;
+	page_key_wp = page_key_wp->next;
+	page_key_wx = 0;
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays and set the
+ * storage key of the page referred by @address. If @address refers to
+ * a "safe" page the swsusp_arch_resume code will transfer the storage
+ * key from the buffer page to the original page.
+ */
+void page_key_write(void *address)
+{
+	page_set_storage_key((unsigned long) address,
+			     page_key_rp->data[page_key_rx], 0);
+	if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
+		return;
+	page_key_rp = page_key_rp->next;
+	page_key_rx = 0;
+}
+
 int pfn_is_nosave(unsigned long pfn)
 {
 	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index 51bcdb50a230..acb78cdee896 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -136,11 +136,14 @@ ENTRY(swsusp_arch_resume)
 0:
 	lg	%r2,8(%r1)
 	lg	%r4,0(%r1)
+	iske	%r0,%r4
 	lghi	%r3,PAGE_SIZE
 	lghi	%r5,PAGE_SIZE
 1:
 	mvcle	%r2,%r4,0
 	jo	1b
+	lg	%r2,8(%r1)
+	sske	%r0,%r2
 	lg	%r1,16(%r1)
 	ltgr	%r1,%r1
 	jnz	0b
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 5c9e439bf3f6..2a94b774695c 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -442,7 +442,7 @@ void s390_adjust_jiffies(void)
 		 */
 		FP_UNPACK_SP(SA, &fmil);
 		if ((info->capability >> 23) == 0)
-			FP_FROM_INT_S(SB, info->capability, 32, int);
+			FP_FROM_INT_S(SB, (long) info->capability, 64, long);
 		else
 			FP_UNPACK_SP(SB, &info->capability);
 		FP_DIV_S(SR, SA, SB);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dff933065ab6..ebbfab3c6e5a 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -48,6 +48,7 @@
 #include <asm/timer.h>
 #include <asm/etr.h>
 #include <asm/cio.h>
+#include "entry.h"
 
 /* change this if you have some constant time drift */
 #define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
@@ -109,10 +110,14 @@ static void fixup_clock_comparator(unsigned long long delta)
 	set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-static int s390_next_event(unsigned long delta,
+static int s390_next_ktime(ktime_t expires,
 			   struct clock_event_device *evt)
 {
-	S390_lowcore.clock_comparator = get_clock() + delta;
+	u64 nsecs;
+
+	nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset()));
+	do_div(nsecs, 125);
+	S390_lowcore.clock_comparator = TOD_UNIX_EPOCH + (nsecs << 9);
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	return 0;
 }
@@ -137,14 +142,15 @@ void init_cpu_timer(void)
 	cpu = smp_processor_id();
 	cd = &per_cpu(comparators, cpu);
 	cd->name		= "comparator";
-	cd->features		= CLOCK_EVT_FEAT_ONESHOT;
+	cd->features		= CLOCK_EVT_FEAT_ONESHOT |
+				  CLOCK_EVT_FEAT_KTIME;
 	cd->mult		= 16777;
 	cd->shift		= 12;
 	cd->min_delta_ns	= 1;
 	cd->max_delta_ns	= LONG_MAX;
 	cd->rating		= 400;
 	cd->cpumask		= cpumask_of(cpu);
-	cd->set_next_event	= s390_next_event;
+	cd->set_next_ktime	= s390_next_ktime;
 	cd->set_mode		= s390_set_mode;
 
 	clockevents_register_device(cd);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 0cd340b72632..77b8942b9a15 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -299,8 +299,8 @@ out:
 }
 __initcall(init_topology_update);
 
-static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask,
-			int offset)
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
 {
 	int i, nr_masks;
 
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index ffabcd9d3363..a9807dd86276 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs)
 	       mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
 	       mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
 #ifdef CONFIG_64BIT
-	printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS));
+	printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
 #endif
 	printk("\n%s GPRS: " FOURLONG, mode,
 	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
@@ -334,7 +334,8 @@ void __kprobes do_per_trap(struct pt_regs *regs)
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code = TRAP_HWBKPT;
-	info.si_addr = (void *) current->thread.per_event.address;
+	info.si_addr =
+		(void __force __user *) current->thread.per_event.address;
 	force_sig_info(SIGTRAP, &info, current);
 }
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 2d6228f60cd6..bb48977f5469 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void)
 	psw_t psw;
 
 	/* Wait for external, I/O or machine check interrupt. */
-	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+	psw.mask = psw_kernel_bits | PSW_MASK_WAIT |
+		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 
 	idle->nohz_delay = 0;
 
@@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void)
 		 *	set_cpu_timer(VTIMER_MAX_SLICE);
 		 *	idle->idle_enter = get_clock();
 		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 *			   PSW_MASK_DAT | PSW_MASK_IO |
+		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK);
 		 * The difference is that the inline assembly makes sure that
 		 * the last three instruction are stpt, stck and lpsw in that
 		 * order. This is done to increase the precision.
@@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void)
 		 *	vq->idle = get_cpu_timer();
 		 *	idle->idle_enter = get_clock();
 		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 *			   PSW_MASK_DAT | PSW_MASK_IO |
+		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK);
 		 * The difference is that the inline assembly makes sure that
 		 * the last three instruction are stpt, stck and lpsw in that
 		 * order. This is done to increase the precision.
@@ -458,7 +461,7 @@ void add_virt_timer_periodic(void *new)
 }
 EXPORT_SYMBOL(add_virt_timer_periodic);
 
-int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic)
+static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic)
 {
 	struct vtimer_queue *vq;
 	unsigned long flags;
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 9e4c84187cf5..87cedd61be04 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,7 +1,7 @@
 /*
  * diag.c - handling diagnose instructions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2011
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -15,6 +15,34 @@
 #include <linux/kvm_host.h>
 #include "kvm-s390.h"
 
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, end;
+	unsigned long prefix  = vcpu->arch.sie_block->prefix;
+
+	start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+
+	if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+	    || start < 2 * PAGE_SIZE)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+	vcpu->stat.diagnose_10++;
+
+	/* we checked for start > end above */
+	if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
+		gmap_discard(start, end, vcpu->arch.gmap);
+	} else {
+		if (start < prefix)
+			gmap_discard(start, prefix, vcpu->arch.gmap);
+		if (end >= prefix)
+			gmap_discard(prefix + 2 * PAGE_SIZE,
+				     end, vcpu->arch.gmap);
+	}
+	return 0;
+}
+
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
 
 	switch (code) {
+	case 0x10:
+		return diag_release_pages(vcpu);
 	case 0x44:
 		return __diag_time_slice_end(vcpu);
 	case 0x308:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index c9aeb4b4d0b8..87c16705b381 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
+			return 1;
 	case KVM_S390_INT_EMERGENCY:
 		if (psw_extint_disabled(vcpu))
 			return 0;
@@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
+	case KVM_S390_INT_EXTERNAL_CALL:
 	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_VIRTIO:
@@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
 			exception = 1;
 		break;
 
+	case KVM_S390_INT_EXTERNAL_CALL:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+		vcpu->stat.deliver_external_call++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
 	case KVM_S390_INT_SERVICE:
 		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
 			   inti->ext.ext_params);
@@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 		break;
 	case KVM_S390_PROGRAM_INT:
 	case KVM_S390_SIGP_STOP:
+	case KVM_S390_INT_EXTERNAL_CALL:
 	case KVM_S390_INT_EMERGENCY:
 	default:
 		kfree(inti);
@@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_S390_SIGP_STOP:
 	case KVM_S390_RESTART:
+	case KVM_S390_INT_EXTERNAL_CALL:
 	case KVM_S390_INT_EMERGENCY:
 		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
 		inti->type = s390int->type;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index dc2b580e27bc..0bd3bea1e4cd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
@@ -64,11 +65,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ NULL }
 };
@@ -175,6 +178,8 @@ int kvm_arch_init_vm(struct kvm *kvm)
 	if (rc)
 		goto out_err;
 
+	rc = -ENOMEM;
+
 	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
 	if (!kvm->arch.sca)
 		goto out_err;
@@ -312,11 +317,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 				      unsigned int id)
 {
-	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
-	int rc = -ENOMEM;
+	struct kvm_vcpu *vcpu;
+	int rc = -EINVAL;
+
+	if (id >= KVM_MAX_VCPUS)
+		goto out;
 
+	rc = -ENOMEM;
+
+	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
 	if (!vcpu)
-		goto out_nomem;
+		goto out;
 
 	vcpu->arch.sie_block = (struct kvm_s390_sie_block *)
 					get_zeroed_page(GFP_KERNEL);
@@ -352,7 +363,7 @@ out_free_sie_block:
 	free_page((unsigned long)(vcpu->arch.sie_block));
 out_free_cpu:
 	kfree(vcpu);
-out_nomem:
+out:
 	return ERR_PTR(rc);
 }
 
@@ -386,6 +397,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 {
 	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+	restore_access_regs(vcpu->arch.guest_acrs);
 	return 0;
 }
 
@@ -401,6 +413,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+	restore_fp_regs(&vcpu->arch.guest_fpregs);
 	return 0;
 }
 
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index d6a50c1fb2e6..f815118835f3 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 		return -ENOMEM;
 
 	inti->type = KVM_S390_INT_EMERGENCY;
+	inti->emerg.code = vcpu->vcpu_id;
 
 	spin_lock(&fi->lock);
 	li = fi->local_int[cpu_addr];
@@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
 		wake_up_interruptible(&li->wq);
 	spin_unlock_bh(&li->lock);
 	rc = 0; /* order accepted */
+	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+unlock:
+	spin_unlock(&fi->lock);
+	return rc;
+}
+
+static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_INT_EXTERNAL_CALL;
+	inti->extcall.code = vcpu->vcpu_id;
+
+	spin_lock(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+	VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
 unlock:
 	spin_unlock(&fi->lock);
-	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
 	return rc;
 }
 
@@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_sense(vcpu, cpu_addr,
 				  &vcpu->arch.guest_gprs[r1]);
 		break;
+	case SIGP_EXTERNAL_CALL:
+		vcpu->stat.instruction_sigp_external_call++;
+		rc = __sigp_external_call(vcpu, cpu_addr);
+		break;
 	case SIGP_EMERGENCY:
 		vcpu->stat.instruction_sigp_emergency++;
 		rc = __sigp_emergency(vcpu, cpu_addr);
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index a65229d91c92..db92f044024c 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs)
 	u64 clock_saved;
 	u64 end;
 
-	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT;
+	mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT |
+		PSW_MASK_EXT | PSW_MASK_MCHECK;
 	end = get_clock() + (usecs << 12);
 	clock_saved = local_tick_disable();
 	__ctl_store(cr0_saved, 0, 0);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 74833831417f..342ae35a5ba9 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return __futex_atomic_op_pt(op, uaddr, oparg, old);
 	spin_lock(&current->mm->page_table_lock);
-	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+	uaddr = (u32 __force __user *)
+		__dat_user_addr((__force unsigned long) uaddr);
 	if (!uaddr) {
 		spin_unlock(&current->mm->page_table_lock);
 		return -EFAULT;
@@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
 	spin_lock(&current->mm->page_table_lock);
-	uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+	uaddr = (u32 __force __user *)
+		__dat_user_addr((__force unsigned long) uaddr);
 	if (!uaddr) {
 		spin_unlock(&current->mm->page_table_lock);
 		return -EFAULT;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9564fc779b27..1766def5bc3f 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -307,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
 
 #ifdef CONFIG_PGSTE
 	if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
-		address = gmap_fault(address,
+		address = __gmap_fault(address,
 				     (struct gmap *) S390_lowcore.gmap);
 		if (address == -EFAULT) {
 			fault = VM_FAULT_BADMAP;
@@ -393,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code,
 	int fault;
 
 	/* Protection exception is suppressing, decrement psw address. */
-	regs->psw.addr -= (pgm_int_code >> 16);
+	regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16);
 	/*
 	 * Check for low-address protection.  This needs to be treated
 	 * as a special case because the translation exception code
@@ -454,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write)
 	struct pt_regs regs;
 	int access, fault;
 
-	regs.psw.mask = psw_kernel_bits;
+	regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK;
 	if (!irqs_disabled())
 		regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT;
 	regs.psw.addr = (unsigned long) __builtin_return_address(0);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 5dbbaa6e594c..1cb8427bedfb 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/errno.h>
+#include <linux/gfp.h>
 #include <asm/system.h>
 
 /*
@@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size)
 	return copied < 0 ? -EFAULT : 0;
 }
 
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
 int memcpy_real(void *dest, void *src, size_t count)
 {
 	register unsigned long _dest asm("2") = (unsigned long) dest;
@@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count)
 	__ctl_load(cr0, 0, 0);
 	preempt_enable();
 }
+
+/*
+ * Copy memory from kernel (real) to user (virtual)
+ */
+int copy_to_user_real(void __user *dest, void *src, size_t count)
+{
+	int offs = 0, size, rc;
+	char *buf;
+
+	buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	rc = -EFAULT;
+	while (offs < count) {
+		size = min(PAGE_SIZE, count - offs);
+		if (memcpy_real(buf, src + offs, size))
+			goto out;
+		if (copy_to_user(dest + offs, buf, size))
+			goto out;
+		offs += size;
+	}
+	rc = 0;
+out:
+	free_page((unsigned long) buf);
+	return rc;
+}
+
+/*
+ * Copy memory from user (virtual) to kernel (real)
+ */
+int copy_from_user_real(void *dest, void __user *src, size_t count)
+{
+	int offs = 0, size, rc;
+	char *buf;
+
+	buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	rc = -EFAULT;
+	while (offs < count) {
+		size = min(PAGE_SIZE, count - offs);
+		if (copy_from_user(buf, src + offs, size))
+			goto out;
+		if (memcpy_real(dest + offs, buf, size))
+			goto out;
+		offs += size;
+	}
+	rc = 0;
+out:
+	free_page((unsigned long) buf);
+	return rc;
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c9a9f7f18188..f09c74881b7e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -26,6 +26,7 @@
 
 #include <linux/personality.h>
 #include <linux/mm.h>
+#include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <asm/pgalloc.h>
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index d013ed39743b..b36537a5f43e 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
+#include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 
 static void change_page_attr(unsigned long addr, int numpages,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5d56c2b95b14..301c84d3b542 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,5 +1,5 @@
 /*
- *    Copyright IBM Corp. 2007,2009
+ *    Copyright IBM Corp. 2007,2011
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
@@ -222,6 +222,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Free all segment & region tables. */
 	down_read(&gmap->mm->mmap_sem);
+	spin_lock(&gmap->mm->page_table_lock);
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
 		table = (unsigned long *) page_to_phys(page);
 		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
@@ -230,6 +231,7 @@ void gmap_free(struct gmap *gmap)
 				gmap_unlink_segment(gmap, table);
 		__free_pages(page, ALLOC_ORDER);
 	}
+	spin_unlock(&gmap->mm->page_table_lock);
 	up_read(&gmap->mm->mmap_sem);
 	list_del(&gmap->list);
 	kfree(gmap);
@@ -256,6 +258,9 @@ void gmap_disable(struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_disable);
 
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
 static int gmap_alloc_table(struct gmap *gmap,
 			       unsigned long *table, unsigned long init)
 {
@@ -267,14 +272,12 @@ static int gmap_alloc_table(struct gmap *gmap,
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
 	crst_table_init(new, init);
-	down_read(&gmap->mm->mmap_sem);
 	if (*table & _REGION_ENTRY_INV) {
 		list_add(&page->lru, &gmap->crst_list);
 		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
 			(*table & _REGION_ENTRY_TYPE_MASK);
 	} else
 		__free_pages(page, ALLOC_ORDER);
-	up_read(&gmap->mm->mmap_sem);
 	return 0;
 }
 
@@ -299,6 +302,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 
 	flush = 0;
 	down_read(&gmap->mm->mmap_sem);
+	spin_lock(&gmap->mm->page_table_lock);
 	for (off = 0; off < len; off += PMD_SIZE) {
 		/* Walk the guest addr space page table */
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -320,6 +324,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
 		*table = _SEGMENT_ENTRY_INV;
 	}
 out:
+	spin_unlock(&gmap->mm->page_table_lock);
 	up_read(&gmap->mm->mmap_sem);
 	if (flush)
 		gmap_flush_tlb(gmap);
@@ -350,6 +355,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 
 	flush = 0;
 	down_read(&gmap->mm->mmap_sem);
+	spin_lock(&gmap->mm->page_table_lock);
 	for (off = 0; off < len; off += PMD_SIZE) {
 		/* Walk the gmap address space page table */
 		table = gmap->table + (((to + off) >> 53) & 0x7ff);
@@ -373,19 +379,24 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 		flush |= gmap_unlink_segment(gmap, table);
 		*table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off);
 	}
+	spin_unlock(&gmap->mm->page_table_lock);
 	up_read(&gmap->mm->mmap_sem);
 	if (flush)
 		gmap_flush_tlb(gmap);
 	return 0;
 
 out_unmap:
+	spin_unlock(&gmap->mm->page_table_lock);
 	up_read(&gmap->mm->mmap_sem);
 	gmap_unmap_segment(gmap, to, len);
 	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(gmap_map_segment);
 
-unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 {
 	unsigned long *table, vmaddr, segment;
 	struct mm_struct *mm;
@@ -445,16 +456,75 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
 		page = pmd_page(*pmd);
 		mp = (struct gmap_pgtable *) page->index;
 		rmap->entry = table;
+		spin_lock(&mm->page_table_lock);
 		list_add(&rmap->list, &mp->mapper);
+		spin_unlock(&mm->page_table_lock);
 		/* Set gmap segment table entry to page table. */
 		*table = pmd_val(*pmd) & PAGE_MASK;
 		return vmaddr | (address & ~PMD_MASK);
 	}
 	return -EFAULT;
+}
 
+unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	unsigned long rc;
+
+	down_read(&gmap->mm->mmap_sem);
+	rc = __gmap_fault(address, gmap);
+	up_read(&gmap->mm->mmap_sem);
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_fault);
 
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+{
+
+	unsigned long *table, address, size;
+	struct vm_area_struct *vma;
+	struct gmap_pgtable *mp;
+	struct page *page;
+
+	down_read(&gmap->mm->mmap_sem);
+	address = from;
+	while (address < to) {
+		/* Walk the gmap address space page table */
+		table = gmap->table + ((address >> 53) & 0x7ff);
+		if (unlikely(*table & _REGION_ENTRY_INV)) {
+			address = (address + PMD_SIZE) & PMD_MASK;
+			continue;
+		}
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + ((address >> 42) & 0x7ff);
+		if (unlikely(*table & _REGION_ENTRY_INV)) {
+			address = (address + PMD_SIZE) & PMD_MASK;
+			continue;
+		}
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + ((address >> 31) & 0x7ff);
+		if (unlikely(*table & _REGION_ENTRY_INV)) {
+			address = (address + PMD_SIZE) & PMD_MASK;
+			continue;
+		}
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = table + ((address >> 20) & 0x7ff);
+		if (unlikely(*table & _SEGMENT_ENTRY_INV)) {
+			address = (address + PMD_SIZE) & PMD_MASK;
+			continue;
+		}
+		page = pfn_to_page(*table >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		vma = find_vma(gmap->mm, mp->vmaddr);
+		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
+		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
+			       size, NULL);
+		address = (address + PMD_SIZE) & PMD_MASK;
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
 void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
 {
 	struct gmap_rmap *rmap, *next;
@@ -662,8 +732,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 
 void __tlb_remove_table(void *_table)
 {
-	void *table = (void *)((unsigned long) _table & PAGE_MASK);
-	unsigned type = (unsigned long) _table & ~PAGE_MASK;
+	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
+	void *table = (void *)((unsigned long) _table & ~mask);
+	unsigned type = (unsigned long) _table & mask;
 
 	if (type)
 		__page_table_free_rcu(table, type);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 781ff5169560..4799383e2df9 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -335,6 +335,9 @@ void __init vmem_map_init(void)
 	ro_start = ((unsigned long)&_stext) & PAGE_MASK;
 	ro_end = PFN_ALIGN((unsigned long)&_eshared);
 	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		if (memory_chunk[i].type == CHUNK_CRASHK ||
+		    memory_chunk[i].type == CHUNK_OLDMEM)
+			continue;
 		start = memory_chunk[i].addr;
 		end = memory_chunk[i].addr + memory_chunk[i].size;
 		if (start >= ro_end || end <= ro_start)
@@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void)
 	for (i = 0; i < MEMORY_CHUNKS; i++) {
 		if (!memory_chunk[i].size)
 			continue;
+		if (memory_chunk[i].type == CHUNK_CRASHK ||
+		    memory_chunk[i].type == CHUNK_OLDMEM)
+			continue;
 		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
 		if (!seg)
 			panic("Out of memory...\n");
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 4552ce40c81a..f43c0e4282af 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -994,7 +994,7 @@ allocate_error:
  *
  * Returns 0 on success, !0 on failure.
  */
-int hwsampler_deallocate()
+int hwsampler_deallocate(void)
 {
 	int rc;
 
@@ -1035,7 +1035,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
 	return cb->sample_overflow;
 }
 
-int hwsampler_setup()
+int hwsampler_setup(void)
 {
 	int rc;
 	int cpu;
@@ -1102,7 +1102,7 @@ setup_exit:
 	return rc;
 }
 
-int hwsampler_shutdown()
+int hwsampler_shutdown(void)
 {
 	int rc;
 
@@ -1203,7 +1203,7 @@ start_all_exit:
  *
  * Returns 0 on success, !0 on failure.
  */
-int hwsampler_stop_all()
+int hwsampler_stop_all(void)
 {
 	int tmp_rc, rc, cpu;
 	struct hws_cpu_buffer *cb;
diff --git a/arch/sh/include/asm/sh_eth.h b/arch/sh/include/asm/sh_eth.h
deleted file mode 100644
index 0f325da0f923..000000000000
--- a/arch/sh/include/asm/sh_eth.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __ASM_SH_ETH_H__
-#define __ASM_SH_ETH_H__
-
-#include <linux/phy.h>
-
-enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN};
-enum {
-	SH_ETH_REG_GIGABIT,
-	SH_ETH_REG_FAST_SH4,
-	SH_ETH_REG_FAST_SH3_SH2
-};
-
-struct sh_eth_plat_data {
-	int phy;
-	int edmac_endian;
-	int register_type;
-	phy_interface_t phy_interface;
-	void (*set_mdio_gate)(unsigned long addr);
-
-	unsigned char mac_addr[6];
-	unsigned no_ether_link:1;
-	unsigned ether_link_active_low:1;
-};
-
-#endif
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 6f57325bb883..b8be20d42a0a 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -134,7 +134,8 @@ struct compat_statfs {
 	compat_fsid_t	f_fsid;
 	int		f_namelen;	/* SunOS ignores this field. */
 	int		f_frsize;
-	int		f_spare[5];
+	int		f_flags;
+	int		f_spare[4];
 };
 
 #define COMPAT_RLIM_INFINITY 0x7fffffff
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
index 1407c07bdade..f6ae2b2b6870 100644
--- a/arch/sparc/include/asm/pgtsrmmu.h
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -280,7 +280,7 @@ static inline unsigned long srmmu_hwprobe(unsigned long vaddr)
 	return retval;
 }
 #else
-#define srmmu_hwprobe(addr) (srmmu_swprobe(addr, 0) & SRMMU_PTE_PMASK)
+#define srmmu_hwprobe(addr) srmmu_swprobe(addr, 0)
 #endif
 
 static inline int
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 1e94f946570e..8aa0d4408586 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -230,7 +230,8 @@ static void pci_parse_of_addrs(struct platform_device *op,
 			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
 		} else if (i == dev->rom_base_reg) {
 			res = &dev->resource[PCI_ROM_RESOURCE];
-			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+			flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE
+			      | IORESOURCE_SIZEALIGN;
 		} else {
 			printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
 			continue;
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 1ba95aff5d59..2caa556db86d 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -273,10 +273,7 @@ void do_sigreturn32(struct pt_regs *regs)
 		case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32);
 	}
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 	return;
 
 segv:
@@ -377,10 +374,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 		case 1: set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32);
 	}
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 	return;
 segv:
 	force_sig(SIGSEGV, current);
@@ -782,6 +776,7 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka,
 				  siginfo_t *info,
 				  sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int err;
 
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -792,12 +787,10 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka,
 	if (err)
 		return err;
 
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NOMASK))
-		sigaddset(&current->blocked,signr);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, signr);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(signr, info, ka, regs, 0);
 
@@ -881,7 +874,7 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs,
 	 */
 	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
 		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+		set_current_blocked(&current->saved_sigmask);
 	}
 }
 
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 04ede8f04add..8ce247ac04cc 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -62,12 +62,13 @@ struct rt_signal_frame {
 
 static int _sigpause_common(old_sigset_t set)
 {
-	set &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
+	sigset_t blocked;
+
 	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, set);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+
+	set &= _BLOCKABLE;
+	siginitset(&blocked, set);
+	set_current_blocked(&blocked);
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
@@ -139,10 +140,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 		goto segv_and_exit;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 	return;
 
 segv_and_exit:
@@ -209,10 +207,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 	}
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 	return;
 segv:
 	force_sig(SIGSEGV, current);
@@ -470,6 +465,7 @@ static inline int
 handle_signal(unsigned long signr, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int err;
 
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -480,12 +476,10 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
 	if (err)
 		return err;
 
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NOMASK))
-		sigaddset(&current->blocked, signr);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, signr);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(signr, info, ka, regs, 0);
 
@@ -581,7 +575,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	 */
 	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
 		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+		set_current_blocked(&current->saved_sigmask);
 	}
 }
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 47509df3b893..a2b81598d905 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -70,10 +70,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs)
 				goto do_sigsegv;
 		}
 		sigdelsetmask(&set, ~_BLOCKABLE);
-		spin_lock_irq(&current->sighand->siglock);
-		current->blocked = set;
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
+		set_current_blocked(&set);
 	}
 	if (test_thread_flag(TIF_32BIT)) {
 		pc &= 0xffffffff;
@@ -242,12 +239,13 @@ struct rt_signal_frame {
 
 static long _sigpause_common(old_sigset_t set)
 {
-	set &= _BLOCKABLE;
-	spin_lock_irq(&current->sighand->siglock);
+	sigset_t blocked;
+
 	current->saved_sigmask = current->blocked;
-	siginitset(&current->blocked, set);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+
+	set &= _BLOCKABLE;
+	siginitset(&blocked, set);
+	set_current_blocked(&blocked);
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
@@ -327,10 +325,7 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	pt_regs_clear_syscall(regs);
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+	set_current_blocked(&set);
 	return;
 segv:
 	force_sig(SIGSEGV, current);
@@ -484,18 +479,17 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka,
 				siginfo_t *info,
 				sigset_t *oldset, struct pt_regs *regs)
 {
+	sigset_t blocked;
 	int err;
 
 	err = setup_rt_frame(ka, regs, signr, oldset,
 			     (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL);
 	if (err)
 		return err;
-	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NOMASK))
-		sigaddset(&current->blocked,signr);
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
+		sigaddset(&blocked, signr);
+	set_current_blocked(&blocked);
 
 	tracehook_signal_handler(signr, info, ka, regs, 0);
 
@@ -601,7 +595,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	 */
 	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
 		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+		set_current_blocked(&current->saved_sigmask);
 	}
 }
 
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 32b626c9d815..73370674ccff 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -713,17 +713,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a > b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPGT32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a > b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -733,17 +733,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a <= b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPLE32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a <= b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -753,17 +753,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a != b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPNE32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a != b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 
@@ -773,17 +773,17 @@ static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
 			s16 b = (rs2 >> (i * 16)) & 0xffff;
 
 			if (a == b)
-				rd_val |= 1 << i;
+				rd_val |= 8 >> i;
 		}
 		break;
 
 	case FCMPEQ32_OPF:
 		for (i = 0; i < 2; i++) {
-			s32 a = (rs1 >> (i * 32)) & 0xffff;
-			s32 b = (rs2 >> (i * 32)) & 0xffff;
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
 
 			if (a == b)
-				rd_val |= 1 << i;
+				rd_val |= 2 >> i;
 		}
 		break;
 	}
diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
index 34fe65751737..4d8c497517bd 100644
--- a/arch/sparc/lib/memcpy.S
+++ b/arch/sparc/lib/memcpy.S
@@ -7,40 +7,12 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-#ifdef __KERNEL__
-
-#define FUNC(x) 											\
+#define FUNC(x) 		\
 	.globl	x;		\
 	.type	x,@function;	\
-	.align	4;											\
+	.align	4;		\
 x:
 
-#undef FASTER_REVERSE
-#undef FASTER_NONALIGNED
-#define FASTER_ALIGNED
-
-/* In kernel these functions don't return a value.
- * One should use macros in asm/string.h for that purpose.
- * We return 0, so that bugs are more apparent.
- */
-#define SETUP_RETL
-#define RETL_INSN	clr	%o0
-
-#else
-
-/* libc */
-
-#include "DEFS.h"
-
-#define FASTER_REVERSE
-#define FASTER_NONALIGNED
-#define FASTER_ALIGNED
-
-#define SETUP_RETL	mov	%o0, %g6
-#define RETL_INSN	mov	%g6, %o0
-
-#endif
-
 /* Both these macros have to start with exactly the same insn */
 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
 	ldd	[%src + (offset) + 0x00], %t0; \
@@ -164,30 +136,6 @@ x:
 	.text
 	.align	4
 
-#ifdef FASTER_REVERSE
-
-70:	/* rdword_align */
-
-	andcc		%o1, 1, %g0
-	be		4f
-	 andcc		%o1, 2, %g0
-
-	ldub		[%o1 - 1], %g2
-	sub		%o1, 1, %o1
-	stb		%g2, [%o0 - 1]
-	sub		%o2, 1, %o2
-	be		3f
-	 sub		%o0, 1, %o0
-4:
-	lduh		[%o1 - 2], %g2
-	sub		%o1, 2, %o1
-	sth		%g2, [%o0 - 2]
-	sub		%o2, 2, %o2
-	b		3f
-	 sub		%o0, 2, %o0
-
-#endif /* FASTER_REVERSE */
-
 0:
 	retl
 	 nop		! Only bcopy returns here and it retuns void...
@@ -198,7 +146,7 @@ FUNC(__memmove)
 #endif
 FUNC(memmove)
 	cmp		%o0, %o1
-	SETUP_RETL
+	mov		%o0, %g7
 	bleu		9f
 	 sub		%o0, %o1, %o4
 
@@ -207,8 +155,6 @@ FUNC(memmove)
 	bleu		0f
 	 andcc		%o4, 3, %o5
 
-#ifndef FASTER_REVERSE
-
 	add		%o1, %o2, %o1
 	add		%o0, %o2, %o0
 	sub		%o1, 1, %o1
@@ -224,295 +170,7 @@ FUNC(memmove)
 	 sub		%o0, 1, %o0
 
 	retl
-	 RETL_INSN
-
-#else /* FASTER_REVERSE */
-
-	add		%o1, %o2, %o1
-	add		%o0, %o2, %o0
-	bne		77f
-	 cmp		%o2, 15
-	bleu		91f
-	 andcc		%o1, 3, %g0
-	bne		70b
-3:
-	 andcc		%o1, 4, %g0
-
-	be		2f
-	 mov		%o2, %g1
-
-	ld		[%o1 - 4], %o4
-	sub		%g1, 4, %g1
-	st		%o4, [%o0 - 4]
-	sub		%o1, 4, %o1
-	sub		%o0, 4, %o0
-2:
-	andcc		%g1, 0xffffff80, %g7
-	be		3f
-	 andcc		%o0, 4, %g0
-
-	be		74f + 4
-5:
-	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
-	sub		%o1, 128, %o1
-	bne		5b
-	 sub		%o0, 128, %o0
-3:
-	andcc		%g1, 0x70, %g7
-	be		72f
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(72f), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	sub		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(72f), %g0
-	 sub		%o0, %g7, %o0
-
-71:	/* rmemcpy_table */
-	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
-	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
-
-72:	/* rmemcpy_table_end */
-
-	be		73f
-	 andcc		%g1, 4, %g0
-
-	ldd		[%o1 - 0x08], %g2
-	sub		%o0, 8, %o0
-	sub		%o1, 8, %o1
-	st		%g2, [%o0]
-	st		%g3, [%o0 + 0x04]
-
-73:	/* rmemcpy_last7 */
-
-	be		1f
-	 andcc		%g1, 2, %g0
-
-	ld		[%o1 - 4], %g2
-	sub		%o1, 4, %o1
-	st		%g2, [%o0 - 4]
-	sub		%o0, 4, %o0
-1:
-	be		1f
-	 andcc		%g1, 1, %g0
-
-	lduh		[%o1 - 2], %g2
-	sub		%o1, 2, %o1
-	sth		%g2, [%o0 - 2]
-	sub		%o0, 2, %o0
-1:
-	be		1f
-	 nop
-
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 - 1]
-1:
-	retl
- 	 RETL_INSN
-
-74:	/* rldd_std */
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
-	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
-	sub		%o1, 128, %o1
-	bne		74b
-	 sub		%o0, 128, %o0
-
-	andcc		%g1, 0x70, %g7
-	be		72b
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(72b), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	sub		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(72b), %g0
-	 sub		%o0, %g7, %o0
-
-75:	/* rshort_end */
-
-	and		%o2, 0xe, %o3
-2:
-	sethi		%hi(76f), %o5
-	sll		%o3, 3, %o4
-	sub		%o0, %o3, %o0
-	sub		%o5, %o4, %o5
-	sub		%o1, %o3, %o1
-	jmpl		%o5 + %lo(76f), %g0
-	 andcc		%o2, 1, %g0
-
-	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
-	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
-
-76:	/* rshort_table_end */
-
-	be		1f
-	 nop
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 - 1]
-1:
-	retl
- 	 RETL_INSN
-
-91:	/* rshort_aligned_end */
-
-	bne		75b
-	 andcc		%o2, 8, %g0
-
-	be		1f
-	 andcc		%o2, 4, %g0
-
-	ld		[%o1 - 0x08], %g2
-	ld		[%o1 - 0x04], %g3
-	sub		%o1, 8, %o1
-	st		%g2, [%o0 - 0x08]
-	st		%g3, [%o0 - 0x04]
-	sub		%o0, 8, %o0
-1:
-	b		73b
-	 mov		%o2, %g1
-
-77:	/* rnon_aligned */
-	cmp		%o2, 15
-	bleu		75b
-	 andcc		%o0, 3, %g0
-	be		64f
-	 andcc		%o0, 1, %g0
-	be		63f
-	 andcc		%o0, 2, %g0
-	ldub		[%o1 - 1], %g5
-	sub		%o1, 1, %o1
-	stb		%g5, [%o0 - 1]
-	sub		%o0, 1, %o0
-	be		64f
-	 sub		%o2, 1, %o2
-63:
-	ldub		[%o1 - 1], %g5
-	sub		%o1, 2, %o1
-	stb		%g5, [%o0 - 1]
-	sub		%o0, 2, %o0
-	ldub		[%o1], %g5
-	sub		%o2, 2, %o2
-	stb		%g5, [%o0]
-64:	
-	and		%o1, 3, %g2
-	and		%o1, -4, %o1
-	and		%o2, 0xc, %g3
-	add		%o1, 4, %o1
-	cmp		%g3, 4
-	sll		%g2, 3, %g4
-	mov		32, %g2
-	be		4f
-	 sub		%g2, %g4, %g7
-
-	blu		3f
-	 cmp		%g3, 8
-
-	be		2f
-	 srl		%o2, 2, %g3
-
-	ld		[%o1 - 4], %o3
-	add		%o0, -8, %o0
-	ld		[%o1 - 8], %o4
-	add		%o1, -16, %o1
-	b		7f
-	 add		%g3, 1, %g3
-2:
-	ld		[%o1 - 4], %o4
-	add		%o0, -4, %o0
-	ld		[%o1 - 8], %g1
-	add		%o1, -12, %o1
-	b		8f
-	 add		%g3, 2, %g3
-3:
-	ld		[%o1 - 4], %o5
-	add		%o0, -12, %o0
-	ld		[%o1 - 8], %o3
-	add		%o1, -20, %o1
-	b		6f
-	 srl		%o2, 2, %g3
-4:
-	ld		[%o1 - 4], %g1
-	srl		%o2, 2, %g3
-	ld		[%o1 - 8], %o5
-	add		%o1, -24, %o1
-	add		%o0, -16, %o0
-	add		%g3, -1, %g3
-
-	ld		[%o1 + 12], %o3
-5:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 12]
-6:
-	ld		[%o1 + 8], %o4
-	sll		%o3, %g4, %g2
-	srl		%o5, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 8]
-7:
-	ld		[%o1 + 4], %g1
-	sll		%o4, %g4, %g2
-	srl		%o3, %g7, %g5
-	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 4]
-8:
-	ld		[%o1], %o5
-	sll		%g1, %g4, %g2
-	srl		%o4, %g7, %g5
-	addcc		%g3, -4, %g3
-	or		%g2, %g5, %g2
-	add		%o1, -16, %o1
-	st		%g2, [%o0]
-	add		%o0, -16, %o0
-	bne,a		5b	
-	 ld		[%o1 + 12], %o3
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	srl		%g4, 3, %g3
-	or		%g2, %g5, %g2
-	add		%o1, %g3, %o1
-	andcc		%o2, 2, %g0
-	st		%g2, [%o0 + 12]
-	be		1f
-	 andcc		%o2, 1, %g0
-	
-	ldub		[%o1 + 15], %g5
-	add		%o1, -2, %o1
-	stb		%g5, [%o0 + 11]
-	add		%o0, -2, %o0
-	ldub		[%o1 + 16], %g5
-	stb		%g5, [%o0 + 12]
-1:
-	be		1f
-	 nop
-	ldub		[%o1 + 15], %g5
-	stb		%g5, [%o0 + 11]
-1:
-	retl
-	 RETL_INSN
-
-#endif /* FASTER_REVERSE */
+	 mov		%g7, %o0
 
 /* NOTE: This code is executed just for the cases,
          where %src (=%o1) & 3 is != 0.
@@ -546,7 +204,7 @@ FUNC(memmove)
 FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 
 	sub		%o0, %o1, %o4
-	SETUP_RETL
+	mov		%o0, %g7
 9:
 	andcc		%o4, 3, %o5
 0:
@@ -569,7 +227,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	add		%o1, 4, %o1
 	add		%o0, 4, %o0
 2:
-	andcc		%g1, 0xffffff80, %g7
+	andcc		%g1, 0xffffff80, %g0
 	be		3f
 	 andcc		%o0, 4, %g0
 
@@ -579,22 +237,23 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
+	sub		%g1, 128, %g1
 	add		%o1, 128, %o1
-	bne		5b
+	cmp		%g1, 128
+	bge		5b
 	 add		%o0, 128, %o0
 3:
-	andcc		%g1, 0x70, %g7
+	andcc		%g1, 0x70, %g4
 	be		80f
 	 andcc		%g1, 8, %g0
 
 	sethi		%hi(80f), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	add		%o1, %g7, %o1
+	srl		%g4, 1, %o4
+	add		%g4, %o4, %o4
+	add		%o1, %g4, %o1
 	sub		%o5, %o4, %o5
 	jmpl		%o5 + %lo(80f), %g0
-	 add		%o0, %g7, %o0
+	 add		%o0, %g4, %o0
 
 79:	/* memcpy_table */
 
@@ -641,43 +300,28 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
+	 mov		%g7, %o0
 
 82:	/* ldd_std */
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-	subcc		%g7, 128, %g7
+	subcc		%g1, 128, %g1
 	add		%o1, 128, %o1
-	bne		82b
+	cmp		%g1, 128
+	bge		82b
 	 add		%o0, 128, %o0
 
-#ifndef FASTER_ALIGNED
-
-	andcc		%g1, 0x70, %g7
-	be		80b
-	 andcc		%g1, 8, %g0
-
-	sethi		%hi(80b), %o5
-	srl		%g7, 1, %o4
-	add		%g7, %o4, %o4
-	add		%o1, %g7, %o1
-	sub		%o5, %o4, %o5
-	jmpl		%o5 + %lo(80b), %g0
-	 add		%o0, %g7, %o0
-
-#else /* FASTER_ALIGNED */
-
-	andcc		%g1, 0x70, %g7
+	andcc		%g1, 0x70, %g4
 	be		84f
 	 andcc		%g1, 8, %g0
 
 	sethi		%hi(84f), %o5
-	add		%o1, %g7, %o1
-	sub		%o5, %g7, %o5
+	add		%o1, %g4, %o1
+	sub		%o5, %g4, %o5
 	jmpl		%o5 + %lo(84f), %g0
-	 add		%o0, %g7, %o0
+	 add		%o0, %g4, %o0
 
 83:	/* amemcpy_table */
 
@@ -721,382 +365,132 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
-
-#endif /* FASTER_ALIGNED */
+	 mov		%g7, %o0
 
 86:	/* non_aligned */
 	cmp		%o2, 6
 	bleu		88f
+	 nop
 
-#ifdef FASTER_NONALIGNED
-
-	 cmp		%o2, 256
-	bcc		87f
-
-#endif /* FASTER_NONALIGNED */
-
-	 andcc		%o0, 3, %g0
+	save		%sp, -96, %sp
+	andcc		%i0, 3, %g0
 	be		61f
-	 andcc		%o0, 1, %g0
+	 andcc		%i0, 1, %g0
 	be		60f
-	 andcc		%o0, 2, %g0
+	 andcc		%i0, 2, %g0
 
-	ldub		[%o1], %g5
-	add		%o1, 1, %o1
-	stb		%g5, [%o0]
-	sub		%o2, 1, %o2
+	ldub		[%i1], %g5
+	add		%i1, 1, %i1
+	stb		%g5, [%i0]
+	sub		%i2, 1, %i2
 	bne		61f
-	 add		%o0, 1, %o0
+	 add		%i0, 1, %i0
 60:
-	ldub		[%o1], %g3
-	add		%o1, 2, %o1
-	stb		%g3, [%o0]
-	sub		%o2, 2, %o2
-	ldub		[%o1 - 1], %g3
-	add		%o0, 2, %o0
-	stb		%g3, [%o0 - 1]
+	ldub		[%i1], %g3
+	add		%i1, 2, %i1
+	stb		%g3, [%i0]
+	sub		%i2, 2, %i2
+	ldub		[%i1 - 1], %g3
+	add		%i0, 2, %i0
+	stb		%g3, [%i0 - 1]
 61:
-	and		%o1, 3, %g2
-	and		%o2, 0xc, %g3
-	and		%o1, -4, %o1
+	and		%i1, 3, %g2
+	and		%i2, 0xc, %g3
+	and		%i1, -4, %i1
 	cmp		%g3, 4
 	sll		%g2, 3, %g4
 	mov		32, %g2
 	be		4f
-	 sub		%g2, %g4, %g7
+	 sub		%g2, %g4, %l0
 	
 	blu		3f
 	 cmp		%g3, 0x8
 
 	be		2f
-	 srl		%o2, 2, %g3
+	 srl		%i2, 2, %g3
 
-	ld		[%o1], %o3
-	add		%o0, -8, %o0
-	ld		[%o1 + 4], %o4
+	ld		[%i1], %i3
+	add		%i0, -8, %i0
+	ld		[%i1 + 4], %i4
 	b		8f
 	 add		%g3, 1, %g3
 2:
-	ld		[%o1], %o4
-	add		%o0, -12, %o0
-	ld		[%o1 + 4], %o5
+	ld		[%i1], %i4
+	add		%i0, -12, %i0
+	ld		[%i1 + 4], %i5
 	add		%g3, 2, %g3
 	b		9f
-	 add		%o1, -4, %o1
+	 add		%i1, -4, %i1
 3:
-	ld		[%o1], %g1
-	add		%o0, -4, %o0
-	ld		[%o1 + 4], %o3
-	srl		%o2, 2, %g3
+	ld		[%i1], %g1
+	add		%i0, -4, %i0
+	ld		[%i1 + 4], %i3
+	srl		%i2, 2, %g3
 	b		7f
-	 add		%o1, 4, %o1
+	 add		%i1, 4, %i1
 4:
-	ld		[%o1], %o5
-	cmp		%o2, 7
-	ld		[%o1 + 4], %g1
-	srl		%o2, 2, %g3
+	ld		[%i1], %i5
+	cmp		%i2, 7
+	ld		[%i1 + 4], %g1
+	srl		%i2, 2, %g3
 	bleu		10f
-	 add		%o1, 8, %o1
+	 add		%i1, 8, %i1
 
-	ld		[%o1], %o3
+	ld		[%i1], %i3
 	add		%g3, -1, %g3
 5:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0]
+	st		%g2, [%i0]
 7:
-	ld		[%o1 + 4], %o4
+	ld		[%i1 + 4], %i4
 	sll		%g1, %g4, %g2
-	srl		%o3, %g7, %g5
+	srl		%i3, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 4]
+	st		%g2, [%i0 + 4]
 8:
-	ld		[%o1 + 8], %o5
-	sll		%o3, %g4, %g2
-	srl		%o4, %g7, %g5
+	ld		[%i1 + 8], %i5
+	sll		%i3, %g4, %g2
+	srl		%i4, %l0, %g5
 	or		%g2, %g5, %g2
-	st		%g2, [%o0 + 8]
+	st		%g2, [%i0 + 8]
 9:
-	ld		[%o1 + 12], %g1
-	sll		%o4, %g4, %g2
-	srl		%o5, %g7, %g5
+	ld		[%i1 + 12], %g1
+	sll		%i4, %g4, %g2
+	srl		%i5, %l0, %g5
 	addcc		%g3, -4, %g3
 	or		%g2, %g5, %g2
-	add		%o1, 16, %o1
-	st		%g2, [%o0 + 12]
-	add		%o0, 16, %o0
+	add		%i1, 16, %i1
+	st		%g2, [%i0 + 12]
+	add		%i0, 16, %i0
 	bne,a		5b
-	 ld		[%o1], %o3
+	 ld		[%i1], %i3
 10:
-	sll		%o5, %g4, %g2
-	srl		%g1, %g7, %g5
-	srl		%g7, 3, %g3
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
+	srl		%l0, 3, %g3
 	or		%g2, %g5, %g2
-	sub		%o1, %g3, %o1
-	andcc		%o2, 2, %g0
-	st		%g2, [%o0]
+	sub		%i1, %g3, %i1
+	andcc		%i2, 2, %g0
+	st		%g2, [%i0]
 	be		1f
-	 andcc		%o2, 1, %g0
-
-	ldub		[%o1], %g2
-	add		%o1, 2, %o1
-	stb		%g2, [%o0 + 4]
-	add		%o0, 2, %o0
-	ldub		[%o1 - 1], %g2
-	stb		%g2, [%o0 + 3]
+	 andcc		%i2, 1, %g0
+
+	ldub		[%i1], %g2
+	add		%i1, 2, %i1
+	stb		%g2, [%i0 + 4]
+	add		%i0, 2, %i0
+	ldub		[%i1 - 1], %g2
+	stb		%g2, [%i0 + 3]
 1:
 	be		1f
 	 nop
-	ldub		[%o1], %g2
-	stb		%g2, [%o0 + 4]
-1:
-	retl
-	 RETL_INSN
-
-#ifdef FASTER_NONALIGNED
-
-87:	/* faster_nonaligned */
-
-	andcc		%o1, 3, %g0
-	be		3f
-	 andcc		%o1, 1, %g0
-
-	be		4f
-	 andcc		%o1, 2, %g0
-
-	ldub		[%o1], %g2
-	add		%o1, 1, %o1
-	stb		%g2, [%o0]
-	sub		%o2, 1, %o2
-	bne		3f
-	 add		%o0, 1, %o0
-4:
-	lduh		[%o1], %g2
-	add		%o1, 2, %o1
-	srl		%g2, 8, %g3
-	sub		%o2, 2, %o2
-	stb		%g3, [%o0]
-	add		%o0, 2, %o0
-	stb		%g2, [%o0 - 1]
-3:
-	 andcc		%o1, 4, %g0
-
-	bne		2f
-	 cmp		%o5, 1
-
-	ld		[%o1], %o4
-	srl		%o4, 24, %g2
-	stb		%g2, [%o0]
-	srl		%o4, 16, %g3
-	stb		%g3, [%o0 + 1]
-	srl		%o4, 8, %g2
-	stb		%g2, [%o0 + 2]
-	sub		%o2, 4, %o2
-	stb		%o4, [%o0 + 3]
-	add		%o1, 4, %o1
-	add		%o0, 4, %o0
-2:
-	be		33f
-	 cmp		%o5, 2
-	be		32f
-	 sub		%o2, 4, %o2
-31:
-	ld		[%o1], %g2
-	add		%o1, 4, %o1
-	srl		%g2, 24, %g3
-	and		%o0, 7, %g5
-	stb		%g3, [%o0]
-	cmp		%g5, 7
-	sll		%g2, 8, %g1
-	add		%o0, 4, %o0
-	be		41f
-	 and		%o2, 0xffffffc0, %o3
-	ld		[%o0 - 7], %o4
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	st		%o4, [%o0 - 7]
-	sth		%g2, [%o0 - 3]
-	srl		%g1, 8, %g4
-	b		88f
-	 stb		%g4, [%o0 - 1]
-32:
-	ld		[%o1], %g2
-	add		%o1, 4, %o1
-	srl		%g2, 16, %g3
-	and		%o0, 7, %g5
-	sth		%g3, [%o0]
-	cmp		%g5, 6
-	sll		%g2, 16, %g1
-	add		%o0, 4, %o0
-	be		42f
-	 and		%o2, 0xffffffc0, %o3
-	ld		[%o0 - 6], %o4
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	st		%o4, [%o0 - 6]
-	b		88f
-	 sth		%g2, [%o0 - 2]
-33:
-	ld		[%o1], %g2
-	sub		%o2, 4, %o2
-	srl		%g2, 24, %g3
-	and		%o0, 7, %g5
-	stb		%g3, [%o0]
-	cmp		%g5, 5
-	srl		%g2, 8, %g4
-	sll		%g2, 24, %g1
-	sth		%g4, [%o0 + 1]
-	add		%o1, 4, %o1
-	be		43f
-	 and		%o2, 0xffffffc0, %o3
-
-	ld		[%o0 - 1], %o4
-	add		%o0, 4, %o0
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		4b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 24, %g2
-4:
-	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 24, %g2
-1:
-	st		%o4, [%o0 - 5]
-	b		88f
-	 stb		%g2, [%o0 - 1]
-41:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		41b
-	 add		%o0, 64, %o0
-	 
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
+	ldub		[%i1], %g2
+	stb		%g2, [%i0 + 4]
 1:
-	sth		%g2, [%o0 - 3]
-	srl		%g1, 8, %g4
-	b		88f
-	 stb		%g4, [%o0 - 1]
-43:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		43b
-	 add		%o0, 64, %o0
-
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 24, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 24, %g2
-1:
-	stb		%g2, [%o0 + 3]
-	b		88f
-	 add		%o0, 4, %o0
-42:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 64, %o3
-	add		%o1, 64, %o1
-	bne		42b
-	 add		%o0, 64, %o0
-	 
-	andcc		%o2, 0x30, %o3
-	be,a		1f
-	 srl		%g1, 16, %g2
-4:
-	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
-	subcc		%o3, 16, %o3
-	add		%o1, 16, %o1
-	bne		4b
-	 add		%o0, 16, %o0
-
-	srl		%g1, 16, %g2
-1:
-	sth		%g2, [%o0 - 2]
-
-	/* Fall through */
-	 
-#endif /* FASTER_NONALIGNED */
+	ret
+	 restore	%g7, %g0, %o0
 
 88:	/* short_end */
 
@@ -1127,7 +521,7 @@ FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
 	stb		%g2, [%o0]
 1:
 	retl
- 	 RETL_INSN
+	 mov		%g7, %o0
 
 90:	/* short_aligned_end */
 	bne		88b
diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
index e485a6804998..13c2169822a8 100644
--- a/arch/sparc/mm/leon_mm.c
+++ b/arch/sparc/mm/leon_mm.c
@@ -162,7 +162,7 @@ ready:
 		printk(KERN_INFO "swprobe: padde %x\n", paddr_calc);
 	if (paddr)
 		*paddr = paddr_calc;
-	return paddrbase;
+	return pte;
 }
 
 void leon_flush_icache_all(void)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b30f71ac0d06..70a0de46cd1b 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -46,9 +46,6 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
-config GENERIC_TIME
-	def_bool y
-
 config GENERIC_CLOCKEVENTS
 	def_bool y
 
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 2ad73fb707b9..dafdbbae1124 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -11,7 +11,6 @@ CONFIG_HAVE_ARCH_ALLOC_REMAP=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
 CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
 CONFIG_SYS_SUPPORTS_HUGETLBFS=y
-CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_DEFAULT_MIGRATION_COST=10000000
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index f58dc362b944..6f05f969b564 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -11,7 +11,6 @@ CONFIG_HAVE_ARCH_ALLOC_REMAP=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
 CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y
 CONFIG_SYS_SUPPORTS_HUGETLBFS=y
-CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_DEFAULT_MIGRATION_COST=10000000
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index fc94607f0bd5..aecc8ed5f39b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -21,7 +21,7 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
-#include <linux/atomic.h>
+#include <asm/atomic_32.h>
 #include <asm/asm-offsets.h>
 #include <hv/hypervisor.h>
 #include <arch/abi.h>
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 1f75a2a56101..30638042691d 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -70,7 +70,7 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/atomic.h>
+#include <asm/atomic_32.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 9f7634f08cf3..761f5e1a657e 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -13,7 +13,6 @@ CONFIG_LOCKDEP_SUPPORT=y
 # CONFIG_STACKTRACE_SUPPORT is not set
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_GENERIC_BUG=y
-CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_IRQ_RELEASE_METHOD=y
 CONFIG_HZ=100
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 22745b47c829..a492e59883a3 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -368,7 +368,7 @@ static const struct net_device_ops uml_netdev_ops = {
 	.ndo_open 		= uml_net_open,
 	.ndo_stop 		= uml_net_close,
 	.ndo_start_xmit 	= uml_net_start_xmit,
-	.ndo_set_multicast_list = uml_net_set_multicast_list,
+	.ndo_set_rx_mode	= uml_net_set_multicast_list,
 	.ndo_tx_timeout 	= uml_net_tx_timeout,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_change_mtu 	= uml_net_change_mtu,
diff --git a/arch/unicore32/include/asm/io.h b/arch/unicore32/include/asm/io.h
index 4bd87f3d13d4..1a5c5a5eb39c 100644
--- a/arch/unicore32/include/asm/io.h
+++ b/arch/unicore32/include/asm/io.h
@@ -32,7 +32,7 @@ extern void __uc32_iounmap(volatile void __iomem *addr);
  * ioremap and friends.
  *
  * ioremap takes a PCI memory address, as specified in
- * Documentation/IO-mapping.txt.
+ * Documentation/io-mapping.txt.
  *
  */
 #define ioremap(cookie, size)		__uc32_ioremap(cookie, size)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a47bb22657f..77f7a384c0b5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,10 +64,12 @@ config X86
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
+	select SPARSE_IRQ
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
+	select GENERIC_CLOCKEVENTS_MIN_ADJUST
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if (X86_64 && NET)
@@ -130,7 +132,7 @@ config SBUS
 	bool
 
 config NEED_DMA_MAP_STATE
-       def_bool (X86_64 || DMAR || DMA_API_DEBUG)
+       def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
 
 config NEED_SG_DMA_LENGTH
 	def_bool y
@@ -220,7 +222,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 
 config HAVE_INTEL_TXT
 	def_bool y
-	depends on EXPERIMENTAL && DMAR && ACPI
+	depends on EXPERIMENTAL && INTEL_IOMMU && ACPI
 
 config X86_32_SMP
 	def_bool y
@@ -279,7 +281,7 @@ config SMP
 	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
 	  Management" code will be disabled if you say Y here.
 
-	  See also <file:Documentation/i386/IO-APIC.txt>,
+	  See also <file:Documentation/x86/i386/IO-APIC.txt>,
 	  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
 	  <http://www.tldp.org/docs.html#howto>.
 
@@ -287,7 +289,7 @@ config SMP
 
 config X86_X2APIC
 	bool "Support x2apic"
-	depends on X86_LOCAL_APIC && X86_64 && INTR_REMAP
+	depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
 	---help---
 	  This enables x2apic support on CPUs that have this feature.
 
@@ -1452,6 +1454,15 @@ config ARCH_USES_PG_UNCACHED
 	def_bool y
 	depends on X86_PAT
 
+config ARCH_RANDOM
+	def_bool y
+	prompt "x86 architectural random number generator" if EXPERT
+	---help---
+	  Enable the x86 architectural RDRAND instruction
+	  (Intel Bull Mountain technology) to generate random numbers.
+	  If supported, this is a high bandwidth, cryptographically
+	  secure hardware random number generator.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
@@ -2064,6 +2075,20 @@ config OLPC_XO15_SCI
 	   - AC adapter status updates
 	   - Battery status updates
 
+config ALIX
+	bool "PCEngines ALIX System Support (LED setup)"
+	select GPIOLIB
+	---help---
+	  This option enables system support for the PCEngines ALIX.
+	  At present this just sets up LEDs for GPIO control on
+	  ALIX2/3/6 boards.  However, other system specific setup should
+	  get added here.
+
+	  Note: You must still enable the drivers for GPIO and LED support
+	  (GPIO_CS5535 & LEDS_GPIO) to actually use the LEDs
+
+	  Note: You have to set alix.force=1 for boards with Award BIOS.
+
 endif # X86_32
 
 config AMD_NB
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c0f8a5c88910..bf56e1793272 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -139,7 +139,7 @@ config IOMMU_DEBUG
 	  code. When you use it make sure you have a big enough
 	  IOMMU/AGP aperture.  Most of the options enabled by this can
 	  be set more finegrained using the iommu= command line
-	  options. See Documentation/x86_64/boot-options.txt for more
+	  options. See Documentation/x86/x86_64/boot-options.txt for more
 	  details.
 
 config IOMMU_STRESS
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 93e689f4bd86..bdb4d458ec8c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -129,7 +129,7 @@ start_sys_seg:	.word	SYSSEG		# obsolete and meaningless, but just
 
 type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
 					# bootloaders know to change this.
-					# See Documentation/i386/boot.txt for
+					# See Documentation/x86/boot.txt for
 					# assigned ids
 
 # flags, unused bits must be zero (RFU) bit within loadflags
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 22a0dc8e51dd..058a35b8286c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -67,8 +67,8 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_X86_ACPI_CPUFREQ=y
 CONFIG_PCI_MMCONFIG=y
-CONFIG_DMAR=y
-# CONFIG_DMAR_DEFAULT_ON is not set
+CONFIG_INTEL_IOMMU=y
+# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCCARD=y
 CONFIG_YENTA=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index c04f1b7a9139..3537d4b91f74 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -7,21 +7,33 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
+obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
+obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
 salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
+twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
+
+# enable AVX support only when $(AS) can actually assemble the instructions
+ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
+AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
+CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
+endif
+sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 49ae9fe32b22..b0b6950cc8c8 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -4,6 +4,7 @@
  */
 
 #include <crypto/aes.h>
+#include <asm/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
new file mode 100644
index 000000000000..391d245dc086
--- /dev/null
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -0,0 +1,390 @@
+/*
+ * Blowfish Cipher Algorithm (x86_64)
+ *
+ * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "blowfish-x86_64-asm.S"
+.text
+
+/* structure of crypto context */
+#define p	0
+#define s0	((16 + 2) * 4)
+#define s1	((16 + 2 + (1 * 256)) * 4)
+#define s2	((16 + 2 + (2 * 256)) * 4)
+#define s3	((16 + 2 + (3 * 256)) * 4)
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rcx
+#define RX3 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %ecx
+#define RX3d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %cl
+#define RX3bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %ch
+#define RX3bh %dh
+
+#define RT0 %rbp
+#define RT1 %rsi
+#define RT2 %r8
+#define RT3 %r9
+
+#define RT0d %ebp
+#define RT1d %esi
+#define RT2d %r8d
+#define RT3d %r9d
+
+#define RKEY %r10
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F() \
+	rorq $16,		RX0; \
+	movzbl RX0bh,		RT0d; \
+	movzbl RX0bl,		RT1d; \
+	rolq $16,		RX0; \
+	movl s0(CTX,RT0,4),	RT0d; \
+	addl s1(CTX,RT1,4),	RT0d; \
+	movzbl RX0bh,		RT1d; \
+	movzbl RX0bl,		RT2d; \
+	rolq $32,		RX0; \
+	xorl s2(CTX,RT1,4),	RT0d; \
+	addl s3(CTX,RT2,4),	RT0d; \
+	xorq RT0,		RX0;
+
+#define add_roundkey_enc(n) \
+	xorq p+4*(n)(CTX), 	RX0;
+
+#define round_enc(n) \
+	add_roundkey_enc(n); \
+	\
+	F(); \
+	F();
+
+#define add_roundkey_dec(n) \
+	movq p+4*(n-1)(CTX),	RT0; \
+	rorq $32,		RT0; \
+	xorq RT0,		RX0;
+
+#define round_dec(n) \
+	add_roundkey_dec(n); \
+	\
+	F(); \
+	F(); \
+
+#define read_block() \
+	movq (RIO), 		RX0; \
+	rorq $32, 		RX0; \
+	bswapq 			RX0;
+
+#define write_block() \
+	bswapq 			RX0; \
+	movq RX0, 		(RIO);
+
+#define xor_block() \
+	bswapq 			RX0; \
+	xorq RX0, 		(RIO);
+
+.align 8
+.global __blowfish_enc_blk
+.type   __blowfish_enc_blk,@function;
+
+__blowfish_enc_blk:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+	movq %rbp, %r11;
+
+	movq %rsi, %r10;
+	movq %rdx, RIO;
+
+	read_block();
+
+	round_enc(0);
+	round_enc(2);
+	round_enc(4);
+	round_enc(6);
+	round_enc(8);
+	round_enc(10);
+	round_enc(12);
+	round_enc(14);
+	add_roundkey_enc(16);
+
+	movq %r11, %rbp;
+
+	movq %r10, RIO;
+	test %cl, %cl;
+	jnz __enc_xor;
+
+	write_block();
+	ret;
+__enc_xor:
+	xor_block();
+	ret;
+
+.align 8
+.global blowfish_dec_blk
+.type   blowfish_dec_blk,@function;
+
+blowfish_dec_blk:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	movq %rbp, %r11;
+
+	movq %rsi, %r10;
+	movq %rdx, RIO;
+
+	read_block();
+
+	round_dec(17);
+	round_dec(15);
+	round_dec(13);
+	round_dec(11);
+	round_dec(9);
+	round_dec(7);
+	round_dec(5);
+	round_dec(3);
+	add_roundkey_dec(1);
+
+	movq %r10, RIO;
+	write_block();
+
+	movq %r11, %rbp;
+
+	ret;
+
+/**********************************************************************
+  4-way blowfish, four blocks parallel
+ **********************************************************************/
+
+/* F() for 4-way. Slower when used alone/1-way, but faster when used
+ * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
+ */
+#define F4(x) \
+	movzbl x ## bh,		RT1d; \
+	movzbl x ## bl,		RT3d; \
+	rorq $16,		x; \
+	movzbl x ## bh,		RT0d; \
+	movzbl x ## bl,		RT2d; \
+	rorq $16,		x; \
+	movl s0(CTX,RT0,4),	RT0d; \
+	addl s1(CTX,RT2,4),	RT0d; \
+	xorl s2(CTX,RT1,4),	RT0d; \
+	addl s3(CTX,RT3,4),	RT0d; \
+	xorq RT0,		x;
+
+#define add_preloaded_roundkey4() \
+	xorq RKEY,		RX0; \
+	xorq RKEY,		RX1; \
+	xorq RKEY,		RX2; \
+	xorq RKEY,		RX3;
+
+#define preload_roundkey_enc(n) \
+	movq p+4*(n)(CTX),	RKEY;
+
+#define add_roundkey_enc4(n) \
+	add_preloaded_roundkey4(); \
+	preload_roundkey_enc(n + 2);
+
+#define round_enc4(n) \
+	add_roundkey_enc4(n); \
+	\
+	F4(RX0); \
+	F4(RX1); \
+	F4(RX2); \
+	F4(RX3); \
+	\
+	F4(RX0); \
+	F4(RX1); \
+	F4(RX2); \
+	F4(RX3);
+
+#define preload_roundkey_dec(n) \
+	movq p+4*((n)-1)(CTX),	RKEY; \
+	rorq $32,		RKEY;
+
+#define add_roundkey_dec4(n) \
+	add_preloaded_roundkey4(); \
+	preload_roundkey_dec(n - 2);
+
+#define round_dec4(n) \
+	add_roundkey_dec4(n); \
+	\
+	F4(RX0); \
+	F4(RX1); \
+	F4(RX2); \
+	F4(RX3); \
+	\
+	F4(RX0); \
+	F4(RX1); \
+	F4(RX2); \
+	F4(RX3);
+
+#define read_block4() \
+	movq (RIO),		RX0; \
+	rorq $32,		RX0; \
+	bswapq 			RX0; \
+	\
+	movq 8(RIO),		RX1; \
+	rorq $32,		RX1; \
+	bswapq 			RX1; \
+	\
+	movq 16(RIO),		RX2; \
+	rorq $32,		RX2; \
+	bswapq 			RX2; \
+	\
+	movq 24(RIO),		RX3; \
+	rorq $32,		RX3; \
+	bswapq 			RX3;
+
+#define write_block4() \
+	bswapq 			RX0; \
+	movq RX0,		(RIO); \
+	\
+	bswapq 			RX1; \
+	movq RX1,		8(RIO); \
+	\
+	bswapq 			RX2; \
+	movq RX2,		16(RIO); \
+	\
+	bswapq 			RX3; \
+	movq RX3,		24(RIO);
+
+#define xor_block4() \
+	bswapq 			RX0; \
+	xorq RX0,		(RIO); \
+	\
+	bswapq 			RX1; \
+	xorq RX1,		8(RIO); \
+	\
+	bswapq 			RX2; \
+	xorq RX2,		16(RIO); \
+	\
+	bswapq 			RX3; \
+	xorq RX3,		24(RIO);
+
+.align 8
+.global __blowfish_enc_blk_4way
+.type   __blowfish_enc_blk_4way,@function;
+
+__blowfish_enc_blk_4way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+	pushq %rbp;
+	pushq %rbx;
+	pushq %rcx;
+
+	preload_roundkey_enc(0);
+
+	movq %rsi, %r11;
+	movq %rdx, RIO;
+
+	read_block4();
+
+	round_enc4(0);
+	round_enc4(2);
+	round_enc4(4);
+	round_enc4(6);
+	round_enc4(8);
+	round_enc4(10);
+	round_enc4(12);
+	round_enc4(14);
+	add_preloaded_roundkey4();
+
+	popq %rbp;
+	movq %r11, RIO;
+
+	test %bpl, %bpl;
+	jnz __enc_xor4;
+
+	write_block4();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+__enc_xor4:
+	xor_block4();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+.align 8
+.global blowfish_dec_blk_4way
+.type   blowfish_dec_blk_4way,@function;
+
+blowfish_dec_blk_4way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	pushq %rbp;
+	pushq %rbx;
+	preload_roundkey_dec(17);
+
+	movq %rsi, %r11;
+	movq %rdx, RIO;
+
+	read_block4();
+
+	round_dec4(17);
+	round_dec4(15);
+	round_dec4(13);
+	round_dec4(11);
+	round_dec4(9);
+	round_dec4(7);
+	round_dec4(5);
+	round_dec4(3);
+	add_preloaded_roundkey4();
+
+	movq %r11, RIO;
+	write_block4();
+
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
new file mode 100644
index 000000000000..b05aa163d55a
--- /dev/null
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -0,0 +1,492 @@
+/*
+ * Glue Code for assembler optimized version of Blowfish
+ *
+ * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <crypto/blowfish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+
+/* regular block cipher functions */
+asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
+				   bool xor);
+asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
+
+/* 4-way parallel cipher functions */
+asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src, bool xor);
+asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
+				      const u8 *src);
+
+static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, true);
+}
+
+static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, true);
+}
+
+static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static struct crypto_alg bf_alg = {
+	.cra_name		=	"blowfish",
+	.cra_driver_name	=	"blowfish-asm",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	BF_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct bf_ctx),
+	.cra_alignmask		=	3,
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(bf_alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	BF_MIN_KEY_SIZE,
+			.cia_max_keysize	=	BF_MAX_KEY_SIZE,
+			.cia_setkey		=	blowfish_setkey,
+			.cia_encrypt		=	blowfish_encrypt,
+			.cia_decrypt		=	blowfish_decrypt,
+		}
+	}
+};
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
+		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		/* Process four block batch */
+		if (nbytes >= bsize * 4) {
+			do {
+				fn_4way(ctx, wdst, wsrc);
+
+				wsrc += bsize * 4;
+				wdst += bsize * 4;
+				nbytes -= bsize * 4;
+			} while (nbytes >= bsize * 4);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			fn(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+}
+
+static struct crypto_alg blk_ecb_alg = {
+	.cra_name		= "ecb(blowfish)",
+	.cra_driver_name	= "ecb-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+};
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 *iv = (u64 *)walk->iv;
+
+	do {
+		*dst = *src ^ *iv;
+		blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	*(u64 *)walk->iv = *iv;
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ivs[4 - 1];
+	u64 last_iv;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	/* Process four block batch */
+	if (nbytes >= bsize * 4) {
+		do {
+			nbytes -= bsize * 4 - bsize;
+			src -= 4 - 1;
+			dst -= 4 - 1;
+
+			ivs[0] = src[0];
+			ivs[1] = src[1];
+			ivs[2] = src[2];
+
+			blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
+
+			dst[1] ^= ivs[0];
+			dst[2] ^= ivs[1];
+			dst[3] ^= ivs[2];
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			*dst ^= *(src - 1);
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * 4);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	for (;;) {
+		blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		*dst ^= *(src - 1);
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	*dst ^= *(u64 *)walk->iv;
+	*(u64 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_decrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg blk_cbc_alg = {
+	.cra_name		= "cbc(blowfish)",
+	.cra_driver_name	= "cbc-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= BF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.ivsize		= BF_BLOCK_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+};
+
+static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+{
+	u8 *ctrblk = walk->iv;
+	u8 keystream[BF_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	blowfish_enc_blk(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+
+	crypto_inc(ctrblk, BF_BLOCK_SIZE);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
+	__be64 ctrblocks[4];
+
+	/* Process four block batch */
+	if (nbytes >= bsize * 4) {
+		do {
+			if (dst != src) {
+				dst[0] = src[0];
+				dst[1] = src[1];
+				dst[2] = src[2];
+				dst[3] = src[3];
+			}
+
+			/* create ctrblks for parallel encrypt */
+			ctrblocks[0] = cpu_to_be64(ctrblk++);
+			ctrblocks[1] = cpu_to_be64(ctrblk++);
+			ctrblocks[2] = cpu_to_be64(ctrblk++);
+			ctrblocks[3] = cpu_to_be64(ctrblk++);
+
+			blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
+						  (u8 *)ctrblocks);
+
+			src += 4;
+			dst += 4;
+		} while ((nbytes -= bsize * 4) >= bsize * 4);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		if (dst != src)
+			*dst = *src;
+
+		ctrblocks[0] = cpu_to_be64(ctrblk++);
+
+		blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
+
+		src += 1;
+		dst += 1;
+	} while ((nbytes -= bsize) >= bsize);
+
+done:
+	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
+	return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+
+	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	if (walk.nbytes) {
+		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+static struct crypto_alg blk_ctr_alg = {
+	.cra_name		= "ctr(blowfish)",
+	.cra_driver_name	= "ctr-blowfish-asm",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct bf_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= BF_MIN_KEY_SIZE,
+			.max_keysize	= BF_MAX_KEY_SIZE,
+			.ivsize		= BF_BLOCK_SIZE,
+			.setkey		= blowfish_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+};
+
+static int __init init(void)
+{
+	int err;
+
+	err = crypto_register_alg(&bf_alg);
+	if (err)
+		goto bf_err;
+	err = crypto_register_alg(&blk_ecb_alg);
+	if (err)
+		goto ecb_err;
+	err = crypto_register_alg(&blk_cbc_alg);
+	if (err)
+		goto cbc_err;
+	err = crypto_register_alg(&blk_ctr_alg);
+	if (err)
+		goto ctr_err;
+
+	return 0;
+
+ctr_err:
+	crypto_unregister_alg(&blk_cbc_alg);
+cbc_err:
+	crypto_unregister_alg(&blk_ecb_alg);
+ecb_err:
+	crypto_unregister_alg(&bf_alg);
+bf_err:
+	return err;
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&blk_ctr_alg);
+	crypto_unregister_alg(&blk_cbc_alg);
+	crypto_unregister_alg(&blk_ecb_alg);
+	crypto_unregister_alg(&bf_alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
+MODULE_ALIAS("blowfish");
+MODULE_ALIAS("blowfish-asm");
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
new file mode 100644
index 000000000000..b2c2f57d70e8
--- /dev/null
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -0,0 +1,558 @@
+/*
+ * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
+ * SSE3 instruction set extensions introduced in Intel Core Microarchitecture
+ * processors. CPUs supporting Intel(R) AVX extensions will get an additional
+ * boost.
+ *
+ * This work was inspired by the vectorized implementation of Dean Gaudet.
+ * Additional information on it can be found at:
+ *    http://www.arctic.org/~dean/crypto/sha1.html
+ *
+ * It was improved upon with more efficient vectorization of the message
+ * scheduling. This implementation has also been optimized for all current and
+ * several future generations of Intel CPUs.
+ *
+ * See this article for more information about the implementation details:
+ *   http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/
+ *
+ * Copyright (C) 2010, Intel Corp.
+ *   Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com>
+ *            Ronen Zohar <ronen.zohar@intel.com>
+ *
+ * Converted to AT&T syntax and adapted for inclusion in the Linux kernel:
+ *   Author: Mathias Krause <minipli@googlemail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define CTX	%rdi	// arg1
+#define BUF	%rsi	// arg2
+#define CNT	%rdx	// arg3
+
+#define REG_A	%ecx
+#define REG_B	%esi
+#define REG_C	%edi
+#define REG_D	%ebp
+#define REG_E	%edx
+
+#define REG_T1	%eax
+#define REG_T2	%ebx
+
+#define K_BASE		%r8
+#define HASH_PTR	%r9
+#define BUFFER_PTR	%r10
+#define BUFFER_END	%r11
+
+#define W_TMP1	%xmm0
+#define W_TMP2	%xmm9
+
+#define W0	%xmm1
+#define W4	%xmm2
+#define W8	%xmm3
+#define W12	%xmm4
+#define W16	%xmm5
+#define W20	%xmm6
+#define W24	%xmm7
+#define W28	%xmm8
+
+#define XMM_SHUFB_BSWAP	%xmm10
+
+/* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */
+#define WK(t)	(((t) & 15) * 4)(%rsp)
+#define W_PRECALC_AHEAD	16
+
+/*
+ * This macro implements the SHA-1 function's body for single 64-byte block
+ * param: function's name
+ */
+.macro SHA1_VECTOR_ASM  name
+	.global	\name
+	.type	\name, @function
+	.align 32
+\name:
+	push	%rbx
+	push	%rbp
+	push	%r12
+
+	mov	%rsp, %r12
+	sub	$64, %rsp		# allocate workspace
+	and	$~15, %rsp		# align stack
+
+	mov	CTX, HASH_PTR
+	mov	BUF, BUFFER_PTR
+
+	shl	$6, CNT			# multiply by 64
+	add	BUF, CNT
+	mov	CNT, BUFFER_END
+
+	lea	K_XMM_AR(%rip), K_BASE
+	xmm_mov	BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP
+
+	SHA1_PIPELINED_MAIN_BODY
+
+	# cleanup workspace
+	mov	$8, %ecx
+	mov	%rsp, %rdi
+	xor	%rax, %rax
+	rep stosq
+
+	mov	%r12, %rsp		# deallocate workspace
+
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+
+	.size	\name, .-\name
+.endm
+
+/*
+ * This macro implements 80 rounds of SHA-1 for one 64-byte block
+ */
+.macro SHA1_PIPELINED_MAIN_BODY
+	INIT_REGALLOC
+
+	mov	  (HASH_PTR), A
+	mov	 4(HASH_PTR), B
+	mov	 8(HASH_PTR), C
+	mov	12(HASH_PTR), D
+	mov	16(HASH_PTR), E
+
+  .set i, 0
+  .rept W_PRECALC_AHEAD
+	W_PRECALC i
+    .set i, (i+1)
+  .endr
+
+.align 4
+1:
+	RR F1,A,B,C,D,E,0
+	RR F1,D,E,A,B,C,2
+	RR F1,B,C,D,E,A,4
+	RR F1,E,A,B,C,D,6
+	RR F1,C,D,E,A,B,8
+
+	RR F1,A,B,C,D,E,10
+	RR F1,D,E,A,B,C,12
+	RR F1,B,C,D,E,A,14
+	RR F1,E,A,B,C,D,16
+	RR F1,C,D,E,A,B,18
+
+	RR F2,A,B,C,D,E,20
+	RR F2,D,E,A,B,C,22
+	RR F2,B,C,D,E,A,24
+	RR F2,E,A,B,C,D,26
+	RR F2,C,D,E,A,B,28
+
+	RR F2,A,B,C,D,E,30
+	RR F2,D,E,A,B,C,32
+	RR F2,B,C,D,E,A,34
+	RR F2,E,A,B,C,D,36
+	RR F2,C,D,E,A,B,38
+
+	RR F3,A,B,C,D,E,40
+	RR F3,D,E,A,B,C,42
+	RR F3,B,C,D,E,A,44
+	RR F3,E,A,B,C,D,46
+	RR F3,C,D,E,A,B,48
+
+	RR F3,A,B,C,D,E,50
+	RR F3,D,E,A,B,C,52
+	RR F3,B,C,D,E,A,54
+	RR F3,E,A,B,C,D,56
+	RR F3,C,D,E,A,B,58
+
+	add	$64, BUFFER_PTR		# move to the next 64-byte block
+	cmp	BUFFER_END, BUFFER_PTR	# if the current is the last one use
+	cmovae	K_BASE, BUFFER_PTR	# dummy source to avoid buffer overrun
+
+	RR F4,A,B,C,D,E,60
+	RR F4,D,E,A,B,C,62
+	RR F4,B,C,D,E,A,64
+	RR F4,E,A,B,C,D,66
+	RR F4,C,D,E,A,B,68
+
+	RR F4,A,B,C,D,E,70
+	RR F4,D,E,A,B,C,72
+	RR F4,B,C,D,E,A,74
+	RR F4,E,A,B,C,D,76
+	RR F4,C,D,E,A,B,78
+
+	UPDATE_HASH   (HASH_PTR), A
+	UPDATE_HASH  4(HASH_PTR), B
+	UPDATE_HASH  8(HASH_PTR), C
+	UPDATE_HASH 12(HASH_PTR), D
+	UPDATE_HASH 16(HASH_PTR), E
+
+	RESTORE_RENAMED_REGS
+	cmp	K_BASE, BUFFER_PTR	# K_BASE means, we reached the end
+	jne	1b
+.endm
+
+.macro INIT_REGALLOC
+  .set A, REG_A
+  .set B, REG_B
+  .set C, REG_C
+  .set D, REG_D
+  .set E, REG_E
+  .set T1, REG_T1
+  .set T2, REG_T2
+.endm
+
+.macro RESTORE_RENAMED_REGS
+	# order is important (REG_C is where it should be)
+	mov	B, REG_B
+	mov	D, REG_D
+	mov	A, REG_A
+	mov	E, REG_E
+.endm
+
+.macro SWAP_REG_NAMES  a, b
+  .set _T, \a
+  .set \a, \b
+  .set \b, _T
+.endm
+
+.macro F1  b, c, d
+	mov	\c, T1
+	SWAP_REG_NAMES \c, T1
+	xor	\d, T1
+	and	\b, T1
+	xor	\d, T1
+.endm
+
+.macro F2  b, c, d
+	mov	\d, T1
+	SWAP_REG_NAMES \d, T1
+	xor	\c, T1
+	xor	\b, T1
+.endm
+
+.macro F3  b, c ,d
+	mov	\c, T1
+	SWAP_REG_NAMES \c, T1
+	mov	\b, T2
+	or	\b, T1
+	and	\c, T2
+	and	\d, T1
+	or	T2, T1
+.endm
+
+.macro F4  b, c, d
+	F2 \b, \c, \d
+.endm
+
+.macro UPDATE_HASH  hash, val
+	add	\hash, \val
+	mov	\val, \hash
+.endm
+
+/*
+ * RR does two rounds of SHA-1 back to back with W[] pre-calc
+ *   t1 = F(b, c, d);   e += w(i)
+ *   e += t1;           b <<= 30;   d  += w(i+1);
+ *   t1 = F(a, b, c);
+ *   d += t1;           a <<= 5;
+ *   e += a;
+ *   t1 = e;            a >>= 7;
+ *   t1 <<= 5;
+ *   d += t1;
+ */
+.macro RR  F, a, b, c, d, e, round
+	add	WK(\round), \e
+	\F   \b, \c, \d		# t1 = F(b, c, d);
+	W_PRECALC (\round + W_PRECALC_AHEAD)
+	rol	$30, \b
+	add	T1, \e
+	add	WK(\round + 1), \d
+
+	\F   \a, \b, \c
+	W_PRECALC (\round + W_PRECALC_AHEAD + 1)
+	rol	$5, \a
+	add	\a, \e
+	add	T1, \d
+	ror	$7, \a		# (a <<r 5) >>r 7) => a <<r 30)
+
+	mov	\e, T1
+	SWAP_REG_NAMES \e, T1
+
+	rol	$5, T1
+	add	T1, \d
+
+	# write:  \a, \b
+	# rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c
+.endm
+
+.macro W_PRECALC  r
+  .set i, \r
+
+  .if (i < 20)
+    .set K_XMM, 0
+  .elseif (i < 40)
+    .set K_XMM, 16
+  .elseif (i < 60)
+    .set K_XMM, 32
+  .elseif (i < 80)
+    .set K_XMM, 48
+  .endif
+
+  .if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD))))
+    .set i, ((\r) % 80)	    # pre-compute for the next iteration
+    .if (i == 0)
+	W_PRECALC_RESET
+    .endif
+	W_PRECALC_00_15
+  .elseif (i<32)
+	W_PRECALC_16_31
+  .elseif (i < 80)   // rounds 32-79
+	W_PRECALC_32_79
+  .endif
+.endm
+
+.macro W_PRECALC_RESET
+  .set W,          W0
+  .set W_minus_04, W4
+  .set W_minus_08, W8
+  .set W_minus_12, W12
+  .set W_minus_16, W16
+  .set W_minus_20, W20
+  .set W_minus_24, W24
+  .set W_minus_28, W28
+  .set W_minus_32, W
+.endm
+
+.macro W_PRECALC_ROTATE
+  .set W_minus_32, W_minus_28
+  .set W_minus_28, W_minus_24
+  .set W_minus_24, W_minus_20
+  .set W_minus_20, W_minus_16
+  .set W_minus_16, W_minus_12
+  .set W_minus_12, W_minus_08
+  .set W_minus_08, W_minus_04
+  .set W_minus_04, W
+  .set W,          W_minus_32
+.endm
+
+.macro W_PRECALC_SSSE3
+
+.macro W_PRECALC_00_15
+	W_PRECALC_00_15_SSSE3
+.endm
+.macro W_PRECALC_16_31
+	W_PRECALC_16_31_SSSE3
+.endm
+.macro W_PRECALC_32_79
+	W_PRECALC_32_79_SSSE3
+.endm
+
+/* message scheduling pre-compute for rounds 0-15 */
+.macro W_PRECALC_00_15_SSSE3
+  .if ((i & 3) == 0)
+	movdqu	(i*4)(BUFFER_PTR), W_TMP1
+  .elseif ((i & 3) == 1)
+	pshufb	XMM_SHUFB_BSWAP, W_TMP1
+	movdqa	W_TMP1, W
+  .elseif ((i & 3) == 2)
+	paddd	(K_BASE), W_TMP1
+  .elseif ((i & 3) == 3)
+	movdqa  W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+/* message scheduling pre-compute for rounds 16-31
+ *
+ * - calculating last 32 w[i] values in 8 XMM registers
+ * - pre-calculate K+w[i] values and store to mem, for later load by ALU add
+ *   instruction
+ *
+ * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3]
+ * dependency, but improves for 32-79
+ */
+.macro W_PRECALC_16_31_SSSE3
+  # blended scheduling of vector and scalar instruction streams, one 4-wide
+  # vector iteration / 4 scalar rounds
+  .if ((i & 3) == 0)
+	movdqa	W_minus_12, W
+	palignr	$8, W_minus_16, W	# w[i-14]
+	movdqa	W_minus_04, W_TMP1
+	psrldq	$4, W_TMP1		# w[i-3]
+	pxor	W_minus_08, W
+  .elseif ((i & 3) == 1)
+	pxor	W_minus_16, W_TMP1
+	pxor	W_TMP1, W
+	movdqa	W, W_TMP2
+	movdqa	W, W_TMP1
+	pslldq	$12, W_TMP2
+  .elseif ((i & 3) == 2)
+	psrld	$31, W
+	pslld	$1, W_TMP1
+	por	W, W_TMP1
+	movdqa	W_TMP2, W
+	psrld	$30, W_TMP2
+	pslld	$2, W
+  .elseif ((i & 3) == 3)
+	pxor	W, W_TMP1
+	pxor	W_TMP2, W_TMP1
+	movdqa	W_TMP1, W
+	paddd	K_XMM(K_BASE), W_TMP1
+	movdqa	W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+/* message scheduling pre-compute for rounds 32-79
+ *
+ * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8]  ^ w[i-14] ^ w[i-16]) rol 1
+ * instead we do equal:    w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
+ * allows more efficient vectorization since w[i]=>w[i-3] dependency is broken
+ */
+.macro W_PRECALC_32_79_SSSE3
+  .if ((i & 3) == 0)
+	movdqa	W_minus_04, W_TMP1
+	pxor	W_minus_28, W		# W is W_minus_32 before xor
+	palignr	$8, W_minus_08, W_TMP1
+  .elseif ((i & 3) == 1)
+	pxor	W_minus_16, W
+	pxor	W_TMP1, W
+	movdqa	W, W_TMP1
+  .elseif ((i & 3) == 2)
+	psrld	$30, W
+	pslld	$2, W_TMP1
+	por	W, W_TMP1
+  .elseif ((i & 3) == 3)
+	movdqa	W_TMP1, W
+	paddd	K_XMM(K_BASE), W_TMP1
+	movdqa	W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+.endm		// W_PRECALC_SSSE3
+
+
+#define K1	0x5a827999
+#define K2	0x6ed9eba1
+#define K3	0x8f1bbcdc
+#define K4	0xca62c1d6
+
+.section .rodata
+.align 16
+
+K_XMM_AR:
+	.long K1, K1, K1, K1
+	.long K2, K2, K2, K2
+	.long K3, K3, K3, K3
+	.long K4, K4, K4, K4
+
+BSWAP_SHUFB_CTL:
+	.long 0x00010203
+	.long 0x04050607
+	.long 0x08090a0b
+	.long 0x0c0d0e0f
+
+
+.section .text
+
+W_PRECALC_SSSE3
+.macro xmm_mov a, b
+	movdqu	\a,\b
+.endm
+
+/* SSSE3 optimized implementation:
+ *  extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws,
+ *                                       unsigned int rounds);
+ */
+SHA1_VECTOR_ASM     sha1_transform_ssse3
+
+#ifdef SHA1_ENABLE_AVX_SUPPORT
+
+.macro W_PRECALC_AVX
+
+.purgem W_PRECALC_00_15
+.macro  W_PRECALC_00_15
+    W_PRECALC_00_15_AVX
+.endm
+.purgem W_PRECALC_16_31
+.macro  W_PRECALC_16_31
+    W_PRECALC_16_31_AVX
+.endm
+.purgem W_PRECALC_32_79
+.macro  W_PRECALC_32_79
+    W_PRECALC_32_79_AVX
+.endm
+
+.macro W_PRECALC_00_15_AVX
+  .if ((i & 3) == 0)
+	vmovdqu	(i*4)(BUFFER_PTR), W_TMP1
+  .elseif ((i & 3) == 1)
+	vpshufb	XMM_SHUFB_BSWAP, W_TMP1, W
+  .elseif ((i & 3) == 2)
+	vpaddd	(K_BASE), W, W_TMP1
+  .elseif ((i & 3) == 3)
+	vmovdqa	W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+.macro W_PRECALC_16_31_AVX
+  .if ((i & 3) == 0)
+	vpalignr $8, W_minus_16, W_minus_12, W	# w[i-14]
+	vpsrldq	$4, W_minus_04, W_TMP1		# w[i-3]
+	vpxor	W_minus_08, W, W
+	vpxor	W_minus_16, W_TMP1, W_TMP1
+  .elseif ((i & 3) == 1)
+	vpxor	W_TMP1, W, W
+	vpslldq	$12, W, W_TMP2
+	vpslld	$1, W, W_TMP1
+  .elseif ((i & 3) == 2)
+	vpsrld	$31, W, W
+	vpor	W, W_TMP1, W_TMP1
+	vpslld	$2, W_TMP2, W
+	vpsrld	$30, W_TMP2, W_TMP2
+  .elseif ((i & 3) == 3)
+	vpxor	W, W_TMP1, W_TMP1
+	vpxor	W_TMP2, W_TMP1, W
+	vpaddd	K_XMM(K_BASE), W, W_TMP1
+	vmovdqu	W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+.macro W_PRECALC_32_79_AVX
+  .if ((i & 3) == 0)
+	vpalignr $8, W_minus_08, W_minus_04, W_TMP1
+	vpxor	W_minus_28, W, W		# W is W_minus_32 before xor
+  .elseif ((i & 3) == 1)
+	vpxor	W_minus_16, W_TMP1, W_TMP1
+	vpxor	W_TMP1, W, W
+  .elseif ((i & 3) == 2)
+	vpslld	$2, W, W_TMP1
+	vpsrld	$30, W, W
+	vpor	W, W_TMP1, W
+  .elseif ((i & 3) == 3)
+	vpaddd	K_XMM(K_BASE), W, W_TMP1
+	vmovdqu	W_TMP1, WK(i&~3)
+	W_PRECALC_ROTATE
+  .endif
+.endm
+
+.endm    // W_PRECALC_AVX
+
+W_PRECALC_AVX
+.purgem xmm_mov
+.macro xmm_mov a, b
+	vmovdqu	\a,\b
+.endm
+
+
+/* AVX optimized implementation:
+ *  extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws,
+ *                                     unsigned int rounds);
+ */
+SHA1_VECTOR_ASM     sha1_transform_avx
+
+#endif
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
new file mode 100644
index 000000000000..f916499d0abe
--- /dev/null
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -0,0 +1,240 @@
+/*
+ * Cryptographic API.
+ *
+ * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
+ * Supplemental SSE3 instructions.
+ *
+ * This file is based on sha1_generic.c
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+
+
+asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
+				     unsigned int rounds);
+#ifdef SHA1_ENABLE_AVX_SUPPORT
+asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
+				   unsigned int rounds);
+#endif
+
+static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
+
+
+static int sha1_ssse3_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, unsigned int partial)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int done = 0;
+
+	sctx->count += len;
+
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_transform_asm(sctx->state, sctx->buffer, 1);
+	}
+
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+		sha1_transform_asm(sctx->state, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buffer, data + done, len - done);
+
+	return 0;
+}
+
+static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buffer + partial, data, len);
+
+		return 0;
+	}
+
+	if (!irq_fpu_usable()) {
+		res = crypto_sha1_update(desc, data, len);
+	} else {
+		kernel_fpu_begin();
+		res = __sha1_ssse3_update(desc, data, len, partial);
+		kernel_fpu_end();
+	}
+
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+	if (!irq_fpu_usable()) {
+		crypto_sha1_update(desc, padding, padlen);
+		crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+	} else {
+		kernel_fpu_begin();
+		/* We need to fill a whole block for __sha1_ssse3_update() */
+		if (padlen <= 56) {
+			sctx->count += padlen;
+			memcpy(sctx->buffer + index, padding, padlen);
+		} else {
+			__sha1_ssse3_update(desc, padding, padlen, index);
+		}
+		__sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+		kernel_fpu_end();
+	}
+
+	/* Store state in digest */
+	for (i = 0; i < 5; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_ssse3_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_ssse3_init,
+	.update		=	sha1_ssse3_update,
+	.final		=	sha1_ssse3_final,
+	.export		=	sha1_ssse3_export,
+	.import		=	sha1_ssse3_import,
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-ssse3",
+		.cra_priority	=	150,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+#ifdef SHA1_ENABLE_AVX_SUPPORT
+static bool __init avx_usable(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave)
+		return false;
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+
+		return false;
+	}
+
+	return true;
+}
+#endif
+
+static int __init sha1_ssse3_mod_init(void)
+{
+	/* test for SSSE3 first */
+	if (cpu_has_ssse3)
+		sha1_transform_asm = sha1_transform_ssse3;
+
+#ifdef SHA1_ENABLE_AVX_SUPPORT
+	/* allow AVX to override SSSE3, it's a little faster */
+	if (avx_usable())
+		sha1_transform_asm = sha1_transform_avx;
+#endif
+
+	if (sha1_transform_asm) {
+		pr_info("Using %s optimized SHA-1 implementation\n",
+		        sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
+		                                                   : "AVX");
+		return crypto_register_shash(&alg);
+	}
+	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+
+	return -ENODEV;
+}
+
+static void __exit sha1_ssse3_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_ssse3_mod_init);
+module_exit(sha1_ssse3_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");
+
+MODULE_ALIAS("sha1");
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 575331cb2a8a..658af4bb35c9 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -26,7 +26,7 @@
 
 #define in_blk    12  /* input byte array address parameter*/
 #define out_blk   8  /* output byte array address parameter*/
-#define tfm       4  /* Twofish context structure */
+#define ctx       4  /* Twofish context structure */
 
 #define a_offset	0
 #define b_offset	4
@@ -229,8 +229,8 @@ twofish_enc_blk:
 	push    %esi
 	push    %edi
 
-	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
+					 * pointer to the ctx address */
 	mov     in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
@@ -285,8 +285,8 @@ twofish_dec_blk:
 	push    %edi
 
 
-	mov	tfm + 16(%esp),	%ebp	/* abuse the base pointer: set new base bointer to the crypto tfm */
-	add	$crypto_tfm_ctx_offset, %ebp	/* ctx address */
+	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
+					 * pointer to the ctx address */
 	mov     in_blk+16(%esp),%edi	/* input address in edi */
 
 	mov	(%edi),		%eax
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
new file mode 100644
index 000000000000..5b012a2c5119
--- /dev/null
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -0,0 +1,316 @@
+/*
+ * Twofish Cipher 3-way parallel algorithm (x86_64)
+ *
+ * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "twofish-x86_64-asm-3way.S"
+.text
+
+/* structure of crypto context */
+#define s0	0
+#define s1	1024
+#define s2	2048
+#define s3	3072
+#define w	4096
+#define k	4128
+
+/**********************************************************************
+  3-way twofish
+ **********************************************************************/
+#define CTX %rdi
+#define RIO %rdx
+
+#define RAB0 %rax
+#define RAB1 %rbx
+#define RAB2 %rcx
+
+#define RAB0d %eax
+#define RAB1d %ebx
+#define RAB2d %ecx
+
+#define RAB0bh %ah
+#define RAB1bh %bh
+#define RAB2bh %ch
+
+#define RAB0bl %al
+#define RAB1bl %bl
+#define RAB2bl %cl
+
+#define RCD0 %r8
+#define RCD1 %r9
+#define RCD2 %r10
+
+#define RCD0d %r8d
+#define RCD1d %r9d
+#define RCD2d %r10d
+
+#define RX0 %rbp
+#define RX1 %r11
+#define RX2 %r12
+
+#define RX0d %ebp
+#define RX1d %r11d
+#define RX2d %r12d
+
+#define RY0 %r13
+#define RY1 %r14
+#define RY2 %r15
+
+#define RY0d %r13d
+#define RY1d %r14d
+#define RY2d %r15d
+
+#define RT0 %rdx
+#define RT1 %rsi
+
+#define RT0d %edx
+#define RT1d %esi
+
+#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
+	movzbl ab ## bl,		tmp2 ## d; \
+	movzbl ab ## bh,		tmp1 ## d; \
+	rorq $(rot),			ab; \
+	op1##l T0(CTX, tmp2, 4),	dst ## d; \
+	op2##l T1(CTX, tmp1, 4),	dst ## d;
+
+/*
+ * Combined G1 & G2 function. Reordered with help of rotates to have moves
+ * at begining.
+ */
+#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
+	/* G1,1 && G2,1 */ \
+	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
+	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
+	\
+	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
+	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
+	\
+	do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
+	do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
+	\
+	/* G1,2 && G2,2 */ \
+	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
+	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
+	xchgq cd ## 0, ab ## 0; \
+	\
+	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
+	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
+	xchgq cd ## 1, ab ## 1; \
+	\
+	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
+	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
+	xchgq cd ## 2, ab ## 2;
+
+#define enc_round_end(ab, x, y, n) \
+	addl y ## d,			x ## d; \
+	addl x ## d,			y ## d; \
+	addl k+4*(2*(n))(CTX),		x ## d; \
+	xorl ab ## d,			x ## d; \
+	addl k+4*(2*(n)+1)(CTX),	y ## d; \
+	shrq $32,			ab; \
+	roll $1,			ab ## d; \
+	xorl y ## d,			ab ## d; \
+	shlq $32,			ab; \
+	rorl $1,			x ## d; \
+	orq x,				ab;
+
+#define dec_round_end(ba, x, y, n) \
+	addl y ## d,			x ## d; \
+	addl x ## d,			y ## d; \
+	addl k+4*(2*(n))(CTX),		x ## d; \
+	addl k+4*(2*(n)+1)(CTX),	y ## d; \
+	xorl ba ## d,			y ## d; \
+	shrq $32,			ba; \
+	roll $1,			ba ## d; \
+	xorl x ## d,			ba ## d; \
+	shlq $32,			ba; \
+	rorl $1,			y ## d; \
+	orq y,				ba;
+
+#define encrypt_round3(ab, cd, n) \
+	g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
+	\
+	enc_round_end(ab ## 0, RX0, RY0, n); \
+	enc_round_end(ab ## 1, RX1, RY1, n); \
+	enc_round_end(ab ## 2, RX2, RY2, n);
+
+#define decrypt_round3(ba, dc, n) \
+	g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
+	\
+	dec_round_end(ba ## 0, RX0, RY0, n); \
+	dec_round_end(ba ## 1, RX1, RY1, n); \
+	dec_round_end(ba ## 2, RX2, RY2, n);
+
+#define encrypt_cycle3(ab, cd, n) \
+	encrypt_round3(ab, cd, n*2); \
+	encrypt_round3(ab, cd, (n*2)+1);
+
+#define decrypt_cycle3(ba, dc, n) \
+	decrypt_round3(ba, dc, (n*2)+1); \
+	decrypt_round3(ba, dc, (n*2));
+
+#define inpack3(in, n, xy, m) \
+	movq 4*(n)(in),			xy ## 0; \
+	xorq w+4*m(CTX),		xy ## 0; \
+	\
+	movq 4*(4+(n))(in),		xy ## 1; \
+	xorq w+4*m(CTX),		xy ## 1; \
+	\
+	movq 4*(8+(n))(in),		xy ## 2; \
+	xorq w+4*m(CTX),		xy ## 2;
+
+#define outunpack3(op, out, n, xy, m) \
+	xorq w+4*m(CTX),		xy ## 0; \
+	op ## q xy ## 0,		4*(n)(out); \
+	\
+	xorq w+4*m(CTX),		xy ## 1; \
+	op ## q xy ## 1,		4*(4+(n))(out); \
+	\
+	xorq w+4*m(CTX),		xy ## 2; \
+	op ## q xy ## 2,		4*(8+(n))(out);
+
+#define inpack_enc3() \
+	inpack3(RIO, 0, RAB, 0); \
+	inpack3(RIO, 2, RCD, 2);
+
+#define outunpack_enc3(op) \
+	outunpack3(op, RIO, 2, RAB, 6); \
+	outunpack3(op, RIO, 0, RCD, 4);
+
+#define inpack_dec3() \
+	inpack3(RIO, 0, RAB, 4); \
+	rorq $32,			RAB0; \
+	rorq $32,			RAB1; \
+	rorq $32,			RAB2; \
+	inpack3(RIO, 2, RCD, 6); \
+	rorq $32,			RCD0; \
+	rorq $32,			RCD1; \
+	rorq $32,			RCD2;
+
+#define outunpack_dec3() \
+	rorq $32,			RCD0; \
+	rorq $32,			RCD1; \
+	rorq $32,			RCD2; \
+	outunpack3(mov, RIO, 0, RCD, 0); \
+	rorq $32,			RAB0; \
+	rorq $32,			RAB1; \
+	rorq $32,			RAB2; \
+	outunpack3(mov, RIO, 2, RAB, 2);
+
+.align 8
+.global __twofish_enc_blk_3way
+.type   __twofish_enc_blk_3way,@function;
+
+__twofish_enc_blk_3way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src, RIO
+	 *	%rcx: bool, if true: xor output
+	 */
+	pushq %r15;
+	pushq %r14;
+	pushq %r13;
+	pushq %r12;
+	pushq %rbp;
+	pushq %rbx;
+
+	pushq %rcx; /* bool xor */
+	pushq %rsi; /* dst */
+
+	inpack_enc3();
+
+	encrypt_cycle3(RAB, RCD, 0);
+	encrypt_cycle3(RAB, RCD, 1);
+	encrypt_cycle3(RAB, RCD, 2);
+	encrypt_cycle3(RAB, RCD, 3);
+	encrypt_cycle3(RAB, RCD, 4);
+	encrypt_cycle3(RAB, RCD, 5);
+	encrypt_cycle3(RAB, RCD, 6);
+	encrypt_cycle3(RAB, RCD, 7);
+
+	popq RIO; /* dst */
+	popq %rbp; /* bool xor */
+
+	testb %bpl, %bpl;
+	jnz __enc_xor3;
+
+	outunpack_enc3(mov);
+
+	popq %rbx;
+	popq %rbp;
+	popq %r12;
+	popq %r13;
+	popq %r14;
+	popq %r15;
+	ret;
+
+__enc_xor3:
+	outunpack_enc3(xor);
+
+	popq %rbx;
+	popq %rbp;
+	popq %r12;
+	popq %r13;
+	popq %r14;
+	popq %r15;
+	ret;
+
+.global twofish_dec_blk_3way
+.type   twofish_dec_blk_3way,@function;
+
+twofish_dec_blk_3way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src, RIO
+	 */
+	pushq %r15;
+	pushq %r14;
+	pushq %r13;
+	pushq %r12;
+	pushq %rbp;
+	pushq %rbx;
+
+	pushq %rsi; /* dst */
+
+	inpack_dec3();
+
+	decrypt_cycle3(RAB, RCD, 7);
+	decrypt_cycle3(RAB, RCD, 6);
+	decrypt_cycle3(RAB, RCD, 5);
+	decrypt_cycle3(RAB, RCD, 4);
+	decrypt_cycle3(RAB, RCD, 3);
+	decrypt_cycle3(RAB, RCD, 2);
+	decrypt_cycle3(RAB, RCD, 1);
+	decrypt_cycle3(RAB, RCD, 0);
+
+	popq RIO; /* dst */
+
+	outunpack_dec3();
+
+	popq %rbx;
+	popq %rbp;
+	popq %r12;
+	popq %r13;
+	popq %r14;
+	popq %r15;
+	ret;
+
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index 573aa102542e..7bcf3fcc3668 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -221,10 +221,9 @@
 twofish_enc_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm address */
+	/* %rdi contains the ctx address */
 	/* %rsi contains the output address */
 	/* %rdx contains the input address */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
 	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi,		%r11
@@ -274,10 +273,9 @@ twofish_enc_blk:
 twofish_dec_blk:
 	pushq    R1
 
-	/* %rdi contains the crypto tfm address */
+	/* %rdi contains the ctx address */
 	/* %rsi contains the output address */
 	/* %rdx contains the input address */
-	add	$crypto_tfm_ctx_offset, %rdi	/* set ctx address */
 	/* ctx address is moved to free one non-rex register
 	as target for the 8bit high operations */
 	mov	%rdi,		%r11
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index cefaf8b9aa18..dc6b3fb817fc 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -44,17 +44,21 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+EXPORT_SYMBOL_GPL(twofish_enc_blk);
+asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+EXPORT_SYMBOL_GPL(twofish_dec_blk);
 
 static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	twofish_enc_blk(tfm, dst, src);
+	twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	twofish_dec_blk(tfm, dst, src);
+	twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 static struct crypto_alg alg = {
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
new file mode 100644
index 000000000000..5ede9c444c3e
--- /dev/null
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -0,0 +1,472 @@
+/*
+ * Glue Code for 3-way parallel assembler optimized version of Twofish
+ *
+ * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/twofish.h>
+#include <crypto/b128ops.h>
+
+/* regular block cipher functions from twofish_x86_64 module */
+asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+				const u8 *src);
+
+/* 3-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+				       const u8 *src, bool xor);
+asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+				     const u8 *src);
+
+static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__twofish_enc_blk_3way(ctx, dst, src, false);
+}
+
+static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
+					    const u8 *src)
+{
+	__twofish_enc_blk_3way(ctx, dst, src, true);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
+		     void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
+{
+	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = TF_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		/* Process three block batch */
+		if (nbytes >= bsize * 3) {
+			do {
+				fn_3way(ctx, wdst, wsrc);
+
+				wsrc += bsize * 3;
+				wdst += bsize * 3;
+				nbytes -= bsize * 3;
+			} while (nbytes >= bsize * 3);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			fn(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
+}
+
+static struct crypto_alg blk_ecb_alg = {
+	.cra_name		= "ecb(twofish)",
+	.cra_driver_name	= "ecb-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+};
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = TF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = TF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 ivs[3 - 1];
+	u128 last_iv;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	/* Process three block batch */
+	if (nbytes >= bsize * 3) {
+		do {
+			nbytes -= bsize * (3 - 1);
+			src -= 3 - 1;
+			dst -= 3 - 1;
+
+			ivs[0] = src[0];
+			ivs[1] = src[1];
+
+			twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
+
+			u128_xor(dst + 1, dst + 1, ivs + 0);
+			u128_xor(dst + 2, dst + 2, ivs + 1);
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			u128_xor(dst, dst, src - 1);
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * 3);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	for (;;) {
+		twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		u128_xor(dst, dst, src - 1);
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	u128_xor(dst, dst, (u128 *)walk->iv);
+	*(u128 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_decrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct crypto_alg blk_cbc_alg = {
+	.cra_name		= "cbc(twofish)",
+	.cra_driver_name	= "cbc-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= TF_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+};
+
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+	dst->a = cpu_to_be64(src->a);
+	dst->b = cpu_to_be64(src->b);
+}
+
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+	dst->a = be64_to_cpu(src->a);
+	dst->b = be64_to_cpu(src->b);
+}
+
+static inline void u128_inc(u128 *i)
+{
+	i->b++;
+	if (!i->b)
+		i->a++;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk)
+{
+	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *ctrblk = walk->iv;
+	u8 keystream[TF_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	twofish_enc_blk(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+
+	crypto_inc(ctrblk, TF_BLOCK_SIZE);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = TF_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 ctrblk;
+	be128 ctrblocks[3];
+
+	be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+	/* Process three block batch */
+	if (nbytes >= bsize * 3) {
+		do {
+			if (dst != src) {
+				dst[0] = src[0];
+				dst[1] = src[1];
+				dst[2] = src[2];
+			}
+
+			/* create ctrblks for parallel encrypt */
+			u128_to_be128(&ctrblocks[0], &ctrblk);
+			u128_inc(&ctrblk);
+			u128_to_be128(&ctrblocks[1], &ctrblk);
+			u128_inc(&ctrblk);
+			u128_to_be128(&ctrblocks[2], &ctrblk);
+			u128_inc(&ctrblk);
+
+			twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
+						 (u8 *)ctrblocks);
+
+			src += 3;
+			dst += 3;
+			nbytes -= bsize * 3;
+		} while (nbytes >= bsize * 3);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		if (dst != src)
+			*dst = *src;
+
+		u128_to_be128(&ctrblocks[0], &ctrblk);
+		u128_inc(&ctrblk);
+
+		twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
+		u128_xor(dst, dst, (u128 *)ctrblocks);
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+done:
+	u128_to_be128((be128 *)walk->iv, &ctrblk);
+	return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
+
+	while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	if (walk.nbytes) {
+		ctr_crypt_final(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+static struct crypto_alg blk_ctr_alg = {
+	.cra_name		= "ctr(twofish)",
+	.cra_driver_name	= "ctr-twofish-3way",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct twofish_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= TF_MIN_KEY_SIZE,
+			.max_keysize	= TF_MAX_KEY_SIZE,
+			.ivsize		= TF_BLOCK_SIZE,
+			.setkey		= twofish_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+};
+
+int __init init(void)
+{
+	int err;
+
+	err = crypto_register_alg(&blk_ecb_alg);
+	if (err)
+		goto ecb_err;
+	err = crypto_register_alg(&blk_cbc_alg);
+	if (err)
+		goto cbc_err;
+	err = crypto_register_alg(&blk_ctr_alg);
+	if (err)
+		goto ctr_err;
+
+	return 0;
+
+ctr_err:
+	crypto_unregister_alg(&blk_cbc_alg);
+cbc_err:
+	crypto_unregister_alg(&blk_ecb_alg);
+ecb_err:
+	return err;
+}
+
+void __exit fini(void)
+{
+	crypto_unregister_alg(&blk_ctr_alg);
+	crypto_unregister_alg(&blk_cbc_alg);
+	crypto_unregister_alg(&blk_ecb_alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
+MODULE_ALIAS("twofish");
+MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 54edb207ff3a..a6253ec1b284 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -850,4 +850,6 @@ ia32_sys_call_table:
 	.quad sys_syncfs
 	.quad compat_sys_sendmmsg	/* 345 */
 	.quad sys_setns
+	.quad compat_sys_process_vm_readv
+	.quad compat_sys_process_vm_writev
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 67f87f257611..8e41071704a5 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -19,9 +19,15 @@ extern int amd_numa_init(void);
 extern int amd_get_subcaches(int);
 extern int amd_set_subcaches(int, int);
 
+struct amd_l3_cache {
+	unsigned indices;
+	u8	 subcaches[4];
+};
+
 struct amd_northbridge {
 	struct pci_dev *misc;
 	struct pci_dev *link;
+	struct amd_l3_cache l3_cache;
 };
 
 struct amd_northbridge_info {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 7b3ca8324b69..9b7273cb2193 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -495,7 +495,7 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert)
 	return;
 }
 
-extern struct apic *generic_bigsmp_probe(void);
+extern void generic_bigsmp_probe(void);
 
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 34595d5e1038..3925d8007864 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -100,7 +100,9 @@
 #define		APIC_TIMER_BASE_CLKIN		0x0
 #define		APIC_TIMER_BASE_TMBASE		0x1
 #define		APIC_TIMER_BASE_DIV		0x2
+#define		APIC_LVT_TIMER_ONESHOT		(0 << 17)
 #define		APIC_LVT_TIMER_PERIODIC		(1 << 17)
+#define		APIC_LVT_TIMER_TSCDEADLINE	(2 << 17)
 #define		APIC_LVT_MASKED			(1 << 16)
 #define		APIC_LVT_LEVEL_TRIGGER		(1 << 15)
 #define		APIC_LVT_REMOTE_IRR		(1 << 14)
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
new file mode 100644
index 000000000000..0d9ec770f2f8
--- /dev/null
+++ b/arch/x86/include/asm/archrandom.h
@@ -0,0 +1,75 @@
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ *          H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef ASM_X86_ARCHRANDOM_H
+#define ASM_X86_ARCHRANDOM_H
+
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/nops.h>
+
+#define RDRAND_RETRY_LOOPS	10
+
+#define RDRAND_INT	".byte 0x0f,0xc7,0xf0"
+#ifdef CONFIG_X86_64
+# define RDRAND_LONG	".byte 0x48,0x0f,0xc7,0xf0"
+#else
+# define RDRAND_LONG	RDRAND_INT
+#endif
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#define GET_RANDOM(name, type, rdrand, nop)			\
+static inline int name(type *v)					\
+{								\
+	int ok;							\
+	alternative_io("movl $0, %0\n\t"			\
+		       nop,					\
+		       "\n1: " rdrand "\n\t"			\
+		       "jc 2f\n\t"				\
+		       "decl %0\n\t"                            \
+		       "jnz 1b\n\t"                             \
+		       "2:",                                    \
+		       X86_FEATURE_RDRAND,                      \
+		       ASM_OUTPUT2("=r" (ok), "=a" (*v)),       \
+		       "0" (RDRAND_RETRY_LOOPS));		\
+	return ok;						\
+}
+
+#ifdef CONFIG_X86_64
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+
+#else
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+
+#endif /* CONFIG_X86_64 */
+
+#endif  /* CONFIG_ARCH_RANDOM */
+
+extern void x86_init_rdrand(struct cpuinfo_x86 *c);
+
+#endif /* ASM_X86_ARCHRANDOM_H */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 10572e309ab2..58cb6d4085f7 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -172,18 +172,14 @@ static inline int atomic_add_negative(int i, atomic_t *v)
  */
 static inline int atomic_add_return(int i, atomic_t *v)
 {
-	int __i;
 #ifdef CONFIG_M386
+	int __i;
 	unsigned long flags;
 	if (unlikely(boot_cpu_data.x86 <= 3))
 		goto no_xadd;
 #endif
 	/* Modern 486+ processor */
-	__i = i;
-	asm volatile(LOCK_PREFIX "xaddl %0, %1"
-		     : "+r" (i), "+m" (v->counter)
-		     : : "memory");
-	return i + __i;
+	return i + xadd(&v->counter, i);
 
 #ifdef CONFIG_M386
 no_xadd: /* Legacy 386 processor */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 017594d403f6..0e1cbfc8ee06 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -170,11 +170,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v)
  */
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
-	long __i = i;
-	asm volatile(LOCK_PREFIX "xaddq %0, %1;"
-		     : "+r" (i), "+m" (v->counter)
-		     : : "memory");
-	return i + __i;
+	return i + xadd(&v->counter, i);
 }
 
 static inline long atomic64_sub_return(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a460fa088d4c..5d3acdf5a7a6 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -1,5 +1,210 @@
+#ifndef ASM_X86_CMPXCHG_H
+#define ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+#include <asm/alternative.h> /* Provides LOCK_PREFIX */
+
+/*
+ * Non-existant functions to indicate usage errors at link time
+ * (or compile-time if the compiler implements __compiletime_error().
+ */
+extern void __xchg_wrong_size(void)
+	__compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_wrong_size(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+extern void __xadd_wrong_size(void)
+	__compiletime_error("Bad argument size for xadd");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size it set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteeed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B	1
+#define __X86_CASE_W	2
+#define __X86_CASE_L	4
+#ifdef CONFIG_64BIT
+#define __X86_CASE_Q	8
+#else
+#define	__X86_CASE_Q	-1		/* sizeof will never return -1 */
+#endif
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
+ * Since this is generally used to protect other memory information, we
+ * use "asm volatile" and "memory" clobbers to prevent gcc from moving
+ * information around.
+ */
+#define __xchg(x, ptr, size)						\
+({									\
+	__typeof(*(ptr)) __x = (x);					\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
+		asm volatile("xchgb %0,%1"				\
+			     : "=q" (__x), "+m" (*__ptr)		\
+			     : "0" (__x)				\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
+		asm volatile("xchgw %0,%1"				\
+			     : "=r" (__x), "+m" (*__ptr)		\
+			     : "0" (__x)				\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
+		asm volatile("xchgl %0,%1"				\
+			     : "=r" (__x), "+m" (*__ptr)		\
+			     : "0" (__x)				\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
+		asm volatile("xchgq %0,%1"				\
+			     : "=r" (__x), "+m" (*__ptr)		\
+			     : "0" (__x)				\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__xchg_wrong_size();					\
+	}								\
+	__x;								\
+})
+
+#define xchg(ptr, v)							\
+	__xchg((v), (ptr), sizeof(*ptr))
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock)			\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
+		asm volatile(lock "cmpxchgb %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "q" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
+		asm volatile(lock "cmpxchgw %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
+		asm volatile(lock "cmpxchgl %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
+		asm volatile(lock "cmpxchgq %2,%1"			\
+			     : "=a" (__ret), "+m" (*__ptr)		\
+			     : "r" (__new), "0" (__old)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	__ret;								\
+})
+
+#define __cmpxchg(ptr, old, new, size)					\
+	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define __sync_cmpxchg(ptr, old, new, size)				\
+	__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+
+#define __cmpxchg_local(ptr, old, new, size)				\
+	__raw_cmpxchg((ptr), (old), (new), (size), "")
+
 #ifdef CONFIG_X86_32
 # include "cmpxchg_32.h"
 #else
 # include "cmpxchg_64.h"
 #endif
+
+#ifdef __HAVE_ARCH_CMPXCHG
+#define cmpxchg(ptr, old, new)						\
+	__cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define sync_cmpxchg(ptr, old, new)					\
+	__sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define cmpxchg_local(ptr, old, new)					\
+	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
+#endif
+
+#define __xadd(ptr, inc, lock)						\
+	({								\
+	        __typeof__ (*(ptr)) __ret = (inc);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_B:					\
+			asm volatile (lock "xaddb %b0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_W:					\
+			asm volatile (lock "xaddw %w0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_L:					\
+			asm volatile (lock "xaddl %0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm volatile (lock "xaddq %q0, %1\n"		\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__xadd_wrong_size();				\
+		}							\
+		__ret;							\
+	})
+
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
+
+#endif	/* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 3deb7250624c..fbebb07dd80b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -1,61 +1,11 @@
 #ifndef _ASM_X86_CMPXCHG_32_H
 #define _ASM_X86_CMPXCHG_32_H
 
-#include <linux/bitops.h> /* for LOCK_PREFIX */
-
 /*
  * Note: if you use set64_bit(), __cmpxchg64(), or their variants, you
  *       you need to test for the feature in boot_cpu_data.
  */
 
-extern void __xchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case 1:								\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile("xchgb %0,%1"				\
-			     : "=q" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 2:								\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile("xchgw %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 4:								\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile("xchgl %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
-
 /*
  * CMPXCHG8B only writes to the target if we had the previous
  * value in registers, otherwise it acts as a read and gives us the
@@ -84,72 +34,8 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
 		     : "memory");
 }
 
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock)			\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	__typeof__(*(ptr)) __old = (old);				\
-	__typeof__(*(ptr)) __new = (new);				\
-	switch (size) {							\
-	case 1:								\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile(lock "cmpxchgb %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "q" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 2:								\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile(lock "cmpxchgw %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 4:								\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile(lock "cmpxchgl %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__cmpxchg_wrong_size();					\
-	}								\
-	__ret;								\
-})
-
-#define __cmpxchg(ptr, old, new, size)					\
-	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "")
-
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, old, new)						\
-	__cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new)					\
-	__sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new)					\
-	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif
 
 #ifdef CONFIG_X86_CMPXCHG64
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 7cf5c0a24434..285da02c38fa 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -1,144 +1,13 @@
 #ifndef _ASM_X86_CMPXCHG_64_H
 #define _ASM_X86_CMPXCHG_64_H
 
-#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-
 static inline void set_64bit(volatile u64 *ptr, u64 val)
 {
 	*ptr = val;
 }
 
-extern void __xchg_wrong_size(void);
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size)						\
-({									\
-	__typeof(*(ptr)) __x = (x);					\
-	switch (size) {							\
-	case 1:								\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile("xchgb %0,%1"				\
-			     : "=q" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 2:								\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile("xchgw %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 4:								\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile("xchgl %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 8:								\
-	{								\
-		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
-		asm volatile("xchgq %0,%1"				\
-			     : "=r" (__x), "+m" (*__ptr)		\
-			     : "0" (__x)				\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__xchg_wrong_size();					\
-	}								\
-	__x;								\
-})
-
-#define xchg(ptr, v)							\
-	__xchg((v), (ptr), sizeof(*ptr))
-
 #define __HAVE_ARCH_CMPXCHG 1
 
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock)			\
-({									\
-	__typeof__(*(ptr)) __ret;					\
-	__typeof__(*(ptr)) __old = (old);				\
-	__typeof__(*(ptr)) __new = (new);				\
-	switch (size) {							\
-	case 1:								\
-	{								\
-		volatile u8 *__ptr = (volatile u8 *)(ptr);		\
-		asm volatile(lock "cmpxchgb %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "q" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 2:								\
-	{								\
-		volatile u16 *__ptr = (volatile u16 *)(ptr);		\
-		asm volatile(lock "cmpxchgw %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 4:								\
-	{								\
-		volatile u32 *__ptr = (volatile u32 *)(ptr);		\
-		asm volatile(lock "cmpxchgl %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	case 8:								\
-	{								\
-		volatile u64 *__ptr = (volatile u64 *)(ptr);		\
-		asm volatile(lock "cmpxchgq %2,%1"			\
-			     : "=a" (__ret), "+m" (*__ptr)		\
-			     : "r" (__new), "0" (__old)			\
-			     : "memory");				\
-		break;							\
-	}								\
-	default:							\
-		__cmpxchg_wrong_size();					\
-	}								\
-	__ret;								\
-})
-
-#define __cmpxchg(ptr, old, new, size)					\
-	__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size)				\
-	__raw_cmpxchg((ptr), (old), (new), (size), "")
-
-#define cmpxchg(ptr, old, new)						\
-	__cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new)					\
-	__sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new)					\
-	__cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
-
 #define cmpxchg64(ptr, o, n)						\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 1d9cd27c2920..30d737ef2a42 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -108,7 +108,8 @@ struct compat_statfs {
 	compat_fsid_t	f_fsid;
 	int		f_namelen;	/* SunOS ignores this field. */
 	int		f_frsize;
-	int		f_spare[5];
+	int		f_flags;
+	int		f_spare[4];
 };
 
 #define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 88b23a43f340..f3444f700f36 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -114,12 +114,14 @@
 #define X86_FEATURE_CX16	(4*32+13) /* CMPXCHG16B */
 #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
 #define X86_FEATURE_PDCM	(4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID	(4*32+17) /* Process Context Identifiers */
 #define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
 #define X86_FEATURE_XMM4_1	(4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2	(4*32+20) /* "sse4_2" SSE-4.2 */
 #define X86_FEATURE_X2APIC	(4*32+21) /* x2APIC */
 #define X86_FEATURE_MOVBE	(4*32+22) /* MOVBE instruction */
 #define X86_FEATURE_POPCNT      (4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	(4*32+24) /* Tsc deadline timer */
 #define X86_FEATURE_AES		(4*32+25) /* AES instructions */
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
@@ -257,7 +259,9 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
 #define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
 #define cpu_has_xmm3		boot_cpu_has(X86_FEATURE_XMM3)
+#define cpu_has_ssse3		boot_cpu_has(X86_FEATURE_SSSE3)
 #define cpu_has_aes		boot_cpu_has(X86_FEATURE_AES)
+#define cpu_has_avx		boot_cpu_has(X86_FEATURE_AVX)
 #define cpu_has_ht		boot_cpu_has(X86_FEATURE_HT)
 #define cpu_has_mp		boot_cpu_has(X86_FEATURE_MP)
 #define cpu_has_nx		boot_cpu_has(X86_FEATURE_NX)
@@ -285,6 +289,7 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 029f230ab637..63a2a03d7d51 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -8,7 +8,7 @@ struct dev_archdata {
 #ifdef CONFIG_X86_64
 struct dma_map_ops *dma_ops;
 #endif
-#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU)
+#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
 	void *iommu; /* hook for IOMMU specific extension */
 #endif
 };
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index d4c419f883a0..ed3065fd6314 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,7 +2,7 @@
 #define _ASM_X86_DMA_MAPPING_H
 
 /*
- * IOMMU interface. See Documentation/PCI/PCI-DMA-mapping.txt and
+ * IOMMU interface. See Documentation/DMA-API-HOWTO.txt and
  * Documentation/DMA-API.txt for documentation.
  */
 
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 326099199318..f6f15986df6c 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -27,6 +27,7 @@
 #define CFI_REMEMBER_STATE	.cfi_remember_state
 #define CFI_RESTORE_STATE	.cfi_restore_state
 #define CFI_UNDEFINED		.cfi_undefined
+#define CFI_ESCAPE		.cfi_escape
 
 #ifdef CONFIG_AS_CFI_SIGNAL_FRAME
 #define CFI_SIGNAL_FRAME	.cfi_signal_frame
@@ -68,6 +69,7 @@
 #define CFI_REMEMBER_STATE	cfi_ignore
 #define CFI_RESTORE_STATE	cfi_ignore
 #define CFI_UNDEFINED		cfi_ignore
+#define CFI_ESCAPE		cfi_ignore
 #define CFI_SIGNAL_FRAME	cfi_ignore
 
 #endif
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f2ad2163109d..5f962df30d0f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -4,6 +4,7 @@
 /*
  * ELF register definitions..
  */
+#include <linux/thread_info.h>
 
 #include <asm/ptrace.h>
 #include <asm/user.h>
@@ -320,4 +321,34 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+#ifdef CONFIG_X86_32
+	return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+	if (test_thread_flag(TIF_IA32))
+		return 1;
+#endif
+	return 0;
+}
+
+/* The first two values are special, do not change. See align_addr() */
+enum align_flags {
+	ALIGN_VA_32	= BIT(0),
+	ALIGN_VA_64	= BIT(1),
+	ALIGN_VDSO	= BIT(2),
+	ALIGN_TOPDOWN	= BIT(3),
+};
+
+struct va_alignment {
+	int flags;
+	unsigned long mask;
+} ____cacheline_aligned;
+
+extern struct va_alignment va_align;
+extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
 #endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 09199052060f..eb92a6ed2be7 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -119,7 +119,7 @@ struct irq_cfg {
 	cpumask_var_t		old_domain;
 	u8			vector;
 	u8			move_in_progress : 1;
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 	struct irq_2_iommu	irq_2_iommu;
 #endif
 };
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
index 5df477ac3af7..b80420bcd09d 100644
--- a/arch/x86/include/asm/hyperv.h
+++ b/arch/x86/include/asm/hyperv.h
@@ -189,5 +189,6 @@
 #define HV_STATUS_INVALID_HYPERCALL_CODE	2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT	3
 #define HV_STATUS_INVALID_ALIGNMENT		4
+#define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
 #endif
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 1c23360fb2d8..47d99934580f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -3,7 +3,8 @@
 
 #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 static inline void prepare_irte(struct irte *irte, int vector,
 			        unsigned int dest)
 {
@@ -36,6 +37,9 @@ static inline bool irq_remapped(struct irq_cfg *cfg)
 {
 	return false;
 }
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+}
 #endif
 
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 7e50f06393aa..4b4448761e88 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -160,19 +160,11 @@ static inline int invalid_vm86_irq(int irq)
 #define IO_APIC_VECTOR_LIMIT		( 32 * MAX_IO_APICS )
 
 #ifdef CONFIG_X86_IO_APIC
-# ifdef CONFIG_SPARSE_IRQ
-#  define CPU_VECTOR_LIMIT		(64 * NR_CPUS)
-#  define NR_IRQS					\
+# define CPU_VECTOR_LIMIT		(64 * NR_CPUS)
+# define NR_IRQS					\
 	(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ?	\
 		(NR_VECTORS + CPU_VECTOR_LIMIT)  :	\
 		(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
-# else
-#  define CPU_VECTOR_LIMIT		(32 * NR_CPUS)
-#  define NR_IRQS					\
-	(CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ?	\
-		(NR_VECTORS + CPU_VECTOR_LIMIT)  :	\
-		(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
-# endif
 #else /* !CONFIG_X86_IO_APIC: */
 # define NR_IRQS			NR_IRQS_LEGACY
 #endif
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 6040d115ef51..a026507893e9 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -262,7 +262,7 @@ struct x86_emulate_ctxt {
 	struct operand dst;
 	bool has_seg_override;
 	u8 seg_override;
-	unsigned int d;
+	u64 d;
 	int (*execute)(struct x86_emulate_ctxt *ctxt);
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 	/* modrm */
@@ -275,6 +275,8 @@ struct x86_emulate_ctxt {
 	unsigned long _eip;
 	/* Fields above regs are cleared together. */
 	unsigned long regs[NR_VCPU_REGS];
+	struct operand memop;
+	struct operand *memopp;
 	struct fetch_cache fetch;
 	struct read_cache io_read;
 	struct read_cache mem_read;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd51c83aa5de..b4973f4dab98 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -26,7 +26,8 @@
 #include <asm/mtrr.h>
 #include <asm/msr-index.h>
 
-#define KVM_MAX_VCPUS 64
+#define KVM_MAX_VCPUS 254
+#define KVM_SOFT_MAX_VCPUS 64
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -264,6 +265,7 @@ struct kvm_mmu {
 	void (*new_cr3)(struct kvm_vcpu *vcpu);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
 	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
+	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
 	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
 			  bool prefault);
 	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
@@ -411,8 +413,9 @@ struct kvm_vcpu_arch {
 	u32  tsc_catchup_mult;
 	s8   tsc_catchup_shift;
 
-	bool nmi_pending;
-	bool nmi_injected;
+	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
+	unsigned nmi_pending; /* NMI queued after currently running handler */
+	bool nmi_injected;    /* Trying to inject an NMI this entry */
 
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
@@ -628,14 +631,13 @@ struct kvm_x86_ops {
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
+	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
 
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
-
-	const struct trace_print_flags *exit_reasons_str;
 };
 
 struct kvm_arch_async_pf {
@@ -672,6 +674,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
+u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
+
 /* control of guest tsc rate supported? */
 extern bool kvm_has_tsc_control;
 /* minimum supported tsc_khz for guests */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d52609aeeab8..a6962d9161a0 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -229,6 +229,8 @@
 #define MSR_IA32_APICBASE_ENABLE	(1<<11)
 #define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
 
+#define MSR_IA32_TSCDEADLINE		0x000006e0
+
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 4886a68f267e..fd3f9f18cf3f 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -22,27 +22,26 @@ void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
-/*
- * Define some priorities for the nmi notifier call chain.
- *
- * Create a local nmi bit that has a higher priority than
- * external nmis, because the local ones are more frequent.
- *
- * Also setup some default high/normal/low settings for
- * subsystems to registers with.  Using 4 bits to separate
- * the priorities.  This can go a lot higher if needed be.
- */
-
-#define NMI_LOCAL_SHIFT		16	/* randomly picked */
-#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
-#define NMI_HIGH_PRIOR		(1ULL << 8)
-#define NMI_NORMAL_PRIOR	(1ULL << 4)
-#define NMI_LOW_PRIOR		(1ULL << 0)
-#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
-#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
-#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+#define NMI_FLAG_FIRST	1
+
+enum {
+	NMI_LOCAL=0,
+	NMI_UNKNOWN,
+	NMI_MAX
+};
+
+#define NMI_DONE	0
+#define NMI_HANDLED	1
+
+typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *);
+
+int register_nmi_handler(unsigned int, nmi_handler_t, unsigned long,
+			 const char *);
+
+void unregister_nmi_handler(unsigned int, const char *);
 
 void stop_nmi(void);
 void restart_nmi(void);
+void local_touch_nmi(void);
 
 #endif /* _ASM_X86_NMI_H */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 704526734bef..e38197806853 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -99,10 +99,10 @@ struct pci_raw_ops {
 						int reg, int len, u32 val);
 };
 
-extern struct pci_raw_ops *raw_pci_ops;
-extern struct pci_raw_ops *raw_pci_ext_ops;
+extern const struct pci_raw_ops *raw_pci_ops;
+extern const struct pci_raw_ops *raw_pci_ext_ops;
 
-extern struct pci_raw_ops pci_direct_conf1;
+extern const struct pci_raw_ops pci_direct_conf1;
 extern bool port_cf9_safe;
 
 /* arch_initcall level */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 094fb30817ab..f61c62f7d5d8 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,6 +29,9 @@
 #define ARCH_PERFMON_EVENTSEL_INV			(1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK			0xFF000000ULL
 
+#define AMD_PERFMON_EVENTSEL_GUESTONLY			(1ULL << 40)
+#define AMD_PERFMON_EVENTSEL_HOSTONLY			(1ULL << 41)
+
 #define AMD64_EVENTSEL_EVENT	\
 	(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
 #define INTEL_ARCH_EVENT_MASK	\
@@ -43,14 +46,17 @@
 #define AMD64_RAW_EVENT_MASK		\
 	(X86_RAW_EVENT_MASK          |  \
 	 AMD64_EVENTSEL_EVENT)
+#define AMD64_NUM_COUNTERS				4
+#define AMD64_NUM_COUNTERS_F15H				6
+#define AMD64_NUM_COUNTERS_MAX				AMD64_NUM_COUNTERS_F15H
 
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX			 0
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX		0
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \
 		(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
-#define ARCH_PERFMON_BRANCH_MISSES_RETIRED			 6
+#define ARCH_PERFMON_BRANCH_MISSES_RETIRED		6
 
 /*
  * Intel "Architectural Performance Monitoring" CPUID
@@ -110,6 +116,35 @@ union cpuid10_edx {
  */
 #define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
 
+/*
+ * IBS cpuid feature detection
+ */
+
+#define IBS_CPUID_FEATURES		0x8000001b
+
+/*
+ * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
+ * bit 0 is used to indicate the existence of IBS.
+ */
+#define IBS_CAPS_AVAIL			(1U<<0)
+#define IBS_CAPS_FETCHSAM		(1U<<1)
+#define IBS_CAPS_OPSAM			(1U<<2)
+#define IBS_CAPS_RDWROPCNT		(1U<<3)
+#define IBS_CAPS_OPCNT			(1U<<4)
+#define IBS_CAPS_BRNTRGT		(1U<<5)
+#define IBS_CAPS_OPCNTEXT		(1U<<6)
+
+#define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\
+					 | IBS_CAPS_FETCHSAM	\
+					 | IBS_CAPS_OPSAM)
+
+/*
+ * IBS APIC setup
+ */
+#define IBSCTL				0x1cc
+#define IBSCTL_LVT_OFFSET_VALID		(1ULL<<8)
+#define IBSCTL_LVT_OFFSET_MASK		0x0F
+
 /* IbsFetchCtl bits/masks */
 #define IBS_FETCH_RAND_EN	(1ULL<<57)
 #define IBS_FETCH_VAL		(1ULL<<49)
@@ -124,6 +159,8 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT		0x0000FFFFULL
 #define IBS_OP_MAX_CNT_EXT	0x007FFFFFULL	/* not a register bit mask */
 
+extern u32 get_ibs_caps(void);
+
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
 
@@ -159,7 +196,19 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 	);							\
 }
 
+struct perf_guest_switch_msr {
+	unsigned msr;
+	u64 host, guest;
+};
+
+extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 #else
+static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+	*nr = 0;
+	return NULL;
+}
+
 static inline void perf_events_lapic_init(void)	{ }
 #endif
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0d1171c97729..b650435ffb53 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -111,6 +111,7 @@ struct cpuinfo_x86 {
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 #endif
+	u32			microcode;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL	0
@@ -179,7 +180,8 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 	      "=b" (*ebx),
 	      "=c" (*ecx),
 	      "=d" (*edx)
-	    : "0" (*eax), "2" (*ecx));
+	    : "0" (*eax), "2" (*ecx)
+	    : "memory");
 }
 
 static inline void load_cr3(pgd_t *pgdir)
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 3250e3d605d9..92f297069e87 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -23,7 +23,7 @@ void machine_real_restart(unsigned int type);
 #define MRR_BIOS	0
 #define MRR_APM		1
 
-typedef void (*nmi_shootdown_cb)(int, struct die_args*);
+typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
 
 #endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index df4cd32b4cc6..2dbe4a721ce5 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -204,13 +204,7 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
  */
 static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 {
-	long tmp = delta;
-
-	asm volatile(LOCK_PREFIX "xadd %0,%1"
-		     : "+r" (tmp), "+m" (sem->count)
-		     : : "memory");
-
-	return tmp + delta;
+	return delta + xadd(&sem->count, delta);
 }
 
 #endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index ee67edf86fdd..972c260919a3 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -49,109 +49,49 @@
  * issues and should be optimal for the uncontended case. Note the tail must be
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
- *
- * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
- * save some instructions and make the code more elegant. There really isn't
- * much between them in performance though, especially as locks are out of line.
  */
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
-
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
-	short inc = 0x0100;
-
-	asm volatile (
-		LOCK_PREFIX "xaddw %w0, %1\n"
-		"1:\t"
-		"cmpb %h0, %b0\n\t"
-		"je 2f\n\t"
-		"rep ; nop\n\t"
-		"movb %1, %b0\n\t"
-		/* don't need lfence here, because loads are in-order */
-		"jmp 1b\n"
-		"2:"
-		: "+Q" (inc), "+m" (lock->slock)
-		:
-		: "memory", "cc");
+	register struct __raw_tickets inc = { .tail = 1 };
+
+	inc = xadd(&lock->tickets, inc);
+
+	for (;;) {
+		if (inc.head == inc.tail)
+			break;
+		cpu_relax();
+		inc.head = ACCESS_ONCE(lock->tickets.head);
+	}
+	barrier();		/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
-	int tmp, new;
-
-	asm volatile("movzwl %2, %0\n\t"
-		     "cmpb %h0,%b0\n\t"
-		     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
-		     "jne 1f\n\t"
-		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
-		     "1:"
-		     "sete %b1\n\t"
-		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-		     :
-		     : "memory", "cc");
+	arch_spinlock_t old, new;
+
+	old.tickets = ACCESS_ONCE(lock->tickets);
+	if (old.tickets.head != old.tickets.tail)
+		return 0;
+
+	new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
 
-	return tmp;
+	/* cmpxchg is a full barrier, so nothing can move before it */
+	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
+#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-		     : "+m" (lock->slock)
+		     : "+m" (lock->head_tail)
 		     :
 		     : "memory", "cc");
 }
 #else
-#define TICKET_SHIFT 16
-
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
-{
-	int inc = 0x00010000;
-	int tmp;
-
-	asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
-		     "movzwl %w0, %2\n\t"
-		     "shrl $16, %0\n\t"
-		     "1:\t"
-		     "cmpl %0, %2\n\t"
-		     "je 2f\n\t"
-		     "rep ; nop\n\t"
-		     "movzwl %1, %2\n\t"
-		     /* don't need lfence here, because loads are in-order */
-		     "jmp 1b\n"
-		     "2:"
-		     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
-		     :
-		     : "memory", "cc");
-}
-
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
-	int tmp;
-	int new;
-
-	asm volatile("movl %2,%0\n\t"
-		     "movl %0,%1\n\t"
-		     "roll $16, %0\n\t"
-		     "cmpl %0,%1\n\t"
-		     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
-		     "jne 1f\n\t"
-		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
-		     "1:"
-		     "sete %b1\n\t"
-		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-		     :
-		     : "memory", "cc");
-
-	return tmp;
-}
-
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
 	asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-		     : "+m" (lock->slock)
+		     : "+m" (lock->head_tail)
 		     :
 		     : "memory", "cc");
 }
@@ -159,16 +99,16 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
-	int tmp = ACCESS_ONCE(lock->slock);
+	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+	return !!(tmp.tail ^ tmp.head);
 }
 
 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
-	int tmp = ACCESS_ONCE(lock->slock);
+	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
 
-	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+	return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
 }
 
 #ifndef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 7c7a486fcb68..8ebd5df7451e 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -5,11 +5,29 @@
 # error "please don't include this file directly"
 #endif
 
+#include <linux/types.h>
+
+#if (CONFIG_NR_CPUS < 256)
+typedef u8  __ticket_t;
+typedef u16 __ticketpair_t;
+#else
+typedef u16 __ticket_t;
+typedef u32 __ticketpair_t;
+#endif
+
+#define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
+#define TICKET_MASK	((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
 typedef struct arch_spinlock {
-	unsigned int slock;
+	union {
+		__ticketpair_t head_tail;
+		struct __raw_tickets {
+			__ticket_t head, tail;
+		} tickets;
+	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 #include <asm/rwlock.h>
 
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 593485b38ab3..599c77d38f33 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -352,10 +352,12 @@
 #define __NR_syncfs             344
 #define __NR_sendmmsg		345
 #define __NR_setns		346
+#define __NR_process_vm_readv	347
+#define __NR_process_vm_writev	348
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 347
+#define NR_syscalls 349
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 201040573444..0431f193c3f2 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -624,7 +624,6 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
 __SYSCALL(__NR_move_pages, sys_move_pages)
 #define __NR_utimensat				280
 __SYSCALL(__NR_utimensat, sys_utimensat)
-#define __IGNORE_getcpu		/* implemented as a vsyscall */
 #define __NR_epoll_pwait			281
 __SYSCALL(__NR_epoll_pwait, sys_epoll_pwait)
 #define __NR_signalfd				282
@@ -683,6 +682,10 @@ __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_getcpu				309
 __SYSCALL(__NR_getcpu, sys_getcpu)
+#define __NR_process_vm_readv			310
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
+#define __NR_process_vm_writev			311
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 37d369859c8e..8e862aaf0d90 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -55,6 +55,7 @@
 #define UV_BAU_TUNABLES_DIR		"sgi_uv"
 #define UV_BAU_TUNABLES_FILE		"bau_tunables"
 #define WHITESPACE			" \t\n"
+#define uv_mmask			((1UL << uv_hub_info->m_val) - 1)
 #define uv_physnodeaddr(x)		((__pa((unsigned long)(x)) & uv_mmask))
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
@@ -656,11 +657,7 @@ static inline int atomic_read_short(const struct atomic_short *v)
  */
 static inline int atom_asr(short i, struct atomic_short *v)
 {
-	short __i = i;
-	asm volatile(LOCK_PREFIX "xaddw %0, %1"
-			: "+r" (i), "+m" (v->counter)
-			: : "memory");
-	return i + __i;
+	return i + xadd(&v->counter, i);
 }
 
 /*
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index f26544a15214..54a13aaebc40 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -46,6 +46,13 @@
  *	PNODE   - the low N bits of the GNODE. The PNODE is the most useful variant
  *		  of the nasid for socket usage.
  *
+ *	GPA	- (global physical address) a socket physical address converted
+ *		  so that it can be used by the GRU as a global address. Socket
+ *		  physical addresses 1) need additional NASID (node) bits added
+ *		  to the high end of the address, and 2) unaliased if the
+ *		  partition does not have a physical address 0. In addition, on
+ *		  UV2 rev 1, GPAs need the gnode left shifted to bits 39 or 40.
+ *
  *
  *  NumaLink Global Physical Address Format:
  *  +--------------------------------+---------------------+
@@ -141,6 +148,8 @@ struct uv_hub_info_s {
 	unsigned int		gnode_extra;
 	unsigned char		hub_revision;
 	unsigned char		apic_pnode_shift;
+	unsigned char		m_shift;
+	unsigned char		n_lshift;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
@@ -177,6 +186,16 @@ static inline int is_uv2_hub(void)
 	return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
 }
 
+static inline int is_uv2_1_hub(void)
+{
+	return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE;
+}
+
+static inline int is_uv2_2_hub(void)
+{
+	return uv_hub_info->hub_revision == UV2_HUB_REVISION_BASE + 1;
+}
+
 union uvh_apicid {
     unsigned long       v;
     struct uvh_apicid_s {
@@ -276,7 +295,10 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
 {
 	if (paddr < uv_hub_info->lowmem_remap_top)
 		paddr |= uv_hub_info->lowmem_remap_base;
-	return paddr | uv_hub_info->gnode_upper;
+	paddr |= uv_hub_info->gnode_upper;
+	paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+		((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift);
+	return paddr;
 }
 
 
@@ -300,16 +322,19 @@ static inline unsigned long uv_gpa_to_soc_phys_ram(unsigned long gpa)
 	unsigned long remap_base = uv_hub_info->lowmem_remap_base;
 	unsigned long remap_top =  uv_hub_info->lowmem_remap_top;
 
+	gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+		((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+	gpa = gpa & uv_hub_info->gpa_mask;
 	if (paddr >= remap_base && paddr < remap_base + remap_top)
 		paddr -= remap_base;
 	return paddr;
 }
 
 
-/* gnode -> pnode */
+/* gpa -> pnode */
 static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
 {
-	return gpa >> uv_hub_info->m_val;
+	return gpa >> uv_hub_info->n_lshift;
 }
 
 /* gpa -> pnode */
@@ -320,6 +345,12 @@ static inline int uv_gpa_to_pnode(unsigned long gpa)
 	return uv_gpa_to_gnode(gpa) & n_mask;
 }
 
+/* gpa -> node offset*/
+static inline unsigned long uv_gpa_to_offset(unsigned long gpa)
+{
+	return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift;
+}
+
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2caf290e9895..31f180c21ce9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -350,6 +350,18 @@ enum vmcs_field {
 #define DEBUG_REG_ACCESS_REG(eq)        (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
 
 
+/*
+ * Exit Qualifications for APIC-Access
+ */
+#define APIC_ACCESS_OFFSET              0xfff   /* 11:0, offset within the APIC page */
+#define APIC_ACCESS_TYPE                0xf000  /* 15:12, access type */
+#define TYPE_LINEAR_APIC_INST_READ      (0 << 12)
+#define TYPE_LINEAR_APIC_INST_WRITE     (1 << 12)
+#define TYPE_LINEAR_APIC_INST_FETCH     (2 << 12)
+#define TYPE_LINEAR_APIC_EVENT          (3 << 12)
+#define TYPE_PHYSICAL_APIC_EVENT        (10 << 12)
+#define TYPE_PHYSICAL_APIC_INST         (15 << 12)
+
 /* segment AR */
 #define SEGMENT_AR_L_MASK (1 << 13)
 
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 7ff4669580cf..c34f96c2f7a0 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,6 +12,7 @@
 #include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
+#include <xen/grant_table.h>
 #include <xen/features.h>
 
 /* Xen machine address */
@@ -48,14 +49,11 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 					     unsigned long pfn_e);
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
-			    bool clear_pte);
+			    struct gnttab_map_grant_ref *kmap_op);
 extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
-#ifdef CONFIG_XEN_DEBUG_FS
-extern int p2m_dump_show(struct seq_file *m, void *v);
-#endif
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 82f2912155a5..8baca3c4871c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -19,7 +19,7 @@ endif
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y			+= time.o ioport.o ldt.o dumpstack.o
+obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 8a439d364b94..b1e7c7f7a0af 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -5,7 +5,7 @@
  * This allows to use PCI devices that only support 32bit addresses on systems
  * with more than 4GB.
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
+ * See Documentation/DMA-API-HOWTO.txt for the interface specification.
  *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Subject to the GNU General Public License v2 only.
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 52fa56399a50..a2fd72e0ab35 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1437,27 +1437,21 @@ void enable_x2apic(void)
 
 int __init enable_IR(void)
 {
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 	if (!intr_remapping_supported()) {
 		pr_debug("intr-remapping not supported\n");
-		return 0;
+		return -1;
 	}
 
 	if (!x2apic_preenabled && skip_ioapic_setup) {
 		pr_info("Skipped enabling intr-remap because of skipping "
 			"io-apic setup\n");
-		return 0;
+		return -1;
 	}
 
-	if (enable_intr_remapping(x2apic_supported()))
-		return 0;
-
-	pr_info("Enabled Interrupt-remapping\n");
-
-	return 1;
-
+	return enable_intr_remapping();
 #endif
-	return 0;
+	return -1;
 }
 
 void __init enable_IR_x2apic(void)
@@ -1481,11 +1475,11 @@ void __init enable_IR_x2apic(void)
 	mask_ioapic_entries();
 
 	if (dmar_table_init_ret)
-		ret = 0;
+		ret = -1;
 	else
 		ret = enable_IR();
 
-	if (!ret) {
+	if (ret < 0) {
 		/* IR is required if there is APIC ID > 255 even when running
 		 * under KVM
 		 */
@@ -1499,6 +1493,9 @@ void __init enable_IR_x2apic(void)
 		x2apic_force_phys();
 	}
 
+	if (ret == IRQ_REMAP_XAPIC_MODE)
+		goto nox2apic;
+
 	x2apic_enabled = 1;
 
 	if (x2apic_supported() && !x2apic_mode) {
@@ -1508,19 +1505,21 @@ void __init enable_IR_x2apic(void)
 	}
 
 nox2apic:
-	if (!ret) /* IR enabling failed */
+	if (ret < 0) /* IR enabling failed */
 		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
 
 out:
-	if (x2apic_enabled)
+	if (x2apic_enabled || !x2apic_supported())
 		return;
 
 	if (x2apic_preenabled)
 		panic("x2apic: enabled by BIOS but kernel init failed.");
-	else if (cpu_has_x2apic)
-		pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
+	else if (ret == IRQ_REMAP_XAPIC_MODE)
+		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+	else if (ret < 0)
+		pr_info("x2apic not enabled, IRQ remapping init failed\n");
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index efd737e827f4..521bead01137 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -255,12 +255,24 @@ static struct apic apic_bigsmp = {
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
 };
 
-struct apic * __init generic_bigsmp_probe(void)
+void __init generic_bigsmp_probe(void)
 {
-	if (probe_bigsmp())
-		return &apic_bigsmp;
+	unsigned int cpu;
 
-	return NULL;
+	if (!probe_bigsmp())
+		return;
+
+	apic = &apic_bigsmp;
+
+	for_each_possible_cpu(cpu) {
+		if (early_per_cpu(x86_cpu_to_logical_apicid,
+				  cpu) == BAD_APICID)
+			continue;
+		early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+			bigsmp_early_logical_apicid(cpu);
+	}
+
+	pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
 }
 
 apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d5e57db0f7be..31cb9ae992b7 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void)
 }
 
 static int __kprobes
-arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
-			 unsigned long cmd, void *__args)
+arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = __args;
-	struct pt_regs *regs;
 	int cpu;
 
-	switch (cmd) {
-	case DIE_NMI:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	regs = args->regs;
 	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
@@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 		show_regs(regs);
 		arch_spin_unlock(&lock);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-		return NOTIFY_STOP;
+		return NMI_HANDLED;
 	}
 
-	return NOTIFY_DONE;
+	return NMI_DONE;
 }
 
-static __read_mostly struct notifier_block backtrace_notifier = {
-	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
-	.next                   = NULL,
-	.priority               = NMI_LOCAL_LOW_PRIOR,
-};
-
 static int __init register_trigger_all_cpu_backtrace(void)
 {
-	register_die_notifier(&backtrace_notifier);
+	register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+				0, "arch_bt");
 	return 0;
 }
 early_initcall(register_trigger_all_cpu_backtrace);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8eb863e27ea6..3c31fa98af6d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -92,21 +92,21 @@ static struct ioapic {
 	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } ioapics[MAX_IO_APICS];
 
-#define mpc_ioapic_ver(id)		ioapics[id].mp_config.apicver
+#define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
 
-int mpc_ioapic_id(int id)
+int mpc_ioapic_id(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicid;
+	return ioapics[ioapic_idx].mp_config.apicid;
 }
 
-unsigned int mpc_ioapic_addr(int id)
+unsigned int mpc_ioapic_addr(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicaddr;
+	return ioapics[ioapic_idx].mp_config.apicaddr;
 }
 
-struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id)
+struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 {
-	return &ioapics[id].gsi_config;
+	return &ioapics[ioapic_idx].gsi_config;
 }
 
 int nr_ioapics;
@@ -186,11 +186,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-#else
-static struct irq_cfg irq_cfgx[NR_IRQS];
-#endif
 
 int __init arch_early_irq_init(void)
 {
@@ -234,7 +230,6 @@ int __init arch_early_irq_init(void)
 	return 0;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq_get_chip_data(irq);
@@ -269,22 +264,6 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
 	kfree(cfg);
 }
 
-#else
-
-struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	return irq_cfgx + irq;
-}
-
-static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
-
-#endif
-
 static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 {
 	int res = irq_alloc_desc_at(at, node);
@@ -394,13 +373,21 @@ union entry_union {
 	struct IO_APIC_route_entry entry;
 };
 
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	return eu.entry;
+}
+
 static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
 	union entry_union eu;
 	unsigned long flags;
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	eu.entry = __ioapic_read_entry(apic, pin);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return eu.entry;
 }
@@ -529,18 +516,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg,
 		__io_apic_modify_irq(entry, mask_and, mask_or, final);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-			     IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
-			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -585,6 +560,66 @@ static void unmask_ioapic_irq(struct irq_data *data)
 	unmask_ioapic(data->chip_data);
 }
 
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+{
+	if (mpc_ioapic_ver(apic) >= 0x20) {
+		/*
+		 * Intr-remapping uses pin number as the virtual vector
+		 * in the RTE. Actual vector is programmed in
+		 * intr-remapping table entry. Hence for the io-apic
+		 * EOI we use the pin number.
+		 */
+		if (cfg && irq_remapped(cfg))
+			io_apic_eoi(apic, pin);
+		else
+			io_apic_eoi(apic, vector);
+	} else {
+		struct IO_APIC_route_entry entry, entry1;
+
+		entry = entry1 = __ioapic_read_entry(apic, pin);
+
+		/*
+		 * Mask the entry and change the trigger mode to edge.
+		 */
+		entry1.mask = 1;
+		entry1.trigger = IOAPIC_EDGE;
+
+		__ioapic_write_entry(apic, pin, entry1);
+
+		/*
+		 * Restore the previous level triggered entry.
+		 */
+		__ioapic_write_entry(apic, pin, entry);
+	}
+}
+
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -593,10 +628,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 		return;
+
+	/*
+	 * Make sure the entry is masked and re-read the contents to check
+	 * if it is a level triggered pin and if the remote-IRR is set.
+	 */
+	if (!entry.mask) {
+		entry.mask = 1;
+		ioapic_write_entry(apic, pin, entry);
+		entry = ioapic_read_entry(apic, pin);
+	}
+
+	if (entry.irr) {
+		unsigned long flags;
+
+		/*
+		 * Make sure the trigger mode is set to level. Explicit EOI
+		 * doesn't clear the remote-IRR if the trigger mode is not
+		 * set to level.
+		 */
+		if (!entry.trigger) {
+			entry.trigger = IOAPIC_LEVEL;
+			ioapic_write_entry(apic, pin, entry);
+		}
+
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		__eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
 	/*
-	 * Disable it in the IO-APIC irq-routing table:
+	 * Clear the rest of the bits in the IO-APIC RTE except for the mask
+	 * bit.
 	 */
 	ioapic_mask_entry(apic, pin);
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.irr)
+		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+		       mpc_ioapic_id(apic), pin);
 }
 
 static void clear_IO_APIC (void)
@@ -712,13 +781,13 @@ int restore_ioapic_entries(void)
 /*
  * Find the IRQ entry number of a certain pin.
  */
-static int find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int ioapic_idx, int pin, int type)
 {
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].irqtype == type &&
-		    (mp_irqs[i].dstapic == mpc_ioapic_id(apic) ||
+		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].dstirq == pin)
 			return i;
@@ -757,12 +826,13 @@ static int __init find_isa_irq_apic(int irq, int type)
 		    (mp_irqs[i].srcbusirq == irq))
 			break;
 	}
+
 	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic)
-				return apic;
-		}
+		int ioapic_idx;
+
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
+				return ioapic_idx;
 	}
 
 	return -1;
@@ -977,7 +1047,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 				struct io_apic_irq_attr *irq_attr)
 {
-	int apic, i, best_guess = -1;
+	int ioapic_idx, i, best_guess = -1;
 
 	apic_printk(APIC_DEBUG,
 		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -990,8 +1060,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
 
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic ||
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
 			    mp_irqs[i].dstapic == MP_APIC_ALL)
 				break;
 
@@ -999,13 +1069,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 		    !mp_irqs[i].irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+			int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
 
-			if (!(apic || IO_APIC_IRQ(irq)))
+			if (!(ioapic_idx || IO_APIC_IRQ(irq)))
 				continue;
 
 			if (pin == (mp_irqs[i].srcbusirq & 3)) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1016,7 +1086,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 			 * best-guess fuzzy result for broken mptables.
 			 */
 			if (best_guess < 0) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1202,7 +1272,6 @@ void __setup_vector_irq(int cpu)
 }
 
 static struct irq_chip ioapic_chip;
-static struct irq_chip ir_ioapic_chip;
 
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
@@ -1246,7 +1315,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 
 	if (irq_remapped(cfg)) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		chip = &ir_ioapic_chip;
+		irq_remap_modify_chip_defaults(chip);
 		fasteoi = trigger != 0;
 	}
 
@@ -1255,77 +1324,100 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 				      fasteoi ? "fasteoi" : "edge");
 }
 
-static int setup_ioapic_entry(int apic_id, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector, int pin)
+
+static int setup_ir_ioapic_entry(int irq,
+			      struct IR_IO_APIC_route_entry *entry,
+			      unsigned int destination, int vector,
+			      struct io_apic_irq_attr *attr)
 {
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
+	int index;
+	struct irte irte;
+	int ioapic_id = mpc_ioapic_id(attr->ioapic);
+	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
 
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
+	if (!iommu) {
+		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
+		return -ENODEV;
+	}
 
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic_id);
+	index = alloc_irte(iommu, irq, 1);
+	if (index < 0) {
+		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
+		return -ENOMEM;
+	}
 
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
+	prepare_irte(&irte, vector, destination);
 
-		prepare_irte(&irte, vector, destination);
+	/* Set source-id of interrupt request */
+	set_ioapic_sid(&irte, ioapic_id);
 
-		/* Set source-id of interrupt request */
-		set_ioapic_sid(&irte, apic_id);
+	modify_irte(irq, &irte);
 
-		modify_irte(irq, &irte);
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
+		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
+		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
+		"Avail:%X Vector:%02X Dest:%08X "
+		"SID:%04X SQ:%X SVT:%X)\n",
+		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
+		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
+		irte.avail, irte.vector, irte.dest_id,
+		irte.sid, irte.sq, irte.svt);
+
+	memset(entry, 0, sizeof(*entry));
+
+	entry->index2	= (index >> 15) & 0x1;
+	entry->zero	= 0;
+	entry->format	= 1;
+	entry->index	= (index & 0x7fff);
+	/*
+	 * IO-APIC RTE will be configured with virtual vector.
+	 * irq handler will do the explicit EOI to the io-apic.
+	 */
+	entry->vector	= attr->ioapic_pin;
+	entry->mask	= 0;			/* enable IRQ */
+	entry->trigger	= attr->trigger;
+	entry->polarity	= attr->polarity;
 
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-		/*
-		 * IO-APIC RTE will be configured with virtual vector.
-		 * irq handler will do the explicit EOI to the io-apic.
-		 */
-		ir_entry->vector = pin;
-
-		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
-			"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
-			"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
-			"Avail:%X Vector:%02X Dest:%08X "
-			"SID:%04X SQ:%X SVT:%X)\n",
-			apic_id, irte.present, irte.fpd, irte.dst_mode,
-			irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
-			irte.avail, irte.vector, irte.dest_id,
-			irte.sid, irte.sq, irte.svt);
-	} else {
-		entry->delivery_mode = apic->irq_delivery_mode;
-		entry->dest_mode = apic->irq_dest_mode;
-		entry->dest = destination;
-		entry->vector = vector;
-	}
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (attr->trigger)
+		entry->mask = 1;
 
-	entry->mask = 0;				/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
+	return 0;
+}
 
-	/* Mask level triggered irqs.
+static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+			       unsigned int destination, int vector,
+			       struct io_apic_irq_attr *attr)
+{
+	if (intr_remapping_enabled)
+		return setup_ir_ioapic_entry(irq,
+			 (struct IR_IO_APIC_route_entry *)entry,
+			 destination, vector, attr);
+
+	memset(entry, 0, sizeof(*entry));
+
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode     = apic->irq_dest_mode;
+	entry->dest	     = destination;
+	entry->vector	     = vector;
+	entry->mask	     = 0;			/* enable IRQ */
+	entry->trigger	     = attr->trigger;
+	entry->polarity	     = attr->polarity;
+
+	/*
+	 * Mask level triggered irqs.
 	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 	 */
-	if (trigger)
+	if (attr->trigger)
 		entry->mask = 1;
+
 	return 0;
 }
 
-static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
-			     struct irq_cfg *cfg, int trigger, int polarity)
+static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
+				struct io_apic_irq_attr *attr)
 {
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
@@ -1348,49 +1440,48 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
 		    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
-		    apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector,
-		    irq, trigger, polarity, dest);
-
+		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
+		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
-	if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry,
-			       dest, trigger, polarity, cfg->vector, pin)) {
-		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-		       mpc_ioapic_id(apic_id), pin);
+	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
+		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
+
 		return;
 	}
 
-	ioapic_register_intr(irq, cfg, trigger);
+	ioapic_register_intr(irq, cfg, attr->trigger);
 	if (irq < legacy_pic->nr_legacy_irqs)
 		legacy_pic->mask(irq);
 
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
 }
 
-static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
+static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
 {
 	if (idx != -1)
 		return false;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
-		    mpc_ioapic_id(apic_id), pin);
+		    mpc_ioapic_id(ioapic_idx), pin);
 	return true;
 }
 
-static void __init __io_apic_setup_irqs(unsigned int apic_id)
+static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
 {
 	int idx, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 	unsigned int pin, irq;
 
-	for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) {
-		idx = find_irq_entry(apic_id, pin, mp_INT);
-		if (io_apic_pin_not_connected(idx, apic_id, pin))
+	for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
+		idx = find_irq_entry(ioapic_idx, pin, mp_INT);
+		if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
 			continue;
 
-		irq = pin_2_irq(idx, apic_id, pin);
+		irq = pin_2_irq(idx, ioapic_idx, pin);
 
-		if ((apic_id > 0) && (irq > 16))
+		if ((ioapic_idx > 0) && (irq > 16))
 			continue;
 
 		/*
@@ -1398,10 +1489,10 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id)
 		 * installed and if it returns 1:
 		 */
 		if (apic->multi_timer_check &&
-		    apic->multi_timer_check(apic_id, irq))
+		    apic->multi_timer_check(ioapic_idx, irq))
 			continue;
 
-		set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+		set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 				     irq_polarity(idx));
 
 		io_apic_setup_irq_pin(irq, node, &attr);
@@ -1410,12 +1501,12 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id)
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	unsigned int apic_id;
+	unsigned int ioapic_idx;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
-		__io_apic_setup_irqs(apic_id);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		__io_apic_setup_irqs(ioapic_idx);
 }
 
 /*
@@ -1425,28 +1516,28 @@ static void __init setup_IO_APIC_irqs(void)
  */
 void setup_IO_APIC_irq_extra(u32 gsi)
 {
-	int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
+	int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
-	apic_id = mp_find_ioapic(gsi);
-	if (apic_id < 0)
+	ioapic_idx = mp_find_ioapic(gsi);
+	if (ioapic_idx < 0)
 		return;
 
-	pin = mp_find_ioapic_pin(apic_id, gsi);
-	idx = find_irq_entry(apic_id, pin, mp_INT);
+	pin = mp_find_ioapic_pin(ioapic_idx, gsi);
+	idx = find_irq_entry(ioapic_idx, pin, mp_INT);
 	if (idx == -1)
 		return;
 
-	irq = pin_2_irq(idx, apic_id, pin);
+	irq = pin_2_irq(idx, ioapic_idx, pin);
 
 	/* Only handle the non legacy irqs on secondary ioapics */
-	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
+	if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
 		return;
 
-	set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+	set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 			     irq_polarity(idx));
 
 	io_apic_setup_irq_pin_once(irq, node, &attr);
@@ -1455,8 +1546,8 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 /*
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
-static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
-					int vector)
+static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
+					 unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
 
@@ -1487,45 +1578,29 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
 	/*
 	 * Add it to the IO-APIC irq-routing table:
 	 */
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(ioapic_idx, pin, entry);
 }
 
-
-__apicdebuginit(void) print_IO_APIC(void)
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 {
-	int apic, i;
+	int i;
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mpc_ioapic_id(i), ioapics[i].nr_registers);
-
-	/*
-	 * We are a bit conservative about what we expect.  We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	reg_01.raw = io_apic_read(ioapic_idx, 1);
 	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
+		reg_02.raw = io_apic_read(ioapic_idx, 2);
 	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
+		reg_03.raw = io_apic_read(ioapic_idx, 3);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic));
+	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1575,7 +1650,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 			struct IO_APIC_route_entry entry;
 			struct IR_IO_APIC_route_entry *ir_entry;
 
-			entry = ioapic_read_entry(apic, i);
+			entry = ioapic_read_entry(ioapic_idx, i);
 			ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
 			printk(KERN_DEBUG " %02x %04X ",
 				i,
@@ -1596,7 +1671,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 		} else {
 			struct IO_APIC_route_entry entry;
 
-			entry = ioapic_read_entry(apic, i);
+			entry = ioapic_read_entry(ioapic_idx, i);
 			printk(KERN_DEBUG " %02x %02X  ",
 				i,
 				entry.dest
@@ -1614,7 +1689,28 @@ __apicdebuginit(void) print_IO_APIC(void)
 			);
 		}
 	}
-	}
+}
+
+__apicdebuginit(void) print_IO_APICs(void)
+{
+	int ioapic_idx;
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mpc_ioapic_id(ioapic_idx),
+		       ioapics[ioapic_idx].nr_registers);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		print_IO_APIC(ioapic_idx);
 
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 	for_each_active_irq(irq) {
@@ -1633,8 +1729,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 
 	printk(KERN_INFO ".................................... done.\n");
-
-	return;
 }
 
 __apicdebuginit(void) print_APIC_field(int base)
@@ -1828,7 +1922,7 @@ __apicdebuginit(int) print_ICs(void)
 		return 0;
 
 	print_local_APICs(show_lapic);
-	print_IO_APIC();
+	print_IO_APICs();
 
 	return 0;
 }
@@ -1953,7 +2047,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
-	int apic_id;
+	int ioapic_idx;
 	int i;
 	unsigned char old_id;
 	unsigned long flags;
@@ -1967,21 +2061,20 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
 	 */
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
 		/* Read the register 0 value */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-		old_id = mpc_ioapic_id(apic_id);
+		old_id = mpc_ioapic_id(ioapic_idx);
 
-		if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) {
+		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				reg_00.bits.ID);
-			ioapics[apic_id].mp_config.apicid = reg_00.bits.ID;
+			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
 		}
 
 		/*
@@ -1990,9 +2083,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
 		if (apic->check_apicid_used(&phys_id_present_map,
-					    mpc_ioapic_id(apic_id))) {
+					    mpc_ioapic_id(ioapic_idx))) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			for (i = 0; i < get_physical_broadcast(); i++)
 				if (!physid_isset(i, phys_id_present_map))
 					break;
@@ -2001,14 +2094,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				i);
 			physid_set(i, phys_id_present_map);
-			ioapics[apic_id].mp_config.apicid = i;
+			ioapics[ioapic_idx].mp_config.apicid = i;
 		} else {
 			physid_mask_t tmp;
-			apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id),
+			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
 						    &tmp);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
-					mpc_ioapic_id(apic_id));
+					mpc_ioapic_id(ioapic_idx));
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
@@ -2016,35 +2109,35 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
 		 */
-		if (old_id != mpc_ioapic_id(apic_id))
+		if (old_id != mpc_ioapic_id(ioapic_idx))
 			for (i = 0; i < mp_irq_entries; i++)
 				if (mp_irqs[i].dstapic == old_id)
 					mp_irqs[i].dstapic
-						= mpc_ioapic_id(apic_id);
+						= mpc_ioapic_id(ioapic_idx);
 
 		/*
 		 * Update the ID register according to the right value
 		 * from the MPC table if they are different.
 		 */
-		if (mpc_ioapic_id(apic_id) == reg_00.bits.ID)
+		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
 			continue;
 
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
-			mpc_ioapic_id(apic_id));
+			mpc_ioapic_id(ioapic_idx));
 
-		reg_00.bits.ID = mpc_ioapic_id(apic_id);
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic_id, 0, reg_00.raw);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
 		 */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mpc_ioapic_id(apic_id))
+		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
 			printk("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2255,7 +2348,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	return ret;
 }
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
@@ -2267,6 +2360,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
  * updated vector information), by using a virtual vector (io-apic pin number).
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
  */
 static int
 ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -2291,10 +2387,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	irte.dest_id = IRTE_DEST(dest);
 
 	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
 	 */
 	modify_irte(irq, &irte);
 
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
@@ -2407,48 +2509,6 @@ static void ack_apic_edge(struct irq_data *data)
 
 atomic_t irq_mis_count;
 
-/*
- * IO-APIC versions below 0x20 don't support EOI register.
- * For the record, here is the information about various versions:
- *     0Xh     82489DX
- *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
- *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
- *     30h-FFh Reserved
- *
- * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
- * version as 0x2. This is an error with documentation and these ICH chips
- * use io-apic's of version 0x20.
- *
- * For IO-APIC's with EOI register, we use that to do an explicit EOI.
- * Otherwise, we simulate the EOI message manually by changing the trigger
- * mode to edge and then back to level, with RTE being masked during this.
-*/
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		if (mpc_ioapic_ver(entry->apic) >= 0x20) {
-			/*
-			 * Intr-remapping uses pin number as the virtual vector
-			 * in the RTE. Actual vector is programmed in
-			 * intr-remapping table entry. Hence for the io-apic
-			 * EOI we use the pin number.
-			 */
-			if (irq_remapped(cfg))
-				io_apic_eoi(entry->apic, entry->pin);
-			else
-				io_apic_eoi(entry->apic, cfg->vector);
-		} else {
-			__mask_and_edge_IO_APIC_irq(entry);
-			__unmask_and_level_IO_APIC_irq(entry);
-		}
-	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
 static void ack_apic_level(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
@@ -2552,7 +2612,7 @@ static void ack_apic_level(struct irq_data *data)
 	}
 }
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 static void ir_ack_apic_edge(struct irq_data *data)
 {
 	ack_APIC_irq();
@@ -2563,7 +2623,23 @@ static void ir_ack_apic_level(struct irq_data *data)
 	ack_APIC_irq();
 	eoi_ioapic_irq(data->irq, data->chip_data);
 }
-#endif /* CONFIG_INTR_REMAP */
+
+static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
+{
+	seq_printf(p, " IR-%s", data->chip->name);
+}
+
+static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
+{
+	chip->irq_print_chip = ir_print_prefix;
+	chip->irq_ack = ir_ack_apic_edge;
+	chip->irq_eoi = ir_ack_apic_level;
+
+#ifdef CONFIG_SMP
+	chip->irq_set_affinity = ir_ioapic_set_affinity;
+#endif
+}
+#endif /* CONFIG_IRQ_REMAP */
 
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name			= "IO-APIC",
@@ -2578,21 +2654,6 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static struct irq_chip ir_ioapic_chip __read_mostly = {
-	.name			= "IR-IO-APIC",
-	.irq_startup		= startup_ioapic_irq,
-	.irq_mask		= mask_ioapic_irq,
-	.irq_unmask		= unmask_ioapic_irq,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-	.irq_eoi		= ir_ack_apic_level,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_ioapic_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
-
 static inline void init_IO_APIC_traps(void)
 {
 	struct irq_cfg *cfg;
@@ -2944,27 +3005,26 @@ static int __init io_apic_bug_finalize(void)
 
 late_initcall(io_apic_bug_finalize);
 
-static void resume_ioapic_id(int ioapic_id)
+static void resume_ioapic_id(int ioapic_idx)
 {
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 
-
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic_id, 0);
-	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) {
-		reg_00.bits.ID = mpc_ioapic_id(ioapic_id);
-		io_apic_write(ioapic_id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void ioapic_resume(void)
 {
-	int ioapic_id;
+	int ioapic_idx;
 
-	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
-		resume_ioapic_id(ioapic_id);
+	for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+		resume_ioapic_id(ioapic_idx);
 
 	restore_ioapic_entries();
 }
@@ -3144,45 +3204,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 
 	return 0;
 }
-#ifdef CONFIG_INTR_REMAP
-/*
- * Migrate the MSI irq to another cpumask. This migration is
- * done in the process context using interrupt-remapping hardware.
- */
-static int
-ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		    bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct irte irte;
-
-	if (get_irte(irq, &irte))
-		return -1;
-
-	if (__ioapic_set_affinity(data, mask, &dest))
-		return -1;
-
-	irte.vector = cfg->vector;
-	irte.dest_id = IRTE_DEST(dest);
-
-	/*
-	 * atomically update the IRTE with the new destination and vector.
-	 */
-	modify_irte(irq, &irte);
-
-	/*
-	 * After this point, all the interrupts will start arriving
-	 * at the new destination. So, time to cleanup the previous
-	 * vector allocation.
-	 */
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return 0;
-}
-
-#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -3200,19 +3221,6 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static struct irq_chip msi_ir_chip = {
-	.name			= "IR-PCI-MSI",
-	.irq_unmask		= unmask_msi_irq,
-	.irq_mask		= mask_msi_irq,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_msi_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
-
 /*
  * Map the PCI dev to the corresponding remapping hardware unit
  * and allocate 'nvec' consecutive interrupt-remapping table entries
@@ -3255,7 +3263,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 
 	if (irq_remapped(irq_get_chip_data(irq))) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		chip = &msi_ir_chip;
+		irq_remap_modify_chip_defaults(chip);
 	}
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3328,7 +3336,7 @@ void native_teardown_msi_irq(unsigned int irq)
 	destroy_irq(irq);
 }
 
-#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
+#ifdef CONFIG_DMAR_TABLE
 #ifdef CONFIG_SMP
 static int
 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -3409,19 +3417,6 @@ static int hpet_msi_set_affinity(struct irq_data *data,
 
 #endif /* CONFIG_SMP */
 
-static struct irq_chip ir_hpet_msi_type = {
-	.name			= "IR-HPET_MSI",
-	.irq_unmask		= hpet_msi_unmask,
-	.irq_mask		= hpet_msi_mask,
-#ifdef CONFIG_INTR_REMAP
-	.irq_ack		= ir_ack_apic_edge,
-#ifdef CONFIG_SMP
-	.irq_set_affinity	= ir_msi_set_affinity,
-#endif
-#endif
-	.irq_retrigger		= ioapic_retrigger_irq,
-};
-
 static struct irq_chip hpet_msi_type = {
 	.name = "HPET_MSI",
 	.irq_unmask = hpet_msi_unmask,
@@ -3458,7 +3453,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
 	hpet_msi_write(irq_get_handler_data(irq), &msg);
 	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 	if (irq_remapped(irq_get_chip_data(irq)))
-		chip = &ir_hpet_msi_type;
+		irq_remap_modify_chip_defaults(chip);
 
 	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
 	return 0;
@@ -3566,26 +3561,25 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 		return -EINVAL;
 	ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
 	if (!ret)
-		setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
-				 attr->trigger, attr->polarity);
+		setup_ioapic_irq(irq, cfg, attr);
 	return ret;
 }
 
 int io_apic_setup_irq_pin_once(unsigned int irq, int node,
 			       struct io_apic_irq_attr *attr)
 {
-	unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+	unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
 	int ret;
 
 	/* Avoid redundant programming */
-	if (test_bit(pin, ioapics[id].pin_programmed)) {
+	if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
 		pr_debug("Pin %d-%d already programmed\n",
-			 mpc_ioapic_id(id), pin);
+			 mpc_ioapic_id(ioapic_idx), pin);
 		return 0;
 	}
 	ret = io_apic_setup_irq_pin(irq, node, attr);
 	if (!ret)
-		set_bit(pin, ioapics[id].pin_programmed);
+		set_bit(pin, ioapics[ioapic_idx].pin_programmed);
 	return ret;
 }
 
@@ -3621,7 +3615,6 @@ int get_nr_irqs_gsi(void)
 	return nr_irqs_gsi;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 int __init arch_probe_nr_irqs(void)
 {
 	int nr;
@@ -3641,7 +3634,6 @@ int __init arch_probe_nr_irqs(void)
 
 	return NR_IRQS_LEGACY;
 }
-#endif
 
 int io_apic_set_pci_routing(struct device *dev, int irq,
 			    struct io_apic_irq_attr *irq_attr)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index b5254ad044ab..0787bb3412f4 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void)
 	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
 	 */
 
-	if (!cmdline_apic && apic == &apic_default) {
-		struct apic *bigsmp = generic_bigsmp_probe();
-		if (bigsmp) {
-			apic = bigsmp;
-			printk(KERN_INFO "Overriding APIC driver with %s\n",
-			       apic->name);
-		}
-	}
+	if (!cmdline_apic && apic == &apic_default)
+		generic_bigsmp_probe();
 #endif
 
 	if (apic->setup_apic_routing)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 34b18594e724..62ae3001ae02 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -672,18 +672,11 @@ void __cpuinit uv_cpu_init(void)
 /*
  * When NMI is received, print a stack trace.
  */
-int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
+int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 {
 	unsigned long real_uv_nmi;
 	int bid;
 
-	if (reason != DIE_NMIUNKNOWN)
-		return NOTIFY_OK;
-
-	if (in_crash_kexec)
-		/* do nothing if entering the crash kernel */
-		return NOTIFY_OK;
-
 	/*
 	 * Each blade has an MMR that indicates when an NMI has been sent
 	 * to cpus on the blade. If an NMI is detected, atomically
@@ -704,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 	}
 
 	if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
-		return NOTIFY_DONE;
+		return NMI_DONE;
 
 	__get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
 
@@ -717,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 	dump_stack();
 	spin_unlock(&uv_nmi_lock);
 
-	return NOTIFY_STOP;
+	return NMI_HANDLED;
 }
 
-static struct notifier_block uv_dump_stack_nmi_nb = {
-	.notifier_call	= uv_handle_nmi,
-	.priority = NMI_LOCAL_LOW_PRIOR - 1,
-};
-
 void uv_register_nmi_notifier(void)
 {
-	if (register_die_notifier(&uv_dump_stack_nmi_nb))
+	if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
 		printk(KERN_WARNING "UV NMI handler failed to register\n");
 }
 
@@ -832,6 +820,10 @@ void __init uv_system_init(void)
 		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
 		uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
 
+		uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
+		uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ?
+				(m_val == 40 ? 40 : 39) : m_val;
+
 		pnode = uv_apicid_to_pnode(apicid);
 		blade = boot_pnode_to_blade(pnode);
 		lcpu = uv_blade_info[blade].nr_possible_cpus;
@@ -862,8 +854,7 @@ void __init uv_system_init(void)
 		if (uv_node_to_blade[nid] >= 0)
 			continue;
 		paddr = node_start_pfn(nid) << PAGE_SHIFT;
-		paddr = uv_soc_phys_ram_to_gpa(paddr);
-		pnode = (paddr >> m_val) & pnode_mask;
+		pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
 		blade = boot_pnode_to_blade(pnode);
 		uv_node_to_blade[nid] = blade;
 	}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0371c484bb8a..a46bd383953c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -249,8 +249,6 @@ extern int (*console_blank_hook)(int);
 #define	APM_MINOR_DEV	134
 
 /*
- * See Documentation/Config.help for the configuration options.
- *
  * Various options can be changed at boot time as follows:
  * (We allow underscores for compatibility with the modules code)
  *	apm=on/off			enable/disable APM
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981d0309..25f24dccdcfa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -15,6 +15,7 @@ CFLAGS_common.o		:= $(nostackp)
 obj-y			:= intel_cacheinfo.o scattered.o topology.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
 obj-y			+= vmware.o hypervisor.o sched.o mshyperv.o
+obj-y			+= rdrand.o
 
 obj-$(CONFIG_X86_32)	+= bugs.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
@@ -28,10 +29,15 @@ obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
 obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
+ifdef CONFIG_PERF_EVENTS
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+endif
+
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
 
-obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
+obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o perf_event_amd_ibs.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed393dfce..46ae4f65fc7f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,5 +1,6 @@
 #include <linux/init.h>
 #include <linux/bitops.h>
+#include <linux/elf.h>
 #include <linux/mm.h>
 
 #include <linux/io.h>
@@ -410,8 +411,38 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
 #endif
 }
 
+static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
+{
+	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+
+		if (c->x86 > 0x10 ||
+		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
+			u64 val;
+
+			rdmsrl(MSR_K7_HWCR, val);
+			if (!(val & BIT(24)))
+				printk(KERN_WARNING FW_BUG "TSC doesn't count "
+					"with P0 frequency!\n");
+		}
+	}
+
+	if (c->x86 == 0x15) {
+		unsigned long upperbit;
+		u32 cpuid, assoc;
+
+		cpuid	 = cpuid_edx(0x80000005);
+		assoc	 = cpuid >> 16 & 0xff;
+		upperbit = ((cpuid >> 24) << 10) / assoc;
+
+		va_align.mask	  = (upperbit - 1) & PAGE_MASK;
+		va_align.flags    = ALIGN_VA_32 | ALIGN_VA_64;
+	}
+}
+
 static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
+	u32 dummy;
+
 	early_init_amd_mc(c);
 
 	/*
@@ -442,22 +473,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 	}
 #endif
 
-	/* We need to do the following only once */
-	if (c != &boot_cpu_data)
-		return;
-
-	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
-
-		if (c->x86 > 0x10 ||
-		    (c->x86 == 0x10 && c->x86_model >= 0x2)) {
-			u64 val;
-
-			rdmsrl(MSR_K7_HWCR, val);
-			if (!(val & BIT(24)))
-				printk(KERN_WARNING FW_BUG "TSC doesn't count "
-					"with P0 frequency!\n");
-		}
-	}
+	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -679,6 +695,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_size_cache	= amd_size_cache,
 #endif
 	.c_early_init   = early_init_amd,
+	.c_bsp_init	= bsp_init_amd,
 	.c_init		= init_amd,
 	.c_x86_vendor	= X86_VENDOR_AMD,
 };
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 62184390a601..aa003b13a831 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -15,6 +15,7 @@
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
+#include <asm/archrandom.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -681,6 +682,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	filter_cpuid_features(c, false);
 
 	setup_smep(c);
+
+	if (this_cpu->c_bsp_init)
+		this_cpu->c_bsp_init(c);
 }
 
 void __init early_cpu_init(void)
@@ -857,6 +861,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
 	init_hypervisor(c);
+	x86_init_rdrand(c);
 
 	/*
 	 * Clear/Set all flags overriden by options, need do it
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e765633f210e..1b22dcc51af4 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -18,6 +18,7 @@ struct cpu_dev {
 	struct		cpu_model_info c_models[4];
 
 	void            (*c_early_init)(struct cpuinfo_x86 *);
+	void		(*c_bsp_init)(struct cpuinfo_x86 *);
 	void		(*c_init)(struct cpuinfo_x86 *);
 	void		(*c_identify)(struct cpuinfo_x86 *);
 	unsigned int	(*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ed6086eedf1d..523131213f08 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,15 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
+	if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
+		unsigned lower_word;
+
+		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+		/* Required by the SDM */
+		sync_core();
+		rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
+	}
+
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
 	 *
@@ -55,17 +64,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * need the microcode to have already been loaded... so if it is
 	 * not, recommend a BIOS update and disable large pages.
 	 */
-	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
-		u32 ucode, junk;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		sync_core();
-		rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
-
-		if (ucode < 0x20e) {
-			printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
-			clear_cpu_cap(c, X86_FEATURE_PSE);
-		}
+	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+	    c->microcode < 0x20e) {
+		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+		clear_cpu_cap(c, X86_FEATURE_PSE);
 	}
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c533ed94..a3b0811693c9 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -151,28 +151,17 @@ union _cpuid4_leaf_ecx {
 	u32 full;
 };
 
-struct amd_l3_cache {
-	struct	 amd_northbridge *nb;
-	unsigned indices;
-	u8	 subcaches[4];
-};
-
-struct _cpuid4_info {
+struct _cpuid4_info_regs {
 	union _cpuid4_leaf_eax eax;
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	struct amd_l3_cache *l3;
-	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
+	struct amd_northbridge *nb;
 };
 
-/* subset of above _cpuid4_info w/o shared_cpu_map */
-struct _cpuid4_info_regs {
-	union _cpuid4_leaf_eax eax;
-	union _cpuid4_leaf_ebx ebx;
-	union _cpuid4_leaf_ecx ecx;
-	unsigned long size;
-	struct amd_l3_cache *l3;
+struct _cpuid4_info {
+	struct _cpuid4_info_regs base;
+	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
 unsigned short			num_cache_leaves;
@@ -314,16 +303,23 @@ struct _cache_attr {
 /*
  * L3 cache descriptors
  */
-static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
+static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
 {
+	struct amd_l3_cache *l3 = &nb->l3_cache;
 	unsigned int sc0, sc1, sc2, sc3;
 	u32 val = 0;
 
-	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
+	pci_read_config_dword(nb->misc, 0x1C4, &val);
 
 	/* calculate subcache sizes */
 	l3->subcaches[0] = sc0 = !(val & BIT(0));
 	l3->subcaches[1] = sc1 = !(val & BIT(4));
+
+	if (boot_cpu_data.x86 == 0x15) {
+		l3->subcaches[0] = sc0 += !(val & BIT(1));
+		l3->subcaches[1] = sc1 += !(val & BIT(5));
+	}
+
 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
 
@@ -333,33 +329,16 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
 					int index)
 {
-	static struct amd_l3_cache *__cpuinitdata l3_caches;
 	int node;
 
 	/* only for L3, and not in virtualized environments */
-	if (index < 3 || amd_nb_num() == 0)
+	if (index < 3)
 		return;
 
-	/*
-	 * Strictly speaking, the amount in @size below is leaked since it is
-	 * never freed but this is done only on shutdown so it doesn't matter.
-	 */
-	if (!l3_caches) {
-		int size = amd_nb_num() * sizeof(struct amd_l3_cache);
-
-		l3_caches = kzalloc(size, GFP_ATOMIC);
-		if (!l3_caches)
-			return;
-	}
-
 	node = amd_get_nb_id(smp_processor_id());
-
-	if (!l3_caches[node].nb) {
-		l3_caches[node].nb = node_to_amd_nb(node);
-		amd_calc_l3_indices(&l3_caches[node]);
-	}
-
-	this_leaf->l3 = &l3_caches[node];
+	this_leaf->nb = node_to_amd_nb(node);
+	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
+		amd_calc_l3_indices(this_leaf->nb);
 }
 
 /*
@@ -369,11 +348,11 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
  *
  * @returns: the disabled index if used or negative value if slot free.
  */
-int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
+int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 {
 	unsigned int reg = 0;
 
-	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
+	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
 
 	/* check whether this slot is activated already */
 	if (reg & (3UL << 30))
@@ -387,11 +366,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 {
 	int index;
 
-	if (!this_leaf->l3 ||
-	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 		return -EINVAL;
 
-	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
+	index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
 	if (index >= 0)
 		return sprintf(buf, "%d\n", index);
 
@@ -408,7 +386,7 @@ show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,	\
 SHOW_CACHE_DISABLE(0)
 SHOW_CACHE_DISABLE(1)
 
-static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
+static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
 				 unsigned slot, unsigned long idx)
 {
 	int i;
@@ -421,10 +399,10 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 	for (i = 0; i < 4; i++) {
 		u32 reg = idx | (i << 20);
 
-		if (!l3->subcaches[i])
+		if (!nb->l3_cache.subcaches[i])
 			continue;
 
-		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
+		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 
 		/*
 		 * We need to WBINVD on a core on the node containing the L3
@@ -434,7 +412,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
 		wbinvd_on_cpu(cpu);
 
 		reg |= BIT(31);
-		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
+		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
 	}
 }
 
@@ -448,24 +426,24 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
  *
  * @return: 0 on success, error status on failure
  */
-int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
+int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
 			    unsigned long index)
 {
 	int ret = 0;
 
 	/*  check if @slot is already used or the index is already disabled */
-	ret = amd_get_l3_disable_slot(l3, slot);
+	ret = amd_get_l3_disable_slot(nb, slot);
 	if (ret >= 0)
 		return -EINVAL;
 
-	if (index > l3->indices)
+	if (index > nb->l3_cache.indices)
 		return -EINVAL;
 
 	/* check whether the other slot has disabled the same index already */
-	if (index == amd_get_l3_disable_slot(l3, !slot))
+	if (index == amd_get_l3_disable_slot(nb, !slot))
 		return -EINVAL;
 
-	amd_l3_disable_index(l3, cpu, slot, index);
+	amd_l3_disable_index(nb, cpu, slot, index);
 
 	return 0;
 }
@@ -480,8 +458,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->l3 ||
-	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
 		return -EINVAL;
 
 	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -489,7 +466,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 	if (strict_strtoul(buf, 10, &val) < 0)
 		return -EINVAL;
 
-	err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
+	err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
 	if (err) {
 		if (err == -EEXIST)
 			printk(KERN_WARNING "L3 disable slot %d in use!\n",
@@ -518,7 +495,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
 static ssize_t
 show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
 {
-	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 		return -EINVAL;
 
 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
@@ -533,7 +510,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+	if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
 		return -EINVAL;
 
 	if (strict_strtoul(buf, 16, &val) < 0)
@@ -769,7 +746,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 		return;
 	}
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
-	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
+	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 
 	if (num_threads_sharing == 1)
 		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
@@ -820,29 +797,19 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	for (i = 0; i < num_cache_leaves; i++)
 		cache_remove_shared_cpu_map(cpu, i);
 
-	kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
 	kfree(per_cpu(ici_cpuid4_info, cpu));
 	per_cpu(ici_cpuid4_info, cpu) = NULL;
 }
 
-static int
-__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
-{
-	struct _cpuid4_info_regs *leaf_regs =
-		(struct _cpuid4_info_regs *)this_leaf;
-
-	return cpuid4_cache_lookup_regs(index, leaf_regs);
-}
-
 static void __cpuinit get_cpu_leaves(void *_retval)
 {
 	int j, *retval = _retval, cpu = smp_processor_id();
 
 	/* Do cpuid and store the results */
 	for (j = 0; j < num_cache_leaves; j++) {
-		struct _cpuid4_info *this_leaf;
-		this_leaf = CPUID4_INFO_IDX(cpu, j);
-		*retval = cpuid4_cache_lookup(j, this_leaf);
+		struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
+
+		*retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
 		if (unlikely(*retval < 0)) {
 			int i;
 
@@ -900,16 +867,16 @@ static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
 	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
 }
 
-show_one_plus(level, eax.split.level, 0);
-show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
+show_one_plus(level, base.eax.split.level, 0);
+show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
+show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
+show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
+show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
 
 static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
 			 unsigned int cpu)
 {
-	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
+	return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
 }
 
 static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -946,7 +913,7 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
 static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
 			 unsigned int cpu)
 {
-	switch (this_leaf->eax.split.type) {
+	switch (this_leaf->base.eax.split.type) {
 	case CACHE_TYPE_DATA:
 		return sprintf(buf, "Data\n");
 	case CACHE_TYPE_INST:
@@ -1135,7 +1102,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
 		ktype_cache.default_attrs = default_attrs;
 #ifdef CONFIG_AMD_NB
-		if (this_leaf->l3)
+		if (this_leaf->base.nb)
 			ktype_cache.default_attrs = amd_l3_attrs();
 #endif
 		retval = kobject_init_and_add(&(this_object->kobj),
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 0ed633c5048b..6199232161cf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 
 static cpumask_var_t mce_inject_cpumask;
 
-static int mce_raise_notify(struct notifier_block *self,
-			    unsigned long val, void *data)
+static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
-		return NOTIFY_DONE;
+	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
+		return NMI_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
-		raise_exception(m, args->regs);
+		raise_exception(m, regs);
 	else if (m->status)
 		raise_poll(m);
-	return NOTIFY_STOP;
+	return NMI_HANDLED;
 }
 
-static struct notifier_block mce_raise_nb = {
-	.notifier_call = mce_raise_notify,
-	.priority = NMI_LOCAL_NORMAL_PRIOR,
-};
-
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -216,7 +209,8 @@ static int inject_init(void)
 		return -ENOMEM;
 	printk(KERN_INFO "Machine check injector initialized\n");
 	mce_chrdev_ops.write = mce_write;
-	register_die_notifier(&mce_raise_nb);
+	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
+				"mce_notify");
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 08363b042122..7b5063a6ad42 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -217,8 +217,13 @@ static void print_mce(struct mce *m)
 		pr_cont("MISC %llx ", m->misc);
 
 	pr_cont("\n");
-	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
-		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+	/*
+	 * Note this output is parsed by external tools and old fields
+	 * should not be changed.
+	 */
+	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
+		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
+		cpu_data(m->extcpu).microcode);
 
 	/*
 	 * Print out human-readable details about the MCE error,
@@ -908,9 +913,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	percpu_inc(mce_exception_count);
 
-	if (notify_die(DIE_NMI, "machine check", regs, error_code,
-			   18, SIGKILL) == NOTIFY_STOP)
-		goto out;
 	if (!banks)
 		goto out;
 
@@ -1140,6 +1142,15 @@ static void mce_start_timer(unsigned long data)
 	add_timer_on(t, smp_processor_id());
 }
 
+/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+static void mce_timer_delete_all(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		del_timer_sync(&per_cpu(mce_timer, cpu));
+}
+
 static void mce_do_trigger(struct work_struct *work)
 {
 	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
@@ -1750,7 +1761,6 @@ static struct syscore_ops mce_syscore_ops = {
 
 static void mce_cpu_restart(void *data)
 {
-	del_timer_sync(&__get_cpu_var(mce_timer));
 	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 	__mcheck_cpu_init_generic();
@@ -1760,16 +1770,15 @@ static void mce_cpu_restart(void *data)
 /* Reinit MCEs after user configuration changes */
 static void mce_restart(void)
 {
+	mce_timer_delete_all();
 	on_each_cpu(mce_cpu_restart, NULL, 1);
 }
 
 /* Toggle features for corrected errors */
-static void mce_disable_ce(void *all)
+static void mce_disable_cmci(void *data)
 {
 	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
-	if (all)
-		del_timer_sync(&__get_cpu_var(mce_timer));
 	cmci_clear();
 }
 
@@ -1852,7 +1861,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
 	if (mce_ignore_ce ^ !!new) {
 		if (new) {
 			/* disable ce features */
-			on_each_cpu(mce_disable_ce, (void *)1, 1);
+			mce_timer_delete_all();
+			on_each_cpu(mce_disable_cmci, NULL, 1);
 			mce_ignore_ce = 1;
 		} else {
 			/* enable ce features */
@@ -1875,7 +1885,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
 	if (mce_cmci_disabled ^ !!new) {
 		if (new) {
 			/* disable cmci */
-			on_each_cpu(mce_disable_ce, NULL, 1);
+			on_each_cpu(mce_disable_cmci, NULL, 1);
 			mce_cmci_disabled = 1;
 		} else {
 			/* enable cmci */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 8694ef56459d..38e49bc95ffc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
  */
-static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD 1
 
@@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot)
 	int hdr = 0;
 	int i;
 
-	spin_lock_irqsave(&cmci_discover_lock, flags);
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
 	for (i = 0; i < banks; i++) {
 		u64 val;
 
@@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot)
 			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
 		}
 	}
-	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 	if (hdr)
 		printk(KERN_CONT "\n");
 }
@@ -150,7 +150,7 @@ void cmci_clear(void)
 
 	if (!cmci_supported(&banks))
 		return;
-	spin_lock_irqsave(&cmci_discover_lock, flags);
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
 	for (i = 0; i < banks; i++) {
 		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
 			continue;
@@ -160,7 +160,7 @@ void cmci_clear(void)
 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		__clear_bit(i, __get_cpu_var(mce_banks_owned));
 	}
-	spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index d944bf6c50e9..0a630dd4b620 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -11,6 +11,8 @@
  */
 
 #include <linux/types.h>
+#include <linux/time.h>
+#include <linux/clocksource.h>
 #include <linux/module.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
@@ -36,6 +38,25 @@ static bool __init ms_hyperv_platform(void)
 		!memcmp("Microsoft Hv", hyp_signature, 12);
 }
 
+static cycle_t read_hv_clock(struct clocksource *arg)
+{
+	cycle_t current_tick;
+	/*
+	 * Read the partition counter to get the current tick count. This count
+	 * is set to 0 when the partition is created and is incremented in
+	 * 100 nanosecond units.
+	 */
+	rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+	return current_tick;
+}
+
+static struct clocksource hyperv_cs = {
+	.name		= "hyperv_clocksource",
+	.rating		= 400, /* use this when running on Hyperv*/
+	.read		= read_hv_clock,
+	.mask		= CLOCKSOURCE_MASK(64),
+};
+
 static void __init ms_hyperv_init_platform(void)
 {
 	/*
@@ -46,6 +67,8 @@ static void __init ms_hyperv_init_platform(void)
 
 	printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
 	       ms_hyperv.features, ms_hyperv.hints);
+
+	clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
 }
 
 const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cfa62ec090ec..640891014b2a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
 #include <asm/smp.h>
 #include <asm/alternative.h>
 
+#include "perf_event.h"
+
 #if 0
 #undef wrmsrl
 #define wrmsrl(msr, val) 					\
@@ -43,283 +45,17 @@ do {								\
 } while (0)
 #endif
 
-/*
- *          |   NHM/WSM    |      SNB     |
- * register -------------------------------
- *          |  HT  | no HT |  HT  | no HT |
- *-----------------------------------------
- * offcore  | core | core  | cpu  | core  |
- * lbr_sel  | core | core  | cpu  | core  |
- * ld_lat   | cpu  | core  | cpu  | core  |
- *-----------------------------------------
- *
- * Given that there is a small number of shared regs,
- * we can pre-allocate their slot in the per-cpu
- * per-core reg tables.
- */
-enum extra_reg_type {
-	EXTRA_REG_NONE  = -1,	/* not used */
-
-	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
-	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
-
-	EXTRA_REG_MAX		/* number of entries needed */
-};
-
-struct event_constraint {
-	union {
-		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-		u64		idxmsk64;
-	};
-	u64	code;
-	u64	cmask;
-	int	weight;
-};
-
-struct amd_nb {
-	int nb_id;  /* NorthBridge id */
-	int refcnt; /* reference count */
-	struct perf_event *owners[X86_PMC_IDX_MAX];
-	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
-};
-
-struct intel_percore;
-
-#define MAX_LBR_ENTRIES		16
-
-struct cpu_hw_events {
-	/*
-	 * Generic x86 PMC bits
-	 */
-	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
-	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int			enabled;
-
-	int			n_events;
-	int			n_added;
-	int			n_txn;
-	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
-	u64			tags[X86_PMC_IDX_MAX];
-	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
-
-	unsigned int		group_flag;
-
-	/*
-	 * Intel DebugStore bits
-	 */
-	struct debug_store	*ds;
-	u64			pebs_enabled;
-
-	/*
-	 * Intel LBR bits
-	 */
-	int				lbr_users;
-	void				*lbr_context;
-	struct perf_branch_stack	lbr_stack;
-	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
-
-	/*
-	 * manage shared (per-core, per-cpu) registers
-	 * used on Intel NHM/WSM/SNB
-	 */
-	struct intel_shared_regs	*shared_regs;
-
-	/*
-	 * AMD specific bits
-	 */
-	struct amd_nb		*amd_nb;
-};
-
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
-	{ .idxmsk64 = (n) },		\
-	.code = (c),			\
-	.cmask = (m),			\
-	.weight = (w),			\
-}
-
-#define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
-
-/*
- * Constraint on the Event code.
- */
-#define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
-
-/*
- * Constraint on the Event code + UMask + fixed-mask
- *
- * filter mask to validate fixed counter events.
- * the following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
-
-/*
- * Constraint on the Event code + UMask
- */
-#define INTEL_UEVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-
-#define EVENT_CONSTRAINT_END		\
-	EVENT_CONSTRAINT(0, 0, 0)
-
-#define for_each_event_constraint(e, c)	\
-	for ((e) = (c); (e)->weight; (e)++)
-
-/*
- * Per register state.
- */
-struct er_account {
-	raw_spinlock_t		lock;	/* per-core: protect structure */
-	u64			config;	/* extra MSR config */
-	u64			reg;	/* extra MSR number */
-	atomic_t		ref;	/* reference count */
-};
-
-/*
- * Extra registers for specific events.
- *
- * Some events need large masks and require external MSRs.
- * Those extra MSRs end up being shared for all events on
- * a PMU and sometimes between PMU of sibling HT threads.
- * In either case, the kernel needs to handle conflicting
- * accesses to those extra, shared, regs. The data structure
- * to manage those registers is stored in cpu_hw_event.
- */
-struct extra_reg {
-	unsigned int		event;
-	unsigned int		msr;
-	u64			config_mask;
-	u64			valid_mask;
-	int			idx;  /* per_xxx->regs[] reg index */
-};
-
-#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
-	.event = (e),		\
-	.msr = (ms),		\
-	.config_mask = (m),	\
-	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i	\
-	}
-
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
-	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
-
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
-
-union perf_capabilities {
-	struct {
-		u64	lbr_format    : 6;
-		u64	pebs_trap     : 1;
-		u64	pebs_arch_reg : 1;
-		u64	pebs_format   : 4;
-		u64	smm_freeze    : 1;
-	};
-	u64	capabilities;
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
-	/*
-	 * Generic x86 PMC bits
-	 */
-	const char	*name;
-	int		version;
-	int		(*handle_irq)(struct pt_regs *);
-	void		(*disable_all)(void);
-	void		(*enable_all)(int added);
-	void		(*enable)(struct perf_event *);
-	void		(*disable)(struct perf_event *);
-	int		(*hw_config)(struct perf_event *event);
-	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
-	unsigned	eventsel;
-	unsigned	perfctr;
-	u64		(*event_map)(int);
-	int		max_events;
-	int		num_counters;
-	int		num_counters_fixed;
-	int		cntval_bits;
-	u64		cntval_mask;
-	int		apic;
-	u64		max_period;
-	struct event_constraint *
-			(*get_event_constraints)(struct cpu_hw_events *cpuc,
-						 struct perf_event *event);
-
-	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
-						 struct perf_event *event);
-	struct event_constraint *event_constraints;
-	void		(*quirks)(void);
-	int		perfctr_second_write;
-
-	int		(*cpu_prepare)(int cpu);
-	void		(*cpu_starting)(int cpu);
-	void		(*cpu_dying)(int cpu);
-	void		(*cpu_dead)(int cpu);
-
-	/*
-	 * Intel Arch Perfmon v2+
-	 */
-	u64			intel_ctrl;
-	union perf_capabilities intel_cap;
+struct x86_pmu x86_pmu __read_mostly;
 
-	/*
-	 * Intel DebugStore bits
-	 */
-	int		bts, pebs;
-	int		bts_active, pebs_active;
-	int		pebs_record_size;
-	void		(*drain_pebs)(struct pt_regs *regs);
-	struct event_constraint *pebs_constraints;
-
-	/*
-	 * Intel LBR
-	 */
-	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-	int		lbr_nr;			   /* hardware stack size */
-
-	/*
-	 * Extra registers for events
-	 */
-	struct extra_reg *extra_regs;
-	unsigned int er_flags;
-};
-
-#define ERF_NO_HT_SHARING	1
-#define ERF_HAS_RSP_1		2
-
-static struct x86_pmu x86_pmu __read_mostly;
-
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static int x86_perf_event_set_period(struct perf_event *event);
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-static u64 __read_mostly hw_cache_event_ids
+u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
-static u64 __read_mostly hw_cache_extra_regs
+u64 __read_mostly hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
@@ -329,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs
  * Can only be executed on the CPU where the event is active.
  * Returns the delta events processed.
  */
-static u64
-x86_perf_event_update(struct perf_event *event)
+u64 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.cntval_bits;
@@ -373,30 +108,6 @@ again:
 	return new_raw_count;
 }
 
-static inline int x86_pmu_addr_offset(int index)
-{
-	int offset;
-
-	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
-	alternative_io(ASM_NOP2,
-		       "shll $1, %%eax",
-		       X86_FEATURE_PERFCTR_CORE,
-		       "=a" (offset),
-		       "a"  (index));
-
-	return offset;
-}
-
-static inline unsigned int x86_pmu_config_addr(int index)
-{
-	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
-}
-
-static inline unsigned int x86_pmu_event_addr(int index)
-{
-	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
-}
-
 /*
  * Find and validate any extra registers to set up.
  */
@@ -532,9 +243,6 @@ msr_fail:
 	return false;
 }
 
-static void reserve_ds_buffers(void);
-static void release_ds_buffers(void);
-
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
@@ -583,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 	return x86_pmu_extra_regs(val, event);
 }
 
-static int x86_setup_perfctr(struct perf_event *event)
+int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
@@ -647,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	return 0;
 }
 
-static int x86_pmu_hw_config(struct perf_event *event)
+int x86_pmu_hw_config(struct perf_event *event)
 {
 	if (event->attr.precise_ip) {
 		int precise = 0;
@@ -723,7 +431,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	return x86_pmu.hw_config(event);
 }
 
-static void x86_pmu_disable_all(void)
+void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
@@ -758,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu)
 	x86_pmu.disable_all();
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-					  u64 enable_mask)
-{
-	if (hwc->extra_reg.reg)
-		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
-	wrmsrl(hwc->config_base, hwc->config | enable_mask);
-}
-
-static void x86_pmu_enable_all(int added)
+void x86_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
@@ -788,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
-static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -959,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 }
 
 static void x86_pmu_start(struct perf_event *event, int flags);
-static void x86_pmu_stop(struct perf_event *event, int flags);
 
 static void x86_pmu_enable(struct pmu *pmu)
 {
@@ -1031,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu)
 	x86_pmu.enable_all(added);
 }
 
-static inline void x86_pmu_disable_event(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	wrmsrl(hwc->config_base, hwc->config);
-}
-
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
  * To be called with the event disabled in hw:
  */
-static int
-x86_perf_event_set_period(struct perf_event *event)
+int x86_perf_event_set_period(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = local64_read(&hwc->period_left);
@@ -1105,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	return ret;
 }
 
-static void x86_pmu_enable_event(struct perf_event *event)
+void x86_pmu_enable_event(struct perf_event *event)
 {
 	if (__this_cpu_read(cpu_hw_events.enabled))
 		__x86_pmu_enable_event(&event->hw,
@@ -1244,7 +935,7 @@ void perf_event_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void x86_pmu_stop(struct perf_event *event, int flags)
+void x86_pmu_stop(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -1297,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	perf_event_update_userpage(event);
 }
 
-static int x86_pmu_handle_irq(struct pt_regs *regs)
+int x86_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
 	struct cpu_hw_events *cpuc;
@@ -1367,109 +1058,28 @@ void perf_events_lapic_init(void)
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-struct pmu_nmi_state {
-	unsigned int	marked;
-	int		handled;
-};
-
-static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
-
 static int __kprobes
-perf_event_nmi_handler(struct notifier_block *self,
-			 unsigned long cmd, void *__args)
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = __args;
-	unsigned int this_nmi;
-	int handled;
-
 	if (!atomic_read(&active_events))
-		return NOTIFY_DONE;
-
-	switch (cmd) {
-	case DIE_NMI:
-		break;
-	case DIE_NMIUNKNOWN:
-		this_nmi = percpu_read(irq_stat.__nmi_count);
-		if (this_nmi != __this_cpu_read(pmu_nmi.marked))
-			/* let the kernel handle the unknown nmi */
-			return NOTIFY_DONE;
-		/*
-		 * This one is a PMU back-to-back nmi. Two events
-		 * trigger 'simultaneously' raising two back-to-back
-		 * NMIs. If the first NMI handles both, the latter
-		 * will be empty and daze the CPU. So, we drop it to
-		 * avoid false-positive 'unknown nmi' messages.
-		 */
-		return NOTIFY_STOP;
-	default:
-		return NOTIFY_DONE;
-	}
-
-	handled = x86_pmu.handle_irq(args->regs);
-	if (!handled)
-		return NOTIFY_DONE;
-
-	this_nmi = percpu_read(irq_stat.__nmi_count);
-	if ((handled > 1) ||
-		/* the next nmi could be a back-to-back nmi */
-	    ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
-	     (__this_cpu_read(pmu_nmi.handled) > 1))) {
-		/*
-		 * We could have two subsequent back-to-back nmis: The
-		 * first handles more than one counter, the 2nd
-		 * handles only one counter and the 3rd handles no
-		 * counter.
-		 *
-		 * This is the 2nd nmi because the previous was
-		 * handling more than one counter. We will mark the
-		 * next (3rd) and then drop it if unhandled.
-		 */
-		__this_cpu_write(pmu_nmi.marked, this_nmi + 1);
-		__this_cpu_write(pmu_nmi.handled, handled);
-	}
+		return NMI_DONE;
 
-	return NOTIFY_STOP;
+	return x86_pmu.handle_irq(regs);
 }
 
-static __read_mostly struct notifier_block perf_event_nmi_notifier = {
-	.notifier_call		= perf_event_nmi_handler,
-	.next			= NULL,
-	.priority		= NMI_LOCAL_LOW_PRIOR,
-};
-
-static struct event_constraint unconstrained;
-static struct event_constraint emptyconstraint;
-
-static struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
-	struct event_constraint *c;
-
-	if (x86_pmu.event_constraints) {
-		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if ((event->hw.config & c->cmask) == c->code)
-				return c;
-		}
-	}
-
-	return &unconstrained;
-}
-
-#include "perf_event_amd.c"
-#include "perf_event_p6.c"
-#include "perf_event_p4.c"
-#include "perf_event_intel_lbr.c"
-#include "perf_event_intel_ds.c"
-#include "perf_event_intel.c"
+struct event_constraint emptyconstraint;
+struct event_constraint unconstrained;
 
 static int __cpuinit
 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 	int ret = NOTIFY_OK;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
+		cpuc->kfree_on_online = NULL;
 		if (x86_pmu.cpu_prepare)
 			ret = x86_pmu.cpu_prepare(cpu);
 		break;
@@ -1479,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 			x86_pmu.cpu_starting(cpu);
 		break;
 
+	case CPU_ONLINE:
+		kfree(cpuc->kfree_on_online);
+		break;
+
 	case CPU_DYING:
 		if (x86_pmu.cpu_dying)
 			x86_pmu.cpu_dying(cpu);
@@ -1557,7 +1171,7 @@ static int __init init_hw_perf_events(void)
 		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
-	register_die_notifier(&perf_event_nmi_notifier);
+	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
new file mode 100644
index 000000000000..b9698d40ac4b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -0,0 +1,505 @@
+/*
+ * Performance events x86 architecture header
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+	EXTRA_REG_NONE  = -1,	/* not used */
+
+	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
+	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+
+	EXTRA_REG_MAX		/* number of entries needed */
+};
+
+struct event_constraint {
+	union {
+		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+		u64		idxmsk64;
+	};
+	u64	code;
+	u64	cmask;
+	int	weight;
+};
+
+struct amd_nb {
+	int nb_id;  /* NorthBridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		4
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+/*
+ * Per register state.
+ */
+struct er_account {
+	raw_spinlock_t		lock;	/* per-core: protect structure */
+	u64                 config;	/* extra MSR config */
+	u64                 reg;	/* extra MSR number */
+	atomic_t            ref;	/* reference count */
+};
+
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+struct intel_shared_regs {
+	struct er_account       regs[EXTRA_REG_MAX];
+	int                     refcnt;		/* per-core: #HT threads */
+	unsigned                core_id;	/* per-core: core id */
+};
+
+#define MAX_LBR_ENTRIES		16
+
+struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
+	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
+	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int			enabled;
+
+	int			n_events;
+	int			n_added;
+	int			n_txn;
+	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	u64			tags[X86_PMC_IDX_MAX];
+	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	unsigned int		group_flag;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
+	 * Intel host/guest exclude bits
+	 */
+	u64				intel_ctrl_guest_mask;
+	u64				intel_ctrl_host_mask;
+	struct perf_guest_switch_msr	guest_switch_msrs[X86_PMC_IDX_MAX];
+
+	/*
+	 * manage shared (per-core, per-cpu) registers
+	 * used on Intel NHM/WSM/SNB
+	 */
+	struct intel_shared_regs	*shared_regs;
+
+	/*
+	 * AMD specific bits
+	 */
+	struct amd_nb		*amd_nb;
+
+	void				*kfree_on_online;
+};
+
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
+	{ .idxmsk64 = (n) },		\
+	.code = (c),			\
+	.cmask = (m),			\
+	.weight = (w),			\
+}
+
+#define EVENT_CONSTRAINT(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+
+/*
+ * Constraint on the Event code.
+ */
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define INTEL_UEVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c)	\
+	for ((e) = (c); (e)->weight; (e)++)
+
+/*
+ * Extra registers for specific events.
+ *
+ * Some events need large masks and require external MSRs.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
+ */
+struct extra_reg {
+	unsigned int		event;
+	unsigned int		msr;
+	u64			config_mask;
+	u64			valid_mask;
+	int			idx;  /* per_xxx->regs[] reg index */
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
+	.event = (e),		\
+	.msr = (ms),		\
+	.config_mask = (m),	\
+	.valid_mask = (vm),	\
+	.idx = EXTRA_REG_##i	\
+	}
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format:6;
+		u64	pebs_trap:1;
+		u64	pebs_arch_reg:1;
+		u64	pebs_format:4;
+		u64	smm_freeze:1;
+	};
+	u64	capabilities;
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
+	const char	*name;
+	int		version;
+	int		(*handle_irq)(struct pt_regs *);
+	void		(*disable_all)(void);
+	void		(*enable_all)(int added);
+	void		(*enable)(struct perf_event *);
+	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+	unsigned	eventsel;
+	unsigned	perfctr;
+	u64		(*event_map)(int);
+	int		max_events;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
+	int		apic;
+	u64		max_period;
+	struct event_constraint *
+			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+	struct event_constraint *event_constraints;
+	void		(*quirks)(void);
+	int		perfctr_second_write;
+
+	int		(*cpu_prepare)(int cpu);
+	void		(*cpu_starting)(int cpu);
+	void		(*cpu_dying)(int cpu);
+	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64			intel_ctrl;
+	union perf_capabilities intel_cap;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		bts_active, pebs_active;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
+
+	/*
+	 * Extra registers for events
+	 */
+	struct extra_reg *extra_regs;
+	unsigned int er_flags;
+
+	/*
+	 * Intel host/guest support (KVM)
+	 */
+	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+};
+
+#define ERF_NO_HT_SHARING	1
+#define ERF_HAS_RSP_1		2
+
+extern struct x86_pmu x86_pmu __read_mostly;
+
+DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+int x86_perf_event_set_period(struct perf_event *event);
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per model basis. A value of 0 means
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+extern u64 __read_mostly hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+extern u64 __read_mostly hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+u64 x86_perf_event_update(struct perf_event *event);
+
+static inline int x86_pmu_addr_offset(int index)
+{
+	int offset;
+
+	/* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
+	alternative_io(ASM_NOP2,
+		       "shll $1, %%eax",
+		       X86_FEATURE_PERFCTR_CORE,
+		       "=a" (offset),
+		       "a"  (index));
+
+	return offset;
+}
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+	return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+}
+
+int x86_setup_perfctr(struct perf_event *event);
+
+int x86_pmu_hw_config(struct perf_event *event);
+
+void x86_pmu_disable_all(void);
+
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+					  u64 enable_mask)
+{
+	if (hwc->extra_reg.reg)
+		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+	wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
+void x86_pmu_enable_all(int added);
+
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
+
+void x86_pmu_stop(struct perf_event *event, int flags);
+
+static inline void x86_pmu_disable_event(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base, hwc->config);
+}
+
+void x86_pmu_enable_event(struct perf_event *event);
+
+int x86_pmu_handle_irq(struct pt_regs *regs);
+
+extern struct event_constraint emptyconstraint;
+
+extern struct event_constraint unconstrained;
+
+#ifdef CONFIG_CPU_SUP_AMD
+
+int amd_pmu_init(void);
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static inline int amd_pmu_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+
+int intel_pmu_save_and_restart(struct perf_event *event);
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
+
+struct intel_shared_regs *allocate_shared_regs(int cpu);
+
+int intel_pmu_init(void);
+
+void init_debug_store_on_cpu(int cpu);
+
+void fini_debug_store_on_cpu(int cpu);
+
+void release_ds_buffers(void);
+
+void reserve_ds_buffers(void);
+
+extern struct event_constraint bts_constraint;
+
+void intel_pmu_enable_bts(u64 config);
+
+void intel_pmu_disable_bts(void);
+
+int intel_pmu_drain_bts_buffer(void);
+
+extern struct event_constraint intel_core2_pebs_event_constraints[];
+
+extern struct event_constraint intel_atom_pebs_event_constraints[];
+
+extern struct event_constraint intel_nehalem_pebs_event_constraints[];
+
+extern struct event_constraint intel_westmere_pebs_event_constraints[];
+
+extern struct event_constraint intel_snb_pebs_event_constraints[];
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+
+void intel_pmu_pebs_enable(struct perf_event *event);
+
+void intel_pmu_pebs_disable(struct perf_event *event);
+
+void intel_pmu_pebs_enable_all(void);
+
+void intel_pmu_pebs_disable_all(void);
+
+void intel_ds_init(void);
+
+void intel_pmu_lbr_reset(void);
+
+void intel_pmu_lbr_enable(struct perf_event *event);
+
+void intel_pmu_lbr_disable(struct perf_event *event);
+
+void intel_pmu_lbr_enable_all(void);
+
+void intel_pmu_lbr_disable_all(void);
+
+void intel_pmu_lbr_read(void);
+
+void intel_pmu_lbr_init_core(void);
+
+void intel_pmu_lbr_init_nhm(void);
+
+void intel_pmu_lbr_init_atom(void);
+
+int p4_pmu_init(void);
+
+int p6_pmu_init(void);
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static inline void reserve_ds_buffers(void)
+{
+}
+
+static inline void release_ds_buffers(void)
+{
+}
+
+static inline int intel_pmu_init(void)
+{
+	return 0;
+}
+
+static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+	return NULL;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 941caa2e449b..aeefd45697a2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,10 @@
-#ifdef CONFIG_CPU_SUP_AMD
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/apicdef.h>
+
+#include "perf_event.h"
 
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
@@ -132,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (event->attr.exclude_host && event->attr.exclude_guest)
+		/*
+		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
+		 * and will count in both modes. We don't want to count in that
+		 * case so we emulate no-counting by setting US = OS = 0.
+		 */
+		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+				      ARCH_PERFMON_EVENTSEL_OS);
+	else if (event->attr.exclude_host)
+		event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+	else if (event->attr.exclude_guest)
+		event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
@@ -350,7 +369,7 @@ static void amd_pmu_cpu_starting(int cpu)
 			continue;
 
 		if (nb->nb_id == nb_id) {
-			kfree(cpuc->amd_nb);
+			cpuc->kfree_on_online = cpuc->amd_nb;
 			cpuc->amd_nb = nb;
 			break;
 		}
@@ -392,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 4,
+	.num_counters		= AMD64_NUM_COUNTERS,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
@@ -556,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
 	.perfctr		= MSR_F15H_PERF_CTR,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 6,
+	.num_counters		= AMD64_NUM_COUNTERS_F15H,
 	.cntval_bits		= 48,
 	.cntval_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
@@ -573,7 +592,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
 #endif
 };
 
-static __init int amd_pmu_init(void)
+__init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
@@ -602,12 +621,3 @@ static __init int amd_pmu_init(void)
 
 	return 0;
 }
-
-#else /* CONFIG_CPU_SUP_AMD */
-
-static int amd_pmu_init(void)
-{
-	return 0;
-}
-
-#endif
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
new file mode 100644
index 000000000000..ab6343d21825
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -0,0 +1,294 @@
+/*
+ * Performance events - AMD IBS
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include <asm/apic.h>
+
+static u32 ibs_caps;
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+static struct pmu perf_ibs;
+
+static int perf_ibs_init(struct perf_event *event)
+{
+	if (perf_ibs.type != event->attr.type)
+		return -ENOENT;
+	return 0;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+	return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+}
+
+static struct pmu perf_ibs = {
+	.event_init= perf_ibs_init,
+	.add= perf_ibs_add,
+	.del= perf_ibs_del,
+};
+
+static __init int perf_event_ibs_init(void)
+{
+	if (!ibs_caps)
+		return -ENODEV;	/* ibs not supported by the cpu */
+
+	perf_pmu_register(&perf_ibs, "ibs", -1);
+	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+
+	return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+	u32 caps;
+	unsigned int max_level;
+
+	if (!boot_cpu_has(X86_FEATURE_IBS))
+		return 0;
+
+	/* check IBS cpuid feature flags */
+	max_level = cpuid_eax(0x80000000);
+	if (max_level < IBS_CPUID_FEATURES)
+		return IBS_CAPS_DEFAULT;
+
+	caps = cpuid_eax(IBS_CPUID_FEATURES);
+	if (!(caps & IBS_CAPS_AVAIL))
+		/* cpuid flags not valid */
+		return IBS_CAPS_DEFAULT;
+
+	return caps;
+}
+
+u32 get_ibs_caps(void)
+{
+	return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+	return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+	int offset;
+	u64 val;
+	int valid = 0;
+
+	preempt_disable();
+
+	rdmsrl(MSR_AMD64_IBSCTL, val);
+	offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+		goto out;
+	}
+
+	if (!get_eilvt(offset)) {
+		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+		goto out;
+	}
+
+	valid = 1;
+out:
+	preempt_enable();
+
+	return valid;
+}
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+	struct pci_dev *cpu_cfg;
+	int nodes;
+	u32 value = 0;
+
+	nodes = 0;
+	cpu_cfg = NULL;
+	do {
+		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
+					 cpu_cfg);
+		if (!cpu_cfg)
+			break;
+		++nodes;
+		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+				       | IBSCTL_LVT_OFFSET_VALID);
+		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+			pci_dev_put(cpu_cfg);
+			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
+			       "IBSCTL = 0x%08x\n", value);
+			return -EINVAL;
+		}
+	} while (1);
+
+	if (!nodes) {
+		printk(KERN_DEBUG "No CPU node configured for IBS\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This updates then
+ * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
+ * is using the new offset.
+ */
+static int force_ibs_eilvt_setup(void)
+{
+	int offset;
+	int ret;
+
+	preempt_disable();
+	/* find the next free available EILVT entry, skip offset 0 */
+	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+		if (get_eilvt(offset))
+			break;
+	}
+	preempt_enable();
+
+	if (offset == APIC_EILVT_NR_MAX) {
+		printk(KERN_DEBUG "No EILVT entry available\n");
+		return -EBUSY;
+	}
+
+	ret = setup_ibs_ctl(offset);
+	if (ret)
+		goto out;
+
+	if (!ibs_eilvt_valid()) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
+	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+
+	return 0;
+out:
+	preempt_disable();
+	put_eilvt(offset);
+	preempt_enable();
+	return ret;
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD64_IBSCTL, val);
+	if (!(val & IBSCTL_LVT_OFFSET_VALID))
+		return -EINVAL;
+
+	return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void *dummy)
+{
+	int offset;
+
+	offset = get_ibs_lvt_offset();
+	if (offset < 0)
+		goto failed;
+
+	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+		return;
+failed:
+	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+		smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+	int offset;
+
+	offset = get_ibs_lvt_offset();
+	if (offset >= 0)
+		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+static int __cpuinit
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		setup_APIC_ibs(NULL);
+		break;
+	case CPU_DYING:
+		clear_APIC_ibs(NULL);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+	u32 caps;
+	int ret;
+
+	caps = __get_ibs_caps();
+	if (!caps)
+		return -ENODEV;	/* ibs not supported by the cpu */
+
+	if (!ibs_eilvt_valid()) {
+		ret = force_ibs_eilvt_setup();
+		if (ret) {
+			pr_err("Failed to setup IBS, %d\n", ret);
+			return ret;
+		}
+	}
+
+	get_online_cpus();
+	ibs_caps = caps;
+	/* make ibs_caps visible to other cpus: */
+	smp_mb();
+	perf_cpu_notifier(perf_ibs_cpu_notifier);
+	smp_call_function(setup_APIC_ibs, NULL, 1);
+	put_online_cpus();
+
+	return perf_event_ibs_init();
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f88af2c2a561..e09ca20e86ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,16 +1,19 @@
-#ifdef CONFIG_CPU_SUP_INTEL
-
 /*
  * Per core/cpu state
  *
  * Used to coordinate shared registers between HT threads or
  * among events on a single PMU.
  */
-struct intel_shared_regs {
-	struct er_account       regs[EXTRA_REG_MAX];
-	int                     refcnt;		/* per-core: #HT threads */
-	unsigned                core_id;	/* per-core: core id */
-};
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "perf_event.h"
 
 /*
  * Intel PerfMon, used on Core and later.
@@ -746,7 +749,8 @@ static void intel_pmu_enable_all(int added)
 
 	intel_pmu_pebs_enable_all();
 	intel_pmu_lbr_enable_all();
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
 		struct perf_event *event =
@@ -869,6 +873,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
 		intel_pmu_disable_bts();
@@ -876,6 +881,9 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		return;
 	}
 
+	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
+	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -921,6 +929,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
 		if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -930,6 +939,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
+	if (event->attr.exclude_host)
+		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
+	if (event->attr.exclude_guest)
+		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_enable_fixed(hwc);
 		return;
@@ -945,7 +959,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
  */
-static int intel_pmu_save_and_restart(struct perf_event *event)
+int intel_pmu_save_and_restart(struct perf_event *event)
 {
 	x86_perf_event_update(event);
 	return x86_perf_event_set_period(event);
@@ -1197,6 +1211,21 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 	return c;
 }
 
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if ((event->hw.config & c->cmask) == c->code)
+				return c;
+		}
+	}
+
+	return &unconstrained;
+}
+
 static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
@@ -1284,12 +1313,84 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	return 0;
 }
 
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+	if (x86_pmu.guest_get_msrs)
+		return x86_pmu.guest_get_msrs(nr);
+	*nr = 0;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+
+	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
+	arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+
+	*nr = 1;
+	return arr;
+}
+
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+	int idx;
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
+		struct perf_event *event = cpuc->events[idx];
+
+		arr[idx].msr = x86_pmu_config_addr(idx);
+		arr[idx].host = arr[idx].guest = 0;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		arr[idx].host = arr[idx].guest =
+			event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
+
+		if (event->attr.exclude_host)
+			arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+		else if (event->attr.exclude_guest)
+			arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+	}
+
+	*nr = x86_pmu.num_counters;
+	return arr;
+}
+
+static void core_pmu_enable_event(struct perf_event *event)
+{
+	if (!event->attr.exclude_host)
+		x86_pmu_enable_event(event);
+}
+
+static void core_pmu_enable_all(int added)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+		if (!test_bit(idx, cpuc->active_mask) ||
+				cpuc->events[idx]->attr.exclude_host)
+			continue;
+
+		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+	}
+}
+
 static __initconst const struct x86_pmu core_pmu = {
 	.name			= "core",
 	.handle_irq		= x86_pmu_handle_irq,
 	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
+	.enable_all		= core_pmu_enable_all,
+	.enable			= core_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
 	.hw_config		= x86_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
@@ -1307,9 +1408,10 @@ static __initconst const struct x86_pmu core_pmu = {
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
 	.event_constraints	= intel_core_event_constraints,
+	.guest_get_msrs		= core_guest_get_msrs,
 };
 
-static struct intel_shared_regs *allocate_shared_regs(int cpu)
+struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
 	struct intel_shared_regs *regs;
 	int i;
@@ -1362,7 +1464,7 @@ static void intel_pmu_cpu_starting(int cpu)
 
 		pc = per_cpu(cpu_hw_events, i).shared_regs;
 		if (pc && pc->core_id == core_id) {
-			kfree(cpuc->shared_regs);
+			cpuc->kfree_on_online = cpuc->shared_regs;
 			cpuc->shared_regs = pc;
 			break;
 		}
@@ -1413,6 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
+	.guest_get_msrs		= intel_guest_get_msrs,
 };
 
 static void intel_clovertown_quirks(void)
@@ -1441,7 +1544,7 @@ static void intel_clovertown_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
-static __init int intel_pmu_init(void)
+__init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
@@ -1597,7 +1700,7 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
-		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
@@ -1628,16 +1731,3 @@ static __init int intel_pmu_init(void)
 	}
 	return 0;
 }
-
-#else /* CONFIG_CPU_SUP_INTEL */
-
-static int intel_pmu_init(void)
-{
-	return 0;
-}
-
-static struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
-	return NULL;
-}
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 1b1ef3addcfd..c0d238f49db8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1,7 +1,10 @@
-#ifdef CONFIG_CPU_SUP_INTEL
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
+#include <asm/perf_event.h>
+
+#include "perf_event.h"
 
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
@@ -37,24 +40,7 @@ struct pebs_record_nhm {
 	u64 status, dla, dse, lat;
 };
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
-static void init_debug_store_on_cpu(int cpu)
+void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
@@ -66,7 +52,7 @@ static void init_debug_store_on_cpu(int cpu)
 		     (u32)((u64)(unsigned long)ds >> 32));
 }
 
-static void fini_debug_store_on_cpu(int cpu)
+void fini_debug_store_on_cpu(int cpu)
 {
 	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
@@ -175,7 +161,7 @@ static void release_ds_buffer(int cpu)
 	kfree(ds);
 }
 
-static void release_ds_buffers(void)
+void release_ds_buffers(void)
 {
 	int cpu;
 
@@ -194,7 +180,7 @@ static void release_ds_buffers(void)
 	put_online_cpus();
 }
 
-static void reserve_ds_buffers(void)
+void reserve_ds_buffers(void)
 {
 	int bts_err = 0, pebs_err = 0;
 	int cpu;
@@ -260,10 +246,10 @@ static void reserve_ds_buffers(void)
  * BTS
  */
 
-static struct event_constraint bts_constraint =
+struct event_constraint bts_constraint =
 	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
 
-static void intel_pmu_enable_bts(u64 config)
+void intel_pmu_enable_bts(u64 config)
 {
 	unsigned long debugctlmsr;
 
@@ -282,7 +268,7 @@ static void intel_pmu_enable_bts(u64 config)
 	update_debugctlmsr(debugctlmsr);
 }
 
-static void intel_pmu_disable_bts(void)
+void intel_pmu_disable_bts(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long debugctlmsr;
@@ -299,7 +285,7 @@ static void intel_pmu_disable_bts(void)
 	update_debugctlmsr(debugctlmsr);
 }
 
-static int intel_pmu_drain_bts_buffer(void)
+int intel_pmu_drain_bts_buffer(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
@@ -361,7 +347,7 @@ static int intel_pmu_drain_bts_buffer(void)
 /*
  * PEBS
  */
-static struct event_constraint intel_core2_pebs_event_constraints[] = {
+struct event_constraint intel_core2_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 	INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
 	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
@@ -370,14 +356,14 @@ static struct event_constraint intel_core2_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_atom_pebs_event_constraints[] = {
+struct event_constraint intel_atom_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
 	INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
 	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -392,7 +378,7 @@ static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_westmere_pebs_event_constraints[] = {
+struct event_constraint intel_westmere_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0x0b, 0xf),    /* MEM_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -407,7 +393,7 @@ static struct event_constraint intel_westmere_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_snb_pebs_events[] = {
+struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
 	INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
 	INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
@@ -428,8 +414,7 @@ static struct event_constraint intel_snb_pebs_events[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint *
-intel_pebs_constraints(struct perf_event *event)
+struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
 
@@ -446,7 +431,7 @@ intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static void intel_pmu_pebs_enable(struct perf_event *event)
+void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -460,7 +445,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
 		intel_pmu_lbr_enable(event);
 }
 
-static void intel_pmu_pebs_disable(struct perf_event *event)
+void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
@@ -475,7 +460,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
 		intel_pmu_lbr_disable(event);
 }
 
-static void intel_pmu_pebs_enable_all(void)
+void intel_pmu_pebs_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -483,7 +468,7 @@ static void intel_pmu_pebs_enable_all(void)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 }
 
-static void intel_pmu_pebs_disable_all(void)
+void intel_pmu_pebs_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -576,8 +561,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	return 0;
 }
 
-static int intel_pmu_save_and_restart(struct perf_event *event);
-
 static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
@@ -716,7 +699,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
  * BTS, PEBS probe and setup
  */
 
-static void intel_ds_init(void)
+void intel_ds_init(void)
 {
 	/*
 	 * No support for 32bit formats
@@ -749,15 +732,3 @@ static void intel_ds_init(void)
 		}
 	}
 }
-
-#else /* CONFIG_CPU_SUP_INTEL */
-
-static void reserve_ds_buffers(void)
-{
-}
-
-static void release_ds_buffers(void)
-{
-}
-
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d202c1bece1a..3fab3de3ce96 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -1,4 +1,10 @@
-#ifdef CONFIG_CPU_SUP_INTEL
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#include "perf_event.h"
 
 enum {
 	LBR_FORMAT_32		= 0x00,
@@ -48,7 +54,7 @@ static void intel_pmu_lbr_reset_64(void)
 	}
 }
 
-static void intel_pmu_lbr_reset(void)
+void intel_pmu_lbr_reset(void)
 {
 	if (!x86_pmu.lbr_nr)
 		return;
@@ -59,7 +65,7 @@ static void intel_pmu_lbr_reset(void)
 		intel_pmu_lbr_reset_64();
 }
 
-static void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -81,7 +87,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event)
 	cpuc->lbr_users++;
 }
 
-static void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -95,7 +101,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event)
 		__intel_pmu_lbr_disable();
 }
 
-static void intel_pmu_lbr_enable_all(void)
+void intel_pmu_lbr_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -103,7 +109,7 @@ static void intel_pmu_lbr_enable_all(void)
 		__intel_pmu_lbr_enable();
 }
 
-static void intel_pmu_lbr_disable_all(void)
+void intel_pmu_lbr_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -178,7 +184,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	cpuc->lbr_stack.nr = i;
 }
 
-static void intel_pmu_lbr_read(void)
+void intel_pmu_lbr_read(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -191,7 +197,7 @@ static void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_64(cpuc);
 }
 
-static void intel_pmu_lbr_init_core(void)
+void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
 	x86_pmu.lbr_tos    = 0x01c9;
@@ -199,7 +205,7 @@ static void intel_pmu_lbr_init_core(void)
 	x86_pmu.lbr_to     = 0x60;
 }
 
-static void intel_pmu_lbr_init_nhm(void)
+void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
 	x86_pmu.lbr_tos    = 0x01c9;
@@ -207,12 +213,10 @@ static void intel_pmu_lbr_init_nhm(void)
 	x86_pmu.lbr_to     = 0x6c0;
 }
 
-static void intel_pmu_lbr_init_atom(void)
+void intel_pmu_lbr_init_atom(void)
 {
 	x86_pmu.lbr_nr	   = 8;
 	x86_pmu.lbr_tos    = 0x01c9;
 	x86_pmu.lbr_from   = 0x40;
 	x86_pmu.lbr_to     = 0x60;
 }
-
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 7809d2bcb209..492bf1358a7c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -7,9 +7,13 @@
  *  For licencing details see kernel-base/COPYING
  */
 
-#ifdef CONFIG_CPU_SUP_INTEL
+#include <linux/perf_event.h>
 
 #include <asm/perf_event_p4.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "perf_event.h"
 
 #define P4_CNTR_LIMIT 3
 /*
@@ -1303,7 +1307,7 @@ static __initconst const struct x86_pmu p4_pmu = {
 	.perfctr_second_write	= 1,
 };
 
-static __init int p4_pmu_init(void)
+__init int p4_pmu_init(void)
 {
 	unsigned int low, high;
 
@@ -1326,5 +1330,3 @@ static __init int p4_pmu_init(void)
 
 	return 0;
 }
-
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 20c097e33860..c7181befecde 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -1,4 +1,7 @@
-#ifdef CONFIG_CPU_SUP_INTEL
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
 
 /*
  * Not sure about some of these
@@ -114,7 +117,7 @@ static __initconst const struct x86_pmu p6_pmu = {
 	.event_constraints	= p6_event_constraints,
 };
 
-static __init int p6_pmu_init(void)
+__init int p6_pmu_init(void)
 {
 	switch (boot_cpu_data.x86_model) {
 	case 1:
@@ -138,5 +141,3 @@ static __init int p6_pmu_init(void)
 
 	return 0;
 }
-
-#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 62ac8cb6ba27..14b23140e81f 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -85,6 +85,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "stepping\t: %d\n", c->x86_mask);
 	else
 		seq_printf(m, "stepping\t: unknown\n");
+	if (c->microcode)
+		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
 	if (cpu_has(c, X86_FEATURE_TSC)) {
 		unsigned int freq = cpufreq_quick_get(cpu);
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
new file mode 100644
index 000000000000..feca286c2bb4
--- /dev/null
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -0,0 +1,73 @@
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ *          H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/archrandom.h>
+#include <asm/sections.h>
+
+static int __init x86_rdrand_setup(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+	return 1;
+}
+__setup("nordrand", x86_rdrand_setup);
+
+/* We can't use arch_get_random_long() here since alternatives haven't run */
+static inline int rdrand_long(unsigned long *v)
+{
+	int ok;
+	asm volatile("1: " RDRAND_LONG "\n\t"
+		     "jc 2f\n\t"
+		     "decl %0\n\t"
+		     "jnz 1b\n\t"
+		     "2:"
+		     : "=r" (ok), "=a" (*v)
+		     : "0" (RDRAND_RETRY_LOOPS));
+	return ok;
+}
+
+/*
+ * Force a reseed cycle; we are architecturally guaranteed a reseed
+ * after no more than 512 128-bit chunks of random data.  This also
+ * acts as a test of the CPU capability.
+ */
+#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
+
+void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_RANDOM
+	unsigned long tmp;
+	int i, count, ok;
+
+	if (!cpu_has(c, X86_FEATURE_RDRAND))
+		return;		/* Nothing to do */
+
+	for (count = i = 0; i < RESEED_LOOP; i++) {
+		ok = rdrand_long(&tmp);
+		if (ok)
+			count++;
+	}
+
+	if (count != RESEED_LOOP)
+		clear_cpu_cap(c, X86_FEATURE_RDRAND);
+#endif
+}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 764c7c2b1811..13ad89971d47 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -32,15 +32,12 @@ int in_crash_kexec;
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
-static void kdump_nmi_callback(int cpu, struct die_args *args)
+static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 {
-	struct pt_regs *regs;
 #ifdef CONFIG_X86_32
 	struct pt_regs fixed_regs;
 #endif
 
-	regs = args->regs;
-
 #ifdef CONFIG_X86_32
 	if (!user_mode_vm(regs)) {
 		crash_fixup_ss_esp(&fixed_regs, regs);
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6419bb05ecd5..faf8d5e74b0b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -331,10 +331,15 @@ ENDPROC(native_usergs_sysret64)
 1:	incl PER_CPU_VAR(irq_count)
 	jne 2f
 	mov PER_CPU_VAR(irq_stack_ptr),%rsp
-	EMPTY_FRAME 0
+	CFI_DEF_CFA_REGISTER	rsi
 
 2:	/* Store previous stack value */
 	pushq %rsi
+	CFI_ESCAPE	0x0f /* DW_CFA_def_cfa_expression */, 6, \
+			0x77 /* DW_OP_breg7 */, 0, \
+			0x06 /* DW_OP_deref */, \
+			0x08 /* DW_OP_const1u */, SS+8-RBP, \
+			0x22 /* DW_OP_plus */
 	/* We entered an interrupt context - irqs are off: */
 	TRACE_IRQS_OFF
 	.endm
@@ -788,7 +793,6 @@ END(interrupt)
 	subq $ORIG_RAX-RBP, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
 	SAVE_ARGS_IRQ
-	PARTIAL_FRAME 0
 	call \func
 	.endm
 
@@ -813,10 +817,10 @@ ret_from_intr:
 
 	/* Restore saved previous stack */
 	popq %rsi
-	leaq 16(%rsi), %rsp
-
+	CFI_DEF_CFA_REGISTER	rsi
+	leaq ARGOFFSET-RBP(%rsi), %rsp
 	CFI_DEF_CFA_REGISTER	rsp
-	CFI_ADJUST_CFA_OFFSET	-16
+	CFI_ADJUST_CFA_OFFSET	RBP-ARGOFFSET
 
 exit_intr:
 	GET_THREAD_INFO(%rcx)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 3fee346ef545..cacdd46d184d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -42,7 +42,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	put_online_cpus();
 }
 
-void arch_jump_label_text_poke_early(jump_label_t addr)
+void __init_or_module arch_jump_label_text_poke_early(jump_label_t addr)
 {
 	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
 			JUMP_LABEL_NOP_SIZE);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 00354d4919a9..faba5771acad 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
 
 static int was_in_debug_nmi[NR_CPUS];
 
-static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	struct pt_regs *regs = args->regs;
-
 	switch (cmd) {
-	case DIE_NMI:
+	case NMI_LOCAL:
 		if (atomic_read(&kgdb_active) != -1) {
 			/* KGDB CPU roundup */
 			kgdb_nmicallback(raw_smp_processor_id(), regs);
 			was_in_debug_nmi[raw_smp_processor_id()] = 1;
 			touch_nmi_watchdog();
-			return NOTIFY_STOP;
+			return NMI_HANDLED;
 		}
-		return NOTIFY_DONE;
+		break;
 
-	case DIE_NMIUNKNOWN:
+	case NMI_UNKNOWN:
 		if (was_in_debug_nmi[raw_smp_processor_id()]) {
 			was_in_debug_nmi[raw_smp_processor_id()] = 0;
-			return NOTIFY_STOP;
+			return NMI_HANDLED;
 		}
-		return NOTIFY_DONE;
+		break;
+	default:
+		/* do nothing */
+		break;
+	}
+	return NMI_DONE;
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+	struct pt_regs *regs = args->regs;
 
+	switch (cmd) {
 	case DIE_DEBUG:
 		if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
 			if (user_mode(regs))
@@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
 
 static struct notifier_block kgdb_notifier = {
 	.notifier_call	= kgdb_notify,
-
-	/*
-	 * Lowest-prio notifier priority, we want to be notified last:
-	 */
-	.priority	= NMI_LOCAL_LOW_PRIOR,
 };
 
 /**
@@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = {
  */
 int kgdb_arch_init(void)
 {
-	return register_die_notifier(&kgdb_notifier);
+	int retval;
+
+	retval = register_die_notifier(&kgdb_notifier);
+	if (retval)
+		goto out;
+
+	retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler,
+					0, "kgdb");
+	if (retval)
+		goto out1;
+
+	retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler,
+					0, "kgdb");
+
+	if (retval)
+		goto out2;
+
+	return retval;
+
+out2:
+	unregister_nmi_handler(NMI_LOCAL, "kgdb");
+out1:
+	unregister_die_notifier(&kgdb_notifier);
+out:
+	return retval;
 }
 
 static void kgdb_hw_overflow_handler(struct perf_event *event,
@@ -673,6 +701,8 @@ void kgdb_arch_exit(void)
 			breakinfo[i].pev = NULL;
 		}
 	}
+	unregister_nmi_handler(NMI_UNKNOWN, "kgdb");
+	unregister_nmi_handler(NMI_LOCAL, "kgdb");
 	unregister_die_notifier(&kgdb_notifier);
 }
 
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244d7d93..7da647d8b64c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,11 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 	/*
 	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
 	 * Groups, and some special opcodes can not boost.
+	 * This is non-const and volatile to keep gcc from statically
+	 * optimizing it out, as variable_test_bit makes gcc think only
+	 * *(unsigned long*) is used. 
 	 */
-static const u32 twobyte_is_boostable[256 / 32] = {
+static volatile u32 twobyte_is_boostable[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 	/*      ----------------------------------------------          */
 	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 591be0ee1934..d494799aafcd 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -74,14 +74,13 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	u32 dummy;
 
 	if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
 		pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
 		return -1;
 	}
 
-	rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
+	csig->rev = c->microcode;
 	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
 
 	return 0;
@@ -130,6 +129,7 @@ static int apply_microcode_amd(int cpu)
 	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	struct microcode_amd *mc_amd = uci->mc;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	/* We should bind the task to the CPU */
 	BUG_ON(cpu_num != cpu);
@@ -150,6 +150,7 @@ static int apply_microcode_amd(int cpu)
 
 	pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
 	uci->cpu_sig.rev = rev;
+	c->microcode = rev;
 
 	return 0;
 }
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f9242800bc84..f2d2a664e797 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -483,7 +483,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 		sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 		pr_debug("CPU%d removed\n", cpu);
 		break;
-	case CPU_DEAD:
+
+	/*
+	 * When a CPU goes offline, don't free up or invalidate the copy of
+	 * the microcode in kernel memory, so that we can reuse it when the
+	 * CPU comes back online without unnecessarily requesting the userspace
+	 * for it again.
+	 */
 	case CPU_UP_CANCELED_FROZEN:
 		/* The CPU refused to come up during a system resume */
 		microcode_fini_cpu(cpu);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 1a1b606d3e92..3ca42d0e43a2 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 		csig->pf = 1 << ((val[1] >> 18) & 7);
 	}
 
-	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-	/* see notes above for revision 1.07.  Apparent chip bug */
-	sync_core();
-	/* get the current revision from MSR 0x8B */
-	rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-
+	csig->rev = c->microcode;
 	pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
 		cpu_num, csig->sig, csig->pf, csig->rev);
 
@@ -299,9 +294,9 @@ static int apply_microcode(int cpu)
 	struct microcode_intel *mc_intel;
 	struct ucode_cpu_info *uci;
 	unsigned int val[2];
-	int cpu_num;
+	int cpu_num = raw_smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
 
-	cpu_num = raw_smp_processor_id();
 	uci = ucode_cpu_info + cpu;
 	mc_intel = uci->mc;
 
@@ -317,7 +312,7 @@ static int apply_microcode(int cpu)
 	      (unsigned long) mc_intel->bits >> 16 >> 16);
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 
-	/* see notes above for revision 1.07.  Apparent chip bug */
+	/* As documented in the SDM: Do a CPUID 1 here */
 	sync_core();
 
 	/* get the current revision from MSR 0x8B */
@@ -335,6 +330,7 @@ static int apply_microcode(int cpu)
 		(mc_intel->hdr.date >> 16) & 0xff);
 
 	uci->cpu_sig.rev = val[1];
+	c->microcode = val[1];
 
 	return 0;
 }
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
new file mode 100644
index 000000000000..7ec5bd140b87
--- /dev/null
+++ b/arch/x86/kernel/nmi.c
@@ -0,0 +1,433 @@
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ *  Copyright (C) 2011	Don Zickus Red Hat, Inc.
+ *
+ *  Pentium III FXSR, SSE support
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * Handle hardware traps and faults.
+ */
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/hardirq.h>
+#include <linux/slab.h>
+
+#include <linux/mca.h>
+
+#if defined(CONFIG_EDAC)
+#include <linux/edac.h>
+#endif
+
+#include <linux/atomic.h>
+#include <asm/traps.h>
+#include <asm/mach_traps.h>
+#include <asm/nmi.h>
+
+#define NMI_MAX_NAMELEN	16
+struct nmiaction {
+	struct list_head list;
+	nmi_handler_t handler;
+	unsigned int flags;
+	char *name;
+};
+
+struct nmi_desc {
+	spinlock_t lock;
+	struct list_head head;
+};
+
+static struct nmi_desc nmi_desc[NMI_MAX] = 
+{
+	{
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[0].head),
+	},
+	{
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[1].head),
+	},
+
+};
+
+struct nmi_stats {
+	unsigned int normal;
+	unsigned int unknown;
+	unsigned int external;
+	unsigned int swallow;
+};
+
+static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
+
+static int ignore_nmis;
+
+int unknown_nmi_panic;
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
+#define nmi_to_desc(type) (&nmi_desc[type])
+
+static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
+{
+	struct nmi_desc *desc = nmi_to_desc(type);
+	struct nmiaction *a;
+	int handled=0;
+
+	rcu_read_lock();
+
+	/*
+	 * NMIs are edge-triggered, which means if you have enough
+	 * of them concurrently, you can lose some because only one
+	 * can be latched at any given time.  Walk the whole list
+	 * to handle those situations.
+	 */
+	list_for_each_entry_rcu(a, &desc->head, list)
+		handled += a->handler(type, regs);
+
+	rcu_read_unlock();
+
+	/* return total number of NMI events handled */
+	return handled;
+}
+
+static int __setup_nmi(unsigned int type, struct nmiaction *action)
+{
+	struct nmi_desc *desc = nmi_to_desc(type);
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+
+	/*
+	 * most handlers of type NMI_UNKNOWN never return because
+	 * they just assume the NMI is theirs.  Just a sanity check
+	 * to manage expectations
+	 */
+	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
+
+	/*
+	 * some handlers need to be executed first otherwise a fake
+	 * event confuses some handlers (kdump uses this flag)
+	 */
+	if (action->flags & NMI_FLAG_FIRST)
+		list_add_rcu(&action->list, &desc->head);
+	else
+		list_add_tail_rcu(&action->list, &desc->head);
+	
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
+static struct nmiaction *__free_nmi(unsigned int type, const char *name)
+{
+	struct nmi_desc *desc = nmi_to_desc(type);
+	struct nmiaction *n;
+	unsigned long flags;
+
+	spin_lock_irqsave(&desc->lock, flags);
+
+	list_for_each_entry_rcu(n, &desc->head, list) {
+		/*
+		 * the name passed in to describe the nmi handler
+		 * is used as the lookup key
+		 */
+		if (!strcmp(n->name, name)) {
+			WARN(in_nmi(),
+				"Trying to free NMI (%s) from NMI context!\n", n->name);
+			list_del_rcu(&n->list);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&desc->lock, flags);
+	synchronize_rcu();
+	return (n);
+}
+
+int register_nmi_handler(unsigned int type, nmi_handler_t handler,
+			unsigned long nmiflags, const char *devname)
+{
+	struct nmiaction *action;
+	int retval = -ENOMEM;
+
+	if (!handler)
+		return -EINVAL;
+
+	action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
+	if (!action)
+		goto fail_action;
+
+	action->handler = handler;
+	action->flags = nmiflags;
+	action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
+	if (!action->name)
+		goto fail_action_name;
+
+	retval = __setup_nmi(type, action);
+
+	if (retval)
+		goto fail_setup_nmi;
+
+	return retval;
+
+fail_setup_nmi:
+	kfree(action->name);
+fail_action_name:
+	kfree(action);
+fail_action:	
+
+	return retval;
+}
+EXPORT_SYMBOL_GPL(register_nmi_handler);
+
+void unregister_nmi_handler(unsigned int type, const char *name)
+{
+	struct nmiaction *a;
+
+	a = __free_nmi(type, name);
+	if (a) {
+		kfree(a->name);
+		kfree(a);
+	}
+}
+
+EXPORT_SYMBOL_GPL(unregister_nmi_handler);
+
+static notrace __kprobes void
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
+{
+	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
+
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
+#if defined(CONFIG_EDAC)
+	if (edac_handler_set()) {
+		edac_atomic_assert_error();
+		return;
+	}
+#endif
+
+	if (panic_on_unrecovered_nmi)
+		panic("NMI: Not continuing");
+
+	pr_emerg("Dazed and confused, but trying to continue\n");
+
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
+}
+
+static notrace __kprobes void
+io_check_error(unsigned char reason, struct pt_regs *regs)
+{
+	unsigned long i;
+
+	pr_emerg(
+	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
+	show_registers(regs);
+
+	if (panic_on_io_nmi)
+		panic("NMI IOCK error: Not continuing");
+
+	/* Re-enable the IOCK line, wait for a few seconds */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
+
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
+
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
+}
+
+static notrace __kprobes void
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+{
+	int handled;
+
+	/*
+	 * Use 'false' as back-to-back NMIs are dealt with one level up.
+	 * Of course this makes having multiple 'unknown' handlers useless
+	 * as only the first one is ever run (unless it can actually determine
+	 * if it caused the NMI)
+	 */
+	handled = nmi_handle(NMI_UNKNOWN, regs, false);
+	if (handled) {
+		__this_cpu_add(nmi_stats.unknown, handled);
+		return;
+	}
+
+	__this_cpu_add(nmi_stats.unknown, 1);
+
+#ifdef CONFIG_MCA
+	/*
+	 * Might actually be able to figure out what the guilty party
+	 * is:
+	 */
+	if (MCA_bus) {
+		mca_handle_nmi();
+		return;
+	}
+#endif
+	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
+
+	pr_emerg("Do you have a strange power saving mode enabled?\n");
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
+		panic("NMI: Not continuing");
+
+	pr_emerg("Dazed and confused, but trying to continue\n");
+}
+
+static DEFINE_PER_CPU(bool, swallow_nmi);
+static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+
+static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+{
+	unsigned char reason = 0;
+	int handled;
+	bool b2b = false;
+
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
+
+	/*
+	 * Back-to-back NMIs are interesting because they can either
+	 * be two NMI or more than two NMIs (any thing over two is dropped
+	 * due to NMI being edge-triggered).  If this is the second half
+	 * of the back-to-back NMI, assume we dropped things and process
+	 * more handlers.  Otherwise reset the 'swallow' NMI behaviour
+	 */
+	if (regs->ip == __this_cpu_read(last_nmi_rip))
+		b2b = true;
+	else
+		__this_cpu_write(swallow_nmi, false);
+
+	__this_cpu_write(last_nmi_rip, regs->ip);
+
+	handled = nmi_handle(NMI_LOCAL, regs, b2b);
+	__this_cpu_add(nmi_stats.normal, handled);
+	if (handled) {
+		/*
+		 * There are cases when a NMI handler handles multiple
+		 * events in the current NMI.  One of these events may
+		 * be queued for in the next NMI.  Because the event is
+		 * already handled, the next NMI will result in an unknown
+		 * NMI.  Instead lets flag this for a potential NMI to
+		 * swallow.
+		 */
+		if (handled > 1)
+			__this_cpu_write(swallow_nmi, true);
+		return;
+	}
+
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
+
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+		/*
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
+		 */
+		reassert_nmi();
+#endif
+		__this_cpu_add(nmi_stats.external, 1);
+		raw_spin_unlock(&nmi_reason_lock);
+		return;
+	}
+	raw_spin_unlock(&nmi_reason_lock);
+
+	/*
+	 * Only one NMI can be latched at a time.  To handle
+	 * this we may process multiple nmi handlers at once to
+	 * cover the case where an NMI is dropped.  The downside
+	 * to this approach is we may process an NMI prematurely,
+	 * while its real NMI is sitting latched.  This will cause
+	 * an unknown NMI on the next run of the NMI processing.
+	 *
+	 * We tried to flag that condition above, by setting the
+	 * swallow_nmi flag when we process more than one event.
+	 * This condition is also only present on the second half
+	 * of a back-to-back NMI, so we flag that condition too.
+	 *
+	 * If both are true, we assume we already processed this
+	 * NMI previously and we swallow it.  Otherwise we reset
+	 * the logic.
+	 *
+	 * There are scenarios where we may accidentally swallow
+	 * a 'real' unknown NMI.  For example, while processing
+	 * a perf NMI another perf NMI comes in along with a
+	 * 'real' unknown NMI.  These two NMIs get combined into
+	 * one (as descibed above).  When the next NMI gets
+	 * processed, it will be flagged by perf as handled, but
+	 * noone will know that there was a 'real' unknown NMI sent
+	 * also.  As a result it gets swallowed.  Or if the first
+	 * perf NMI returns two events handled then the second
+	 * NMI will get eaten by the logic below, again losing a
+	 * 'real' unknown NMI.  But this is the best we can do
+	 * for now.
+	 */
+	if (b2b && __this_cpu_read(swallow_nmi))
+		__this_cpu_add(nmi_stats.swallow, 1);
+	else
+		unknown_nmi_error(reason, regs);
+}
+
+dotraplinkage notrace __kprobes void
+do_nmi(struct pt_regs *regs, long error_code)
+{
+	nmi_enter();
+
+	inc_irq_stat(__nmi_count);
+
+	if (!ignore_nmis)
+		default_do_nmi(regs);
+
+	nmi_exit();
+}
+
+void stop_nmi(void)
+{
+	ignore_nmis++;
+}
+
+void restart_nmi(void)
+{
+	ignore_nmis--;
+}
+
+/* reset the back-to-back NMI logic */
+void local_touch_nmi(void)
+{
+	__this_cpu_write(last_nmi_rip, 0);
+}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b49d00da2aed..622872054fbe 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -117,8 +117,8 @@ again:
 }
 
 /*
- * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
- * documentation.
+ * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
+ * parameter documentation.
  */
 static __init int iommu_setup(char *p)
 {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e3b019c439..b9b3b1a51643 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,7 @@ void free_thread_xstate(struct task_struct *tsk)
 void free_thread_info(struct thread_info *ti)
 {
 	free_thread_xstate(ti->task);
-	free_pages((unsigned long)ti, get_order(THREAD_SIZE));
+	free_pages((unsigned long)ti, THREAD_ORDER);
 }
 
 void arch_task_cache_init(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7a3b65107a27..795b79f984c2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -107,6 +108,7 @@ void cpu_idle(void)
 			if (cpu_is_offline(cpu))
 				play_dead();
 
+			local_touch_nmi();
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
@@ -262,7 +264,7 @@ EXPORT_SYMBOL_GPL(start_thread);
 
 
 /*
- *	switch_to(x,yn) should switch tasks from x to y.
+ *	switch_to(x,y) should switch tasks from x to y.
  *
  * We fsave/fwait so that an exception goes off at the right time
  * (as a call from the fsave or fwait in effect) rather than to
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f693e44e1bf6..3bd7e6eebf31 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,7 @@
 #include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
+#include <asm/nmi.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -133,6 +134,7 @@ void cpu_idle(void)
 			 * from here on, until they go to idle.
 			 * Otherwise, idle callbacks can misfire.
 			 */
+			local_touch_nmi();
 			local_irq_disable();
 			enter_idle();
 			/* Don't trace irqs off for idle */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9242436e9937..e334be1182b9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -464,7 +464,7 @@ static inline void kb_wait(void)
 	}
 }
 
-static void vmxoff_nmi(int cpu, struct die_args *args)
+static void vmxoff_nmi(int cpu, struct pt_regs *regs)
 {
 	cpu_emergency_vmxoff();
 }
@@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback;
 
 static atomic_t waiting_for_crash_ipi;
 
-static int crash_nmi_callback(struct notifier_block *self,
-			unsigned long val, void *data)
+static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 {
 	int cpu;
 
-	if (val != DIE_NMI)
-		return NOTIFY_OK;
-
 	cpu = raw_smp_processor_id();
 
 	/* Don't do anything if this handler is invoked on crashing cpu.
@@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self,
 	 * an NMI if system was initially booted with nmi_watchdog parameter.
 	 */
 	if (cpu == crashing_cpu)
-		return NOTIFY_STOP;
+		return NMI_HANDLED;
 	local_irq_disable();
 
-	shootdown_callback(cpu, (struct die_args *)data);
+	shootdown_callback(cpu, regs);
 
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
@@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 	for (;;)
 		cpu_relax();
 
-	return 1;
+	return NMI_HANDLED;
 }
 
 static void smp_send_nmi_allbutself(void)
@@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void)
 	apic->send_IPI_allbutself(NMI_VECTOR);
 }
 
-static struct notifier_block crash_nmi_nb = {
-	.notifier_call = crash_nmi_callback,
-	/* we want to be the first one called */
-	.priority = NMI_LOCAL_HIGH_PRIOR+1,
-};
-
 /* Halt all other CPUs, calling the specified function on each of them
  *
  * This function can be used to halt all other CPUs on crash
@@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
-	if (register_die_notifier(&crash_nmi_nb))
+	if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
+				 NMI_FLAG_FIRST, "crash"))
 		return;		/* return what? */
 	/* Ensure the new callback function is set before sending
 	 * out the NMI
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ff14a5044ce6..051489082d59 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -14,10 +14,73 @@
 #include <linux/personality.h>
 #include <linux/random.h>
 #include <linux/uaccess.h>
+#include <linux/elf.h>
 
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+			 enum align_flags flags)
+{
+	unsigned long tmp_addr;
+
+	/* handle 32- and 64-bit case with a single conditional */
+	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+		return addr;
+
+	if (!(current->flags & PF_RANDOMIZE))
+		return addr;
+
+	if (!((flags & ALIGN_VDSO) || filp))
+		return addr;
+
+	tmp_addr = addr;
+
+	/*
+	 * We need an address which is <= than the original
+	 * one only when in topdown direction.
+	 */
+	if (!(flags & ALIGN_TOPDOWN))
+		tmp_addr += va_align.mask;
+
+	tmp_addr &= ~va_align.mask;
+
+	return tmp_addr;
+}
+
+static int __init control_va_addr_alignment(char *str)
+{
+	/* guard against enabling this on other CPU families */
+	if (va_align.flags < 0)
+		return 1;
+
+	if (*str == 0)
+		return 1;
+
+	if (*str == '=')
+		str++;
+
+	if (!strcmp(str, "32"))
+		va_align.flags = ALIGN_VA_32;
+	else if (!strcmp(str, "64"))
+		va_align.flags = ALIGN_VA_64;
+	else if (!strcmp(str, "off"))
+		va_align.flags = 0;
+	else if (!strcmp(str, "on"))
+		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+	else
+		return 0;
+
+	return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, unsigned long, off)
@@ -92,6 +155,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	start_addr = addr;
 
 full_search:
+
+	addr = align_addr(addr, filp, 0);
+
 	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 		/* At this point:  (!vma || addr < vma->vm_end). */
 		if (end - len < addr) {
@@ -117,6 +183,7 @@ full_search:
 			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
+		addr = align_addr(addr, filp, 0);
 	}
 }
 
@@ -161,10 +228,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 	/* make sure it can fit in the remaining address space */
 	if (addr > len) {
-		vma = find_vma(mm, addr-len);
-		if (!vma || addr <= vma->vm_start)
+		unsigned long tmp_addr = align_addr(addr - len, filp,
+						    ALIGN_TOPDOWN);
+
+		vma = find_vma(mm, tmp_addr);
+		if (!vma || tmp_addr + len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return mm->free_area_cache = addr-len;
+			return mm->free_area_cache = tmp_addr;
 	}
 
 	if (mm->mmap_base < len)
@@ -173,6 +243,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	addr = mm->mmap_base-len;
 
 	do {
+		addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
 		/*
 		 * Lookup failure means no vma is above this address,
 		 * else if new region fits below vma->vm_start,
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index bc19be332bc9..9a0e31293920 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -346,3 +346,5 @@ ENTRY(sys_call_table)
 	.long sys_syncfs
 	.long sys_sendmmsg		/* 345 */
 	.long sys_setns
+	.long sys_process_vm_readv
+	.long sys_process_vm_writev
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6913369c234c..a8e3eb83466c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
 DECLARE_BITMAP(used_vectors, NR_VECTORS);
 EXPORT_SYMBOL_GPL(used_vectors);
 
-static int ignore_nmis;
-
-int unknown_nmi_panic;
-/*
- * Prevent NMI reason port (0x61) being accessed simultaneously, can
- * only be used in NMI handler.
- */
-static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
-
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -307,152 +298,6 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static notrace __kprobes void
-pci_serr_error(unsigned char reason, struct pt_regs *regs)
-{
-	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-
-	/*
-	 * On some machines, PCI SERR line is used to report memory
-	 * errors. EDAC makes use of it.
-	 */
-#if defined(CONFIG_EDAC)
-	if (edac_handler_set()) {
-		edac_atomic_assert_error();
-		return;
-	}
-#endif
-
-	if (panic_on_unrecovered_nmi)
-		panic("NMI: Not continuing");
-
-	pr_emerg("Dazed and confused, but trying to continue\n");
-
-	/* Clear and disable the PCI SERR error line. */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
-	outb(reason, NMI_REASON_PORT);
-}
-
-static notrace __kprobes void
-io_check_error(unsigned char reason, struct pt_regs *regs)
-{
-	unsigned long i;
-
-	pr_emerg(
-	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-	show_registers(regs);
-
-	if (panic_on_io_nmi)
-		panic("NMI IOCK error: Not continuing");
-
-	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
-
-	i = 20000;
-	while (--i) {
-		touch_nmi_watchdog();
-		udelay(100);
-	}
-
-	reason &= ~NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
-}
-
-static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
-{
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
-			NOTIFY_STOP)
-		return;
-#ifdef CONFIG_MCA
-	/*
-	 * Might actually be able to figure out what the guilty party
-	 * is:
-	 */
-	if (MCA_bus) {
-		mca_handle_nmi();
-		return;
-	}
-#endif
-	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-		 reason, smp_processor_id());
-
-	pr_emerg("Do you have a strange power saving mode enabled?\n");
-	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-		panic("NMI: Not continuing");
-
-	pr_emerg("Dazed and confused, but trying to continue\n");
-}
-
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
-{
-	unsigned char reason = 0;
-
-	/*
-	 * CPU-specific NMI must be processed before non-CPU-specific
-	 * NMI, otherwise we may lose it, because the CPU-specific
-	 * NMI can not be detected/processed on other CPUs.
-	 */
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
-	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
-	raw_spin_lock(&nmi_reason_lock);
-	reason = get_nmi_reason();
-
-	if (reason & NMI_REASON_MASK) {
-		if (reason & NMI_REASON_SERR)
-			pci_serr_error(reason, regs);
-		else if (reason & NMI_REASON_IOCHK)
-			io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-		/*
-		 * Reassert NMI in case it became active
-		 * meanwhile as it's edge-triggered:
-		 */
-		reassert_nmi();
-#endif
-		raw_spin_unlock(&nmi_reason_lock);
-		return;
-	}
-	raw_spin_unlock(&nmi_reason_lock);
-
-	unknown_nmi_error(reason, regs);
-}
-
-dotraplinkage notrace __kprobes void
-do_nmi(struct pt_regs *regs, long error_code)
-{
-	nmi_enter();
-
-	inc_irq_stat(__nmi_count);
-
-	if (!ignore_nmis)
-		default_do_nmi(regs);
-
-	nmi_exit();
-}
-
-void stop_nmi(void)
-{
-	ignore_nmis++;
-}
-
-void restart_nmi(void)
-{
-	ignore_nmis--;
-}
-
 /* May run on IST stack. */
 dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 18ae83dd1cd7..b56c65de384d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -56,7 +56,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 	.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
 };
 
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
 
 static int __init vsyscall_setup(char *str)
 {
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8b4cc5f067de..f1e3be18a08f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -29,6 +29,39 @@
 #include "tss.h"
 
 /*
+ * Operand types
+ */
+#define OpNone             0ull
+#define OpImplicit         1ull  /* No generic decode */
+#define OpReg              2ull  /* Register */
+#define OpMem              3ull  /* Memory */
+#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
+#define OpDI               5ull  /* ES:DI/EDI/RDI */
+#define OpMem64            6ull  /* Memory, 64-bit */
+#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
+#define OpDX               8ull  /* DX register */
+#define OpCL               9ull  /* CL register (for shifts) */
+#define OpImmByte         10ull  /* 8-bit sign extended immediate */
+#define OpOne             11ull  /* Implied 1 */
+#define OpImm             12ull  /* Sign extended immediate */
+#define OpMem16           13ull  /* Memory operand (16-bit). */
+#define OpMem32           14ull  /* Memory operand (32-bit). */
+#define OpImmU            15ull  /* Immediate operand, zero extended */
+#define OpSI              16ull  /* SI/ESI/RSI */
+#define OpImmFAddr        17ull  /* Immediate far address */
+#define OpMemFAddr        18ull  /* Far address in memory */
+#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
+#define OpES              20ull  /* ES */
+#define OpCS              21ull  /* CS */
+#define OpSS              22ull  /* SS */
+#define OpDS              23ull  /* DS */
+#define OpFS              24ull  /* FS */
+#define OpGS              25ull  /* GS */
+
+#define OpBits             5  /* Width of operand field */
+#define OpMask             ((1ull << OpBits) - 1)
+
+/*
  * Opcode effective-address decode tables.
  * Note that we only emulate instructions that have at least one memory
  * operand (excluding implicit stack references). We assume that stack
@@ -40,37 +73,35 @@
 /* Operand sizes: 8-bit operands or specified/overridden size. */
 #define ByteOp      (1<<0)	/* 8-bit operands. */
 /* Destination operand type. */
-#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
-#define DstReg      (2<<1)	/* Register operand. */
-#define DstMem      (3<<1)	/* Memory operand. */
-#define DstAcc      (4<<1)	/* Destination Accumulator */
-#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
-#define DstMem64    (6<<1)	/* 64bit memory operand */
-#define DstImmUByte (7<<1)	/* 8-bit unsigned immediate operand */
-#define DstDX       (8<<1)	/* Destination is in DX register */
-#define DstMask     (0xf<<1)
+#define DstShift    1
+#define ImplicitOps (OpImplicit << DstShift)
+#define DstReg      (OpReg << DstShift)
+#define DstMem      (OpMem << DstShift)
+#define DstAcc      (OpAcc << DstShift)
+#define DstDI       (OpDI << DstShift)
+#define DstMem64    (OpMem64 << DstShift)
+#define DstImmUByte (OpImmUByte << DstShift)
+#define DstDX       (OpDX << DstShift)
+#define DstMask     (OpMask << DstShift)
 /* Source operand type. */
-#define SrcNone     (0<<5)	/* No source operand. */
-#define SrcReg      (1<<5)	/* Register operand. */
-#define SrcMem      (2<<5)	/* Memory operand. */
-#define SrcMem16    (3<<5)	/* Memory operand (16-bit). */
-#define SrcMem32    (4<<5)	/* Memory operand (32-bit). */
-#define SrcImm      (5<<5)	/* Immediate operand. */
-#define SrcImmByte  (6<<5)	/* 8-bit sign-extended immediate operand. */
-#define SrcOne      (7<<5)	/* Implied '1' */
-#define SrcImmUByte (8<<5)      /* 8-bit unsigned immediate operand. */
-#define SrcImmU     (9<<5)      /* Immediate operand, unsigned */
-#define SrcSI       (0xa<<5)	/* Source is in the DS:RSI */
-#define SrcImmFAddr (0xb<<5)	/* Source is immediate far address */
-#define SrcMemFAddr (0xc<<5)	/* Source is far address in memory */
-#define SrcAcc      (0xd<<5)	/* Source Accumulator */
-#define SrcImmU16   (0xe<<5)    /* Immediate operand, unsigned, 16 bits */
-#define SrcDX       (0xf<<5)	/* Source is in DX register */
-#define SrcMask     (0xf<<5)
-/* Generic ModRM decode. */
-#define ModRM       (1<<9)
-/* Destination is only written; never read. */
-#define Mov         (1<<10)
+#define SrcShift    6
+#define SrcNone     (OpNone << SrcShift)
+#define SrcReg      (OpReg << SrcShift)
+#define SrcMem      (OpMem << SrcShift)
+#define SrcMem16    (OpMem16 << SrcShift)
+#define SrcMem32    (OpMem32 << SrcShift)
+#define SrcImm      (OpImm << SrcShift)
+#define SrcImmByte  (OpImmByte << SrcShift)
+#define SrcOne      (OpOne << SrcShift)
+#define SrcImmUByte (OpImmUByte << SrcShift)
+#define SrcImmU     (OpImmU << SrcShift)
+#define SrcSI       (OpSI << SrcShift)
+#define SrcImmFAddr (OpImmFAddr << SrcShift)
+#define SrcMemFAddr (OpMemFAddr << SrcShift)
+#define SrcAcc      (OpAcc << SrcShift)
+#define SrcImmU16   (OpImmU16 << SrcShift)
+#define SrcDX       (OpDX << SrcShift)
+#define SrcMask     (OpMask << SrcShift)
 #define BitOp       (1<<11)
 #define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
 #define String      (1<<13)     /* String instruction (rep capable) */
@@ -81,6 +112,10 @@
 #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Sse         (1<<18)     /* SSE Vector instruction */
+/* Generic ModRM decode. */
+#define ModRM       (1<<19)
+/* Destination is only written; never read. */
+#define Mov         (1<<20)
 /* Misc flags */
 #define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
 #define VendorSpecific (1<<22) /* Vendor specific instruction */
@@ -91,12 +126,19 @@
 #define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64	    (1<<28)
 /* Source 2 operand type */
-#define Src2None    (0<<29)
-#define Src2CL      (1<<29)
-#define Src2ImmByte (2<<29)
-#define Src2One     (3<<29)
-#define Src2Imm     (4<<29)
-#define Src2Mask    (7<<29)
+#define Src2Shift   (29)
+#define Src2None    (OpNone << Src2Shift)
+#define Src2CL      (OpCL << Src2Shift)
+#define Src2ImmByte (OpImmByte << Src2Shift)
+#define Src2One     (OpOne << Src2Shift)
+#define Src2Imm     (OpImm << Src2Shift)
+#define Src2ES      (OpES << Src2Shift)
+#define Src2CS      (OpCS << Src2Shift)
+#define Src2SS      (OpSS << Src2Shift)
+#define Src2DS      (OpDS << Src2Shift)
+#define Src2FS      (OpFS << Src2Shift)
+#define Src2GS      (OpGS << Src2Shift)
+#define Src2Mask    (OpMask << Src2Shift)
 
 #define X2(x...) x, x
 #define X3(x...) X2(x), x
@@ -108,8 +150,8 @@
 #define X16(x...) X8(x), X8(x)
 
 struct opcode {
-	u32 flags;
-	u8 intercept;
+	u64 flags : 56;
+	u64 intercept : 8;
 	union {
 		int (*execute)(struct x86_emulate_ctxt *ctxt);
 		struct opcode *group;
@@ -205,105 +247,100 @@ struct gprefix {
 #define ON64(x)
 #endif
 
-#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \
+#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype)	\
 	do {								\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "4", "2")			\
 			_op _suffix " %"_x"3,%1; "			\
 			_POST_EFLAGS("0", "4", "2")			\
-			: "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\
+			: "=m" ((ctxt)->eflags),			\
+			  "+q" (*(_dsttype*)&(ctxt)->dst.val),		\
 			  "=&r" (_tmp)					\
-			: _y ((_src).val), "i" (EFLAGS_MASK));		\
+			: _y ((ctxt)->src.val), "i" (EFLAGS_MASK));	\
 	} while (0)
 
 
 /* Raw emulation: instruction has two explicit operands. */
-#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
+#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy)		\
 	do {								\
 		unsigned long _tmp;					\
 									\
-		switch ((_dst).bytes) {					\
+		switch ((ctxt)->dst.bytes) {				\
 		case 2:							\
-			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\
+			____emulate_2op(ctxt,_op,_wx,_wy,"w",u16);	\
 			break;						\
 		case 4:							\
-			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\
+			____emulate_2op(ctxt,_op,_lx,_ly,"l",u32);	\
 			break;						\
 		case 8:							\
-			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \
+			ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
 			break;						\
 		}							\
 	} while (0)
 
-#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
+#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)		     \
 	do {								     \
 		unsigned long _tmp;					     \
-		switch ((_dst).bytes) {				             \
+		switch ((ctxt)->dst.bytes) {				     \
 		case 1:							     \
-			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \
+			____emulate_2op(ctxt,_op,_bx,_by,"b",u8);	     \
 			break;						     \
 		default:						     \
-			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
+			__emulate_2op_nobyte(ctxt, _op,			     \
 					     _wx, _wy, _lx, _ly, _qx, _qy);  \
 			break;						     \
 		}							     \
 	} while (0)
 
 /* Source operand is byte-sized and may be restricted to just %cl. */
-#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
-	__emulate_2op(_op, _src, _dst, _eflags,				\
-		      "b", "c", "b", "c", "b", "c", "b", "c")
+#define emulate_2op_SrcB(ctxt, _op)					\
+	__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")
 
 /* Source operand is byte, word, long or quad sized. */
-#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
-	__emulate_2op(_op, _src, _dst, _eflags,				\
-		      "b", "q", "w", "r", _LO32, "r", "", "r")
+#define emulate_2op_SrcV(ctxt, _op)					\
+	__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")
 
 /* Source operand is word, long or quad sized. */
-#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
-	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
-			     "w", "r", _LO32, "r", "", "r")
+#define emulate_2op_SrcV_nobyte(ctxt, _op)				\
+	__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
 
 /* Instruction has three operands and one operand is stored in ECX register */
-#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type)	\
+#define __emulate_2op_cl(ctxt, _op, _suffix, _type)		\
 	do {								\
 		unsigned long _tmp;					\
-		_type _clv  = (_cl).val;				\
-		_type _srcv = (_src).val;				\
-		_type _dstv = (_dst).val;				\
+		_type _clv  = (ctxt)->src2.val;				\
+		_type _srcv = (ctxt)->src.val;				\
+		_type _dstv = (ctxt)->dst.val;				\
 									\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "5", "2")			\
 			_op _suffix " %4,%1 \n"				\
 			_POST_EFLAGS("0", "5", "2")			\
-			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)	\
+			: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
 			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
 			);						\
 									\
-		(_cl).val  = (unsigned long) _clv;			\
-		(_src).val = (unsigned long) _srcv;			\
-		(_dst).val = (unsigned long) _dstv;			\
+		(ctxt)->src2.val  = (unsigned long) _clv;		\
+		(ctxt)->src2.val = (unsigned long) _srcv;		\
+		(ctxt)->dst.val = (unsigned long) _dstv;		\
 	} while (0)
 
-#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)			\
+#define emulate_2op_cl(ctxt, _op)					\
 	do {								\
-		switch ((_dst).bytes) {					\
+		switch ((ctxt)->dst.bytes) {				\
 		case 2:							\
-			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
-					 "w", unsigned short);         	\
+			__emulate_2op_cl(ctxt, _op, "w", u16);		\
 			break;						\
 		case 4:							\
-			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
-					 "l", unsigned int);           	\
+			__emulate_2op_cl(ctxt, _op, "l", u32);		\
 			break;						\
 		case 8:							\
-			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
-					      "q", unsigned long));	\
+			ON64(__emulate_2op_cl(ctxt, _op, "q", ulong));	\
 			break;						\
 		}							\
 	} while (0)
 
-#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
+#define __emulate_1op(ctxt, _op, _suffix)				\
 	do {								\
 		unsigned long _tmp;					\
 									\
@@ -311,39 +348,27 @@ struct gprefix {
 			_PRE_EFLAGS("0", "3", "2")			\
 			_op _suffix " %1; "				\
 			_POST_EFLAGS("0", "3", "2")			\
-			: "=m" (_eflags), "+m" ((_dst).val),		\
+			: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
 			  "=&r" (_tmp)					\
 			: "i" (EFLAGS_MASK));				\
 	} while (0)
 
 /* Instruction has only one explicit operand (no source operand). */
-#define emulate_1op(_op, _dst, _eflags)                                    \
+#define emulate_1op(ctxt, _op)						\
 	do {								\
-		switch ((_dst).bytes) {				        \
-		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
-		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
-		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
-		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
+		switch ((ctxt)->dst.bytes) {				\
+		case 1:	__emulate_1op(ctxt, _op, "b"); break;		\
+		case 2:	__emulate_1op(ctxt, _op, "w"); break;		\
+		case 4:	__emulate_1op(ctxt, _op, "l"); break;		\
+		case 8:	ON64(__emulate_1op(ctxt, _op, "q")); break;	\
 		}							\
 	} while (0)
 
-#define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix)		\
-	do {								\
-		unsigned long _tmp;					\
-									\
-		__asm__ __volatile__ (					\
-			_PRE_EFLAGS("0", "4", "1")			\
-			_op _suffix " %5; "				\
-			_POST_EFLAGS("0", "4", "1")			\
-			: "=m" (_eflags), "=&r" (_tmp),			\
-			  "+a" (_rax), "+d" (_rdx)			\
-			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
-			  "a" (_rax), "d" (_rdx));			\
-	} while (0)
-
-#define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \
+#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
 	do {								\
 		unsigned long _tmp;					\
+		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
+		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
 									\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "5", "1")			\
@@ -356,53 +381,27 @@ struct gprefix {
 			"jmp 2b \n\t"					\
 			".popsection \n\t"				\
 			_ASM_EXTABLE(1b, 3b)				\
-			: "=m" (_eflags), "=&r" (_tmp),			\
-			  "+a" (_rax), "+d" (_rdx), "+qm"(_ex)		\
-			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
-			  "a" (_rax), "d" (_rdx));			\
+			: "=m" ((ctxt)->eflags), "=&r" (_tmp),		\
+			  "+a" (*rax), "+d" (*rdx), "+qm"(_ex)		\
+			: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val),	\
+			  "a" (*rax), "d" (*rdx));			\
 	} while (0)
 
 /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
-#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)		\
+#define emulate_1op_rax_rdx(ctxt, _op, _ex)	\
 	do {								\
-		switch((_src).bytes) {					\
+		switch((ctxt)->src.bytes) {				\
 		case 1:							\
-			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
-					      _eflags, "b");		\
+			__emulate_1op_rax_rdx(ctxt, _op, "b", _ex);	\
 			break;						\
 		case 2:							\
-			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
-					      _eflags, "w");		\
+			__emulate_1op_rax_rdx(ctxt, _op, "w", _ex);	\
 			break;						\
 		case 4:							\
-			__emulate_1op_rax_rdx(_op, _src, _rax, _rdx,	\
-					      _eflags, "l");		\
-			break;						\
-		case 8:							\
-			ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, \
-						   _eflags, "q"));	\
-			break;						\
-		}							\
-	} while (0)
-
-#define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex)	\
-	do {								\
-		switch((_src).bytes) {					\
-		case 1:							\
-			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx,	\
-						 _eflags, "b", _ex);	\
-			break;						\
-		case 2:							\
-			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
-						 _eflags, "w", _ex);	\
-			break;						\
-		case 4:							\
-			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
-						 _eflags, "l", _ex);	\
+			__emulate_1op_rax_rdx(ctxt, _op, "l", _ex);	\
 			break;						\
 		case 8: ON64(						\
-			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
-						 _eflags, "q", _ex));	\
+			__emulate_1op_rax_rdx(ctxt, _op, "q", _ex));	\
 			break;						\
 		}							\
 	} while (0)
@@ -651,41 +650,50 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
 }
 
-static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt,
-			      unsigned long eip, u8 *dest)
+/*
+ * Fetch the next byte of the instruction being emulated which is pointed to
+ * by ctxt->_eip, then increment ctxt->_eip.
+ *
+ * Also prefetch the remaining bytes of the instruction without crossing page
+ * boundary if they are not in fetch_cache yet.
+ */
+static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
 {
 	struct fetch_cache *fc = &ctxt->fetch;
 	int rc;
 	int size, cur_size;
 
-	if (eip == fc->end) {
+	if (ctxt->_eip == fc->end) {
 		unsigned long linear;
-		struct segmented_address addr = { .seg=VCPU_SREG_CS, .ea=eip};
+		struct segmented_address addr = { .seg = VCPU_SREG_CS,
+						  .ea  = ctxt->_eip };
 		cur_size = fc->end - fc->start;
-		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
+		size = min(15UL - cur_size,
+			   PAGE_SIZE - offset_in_page(ctxt->_eip));
 		rc = __linearize(ctxt, addr, size, false, true, &linear);
-		if (rc != X86EMUL_CONTINUE)
+		if (unlikely(rc != X86EMUL_CONTINUE))
 			return rc;
 		rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
 				      size, &ctxt->exception);
-		if (rc != X86EMUL_CONTINUE)
+		if (unlikely(rc != X86EMUL_CONTINUE))
 			return rc;
 		fc->end += size;
 	}
-	*dest = fc->data[eip - fc->start];
+	*dest = fc->data[ctxt->_eip - fc->start];
+	ctxt->_eip++;
 	return X86EMUL_CONTINUE;
 }
 
 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
-			 unsigned long eip, void *dest, unsigned size)
+			 void *dest, unsigned size)
 {
 	int rc;
 
 	/* x86 instructions are limited to 15 bytes. */
-	if (eip + size - ctxt->eip > 15)
+	if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
 		return X86EMUL_UNHANDLEABLE;
 	while (size--) {
-		rc = do_insn_fetch_byte(ctxt, eip++, dest++);
+		rc = do_insn_fetch_byte(ctxt, dest++);
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 	}
@@ -693,20 +701,18 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
 }
 
 /* Fetch next part of the instruction being emulated. */
-#define insn_fetch(_type, _size, _eip)					\
+#define insn_fetch(_type, _ctxt)					\
 ({	unsigned long _x;						\
-	rc = do_insn_fetch(ctxt, (_eip), &_x, (_size));			\
+	rc = do_insn_fetch(_ctxt, &_x, sizeof(_type));			\
 	if (rc != X86EMUL_CONTINUE)					\
 		goto done;						\
-	(_eip) += (_size);						\
 	(_type)_x;							\
 })
 
-#define insn_fetch_arr(_arr, _size, _eip)				\
-({	rc = do_insn_fetch(ctxt, (_eip), _arr, (_size));		\
+#define insn_fetch_arr(_arr, _size, _ctxt)				\
+({	rc = do_insn_fetch(_ctxt, _arr, (_size));			\
 	if (rc != X86EMUL_CONTINUE)					\
 		goto done;						\
-	(_eip) += (_size);						\
 })
 
 /*
@@ -894,7 +900,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
 	}
 
-	ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip);
+	ctxt->modrm = insn_fetch(u8, ctxt);
 	ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
 	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
 	ctxt->modrm_rm |= (ctxt->modrm & 0x07);
@@ -928,13 +934,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		switch (ctxt->modrm_mod) {
 		case 0:
 			if (ctxt->modrm_rm == 6)
-				modrm_ea += insn_fetch(u16, 2, ctxt->_eip);
+				modrm_ea += insn_fetch(u16, ctxt);
 			break;
 		case 1:
-			modrm_ea += insn_fetch(s8, 1, ctxt->_eip);
+			modrm_ea += insn_fetch(s8, ctxt);
 			break;
 		case 2:
-			modrm_ea += insn_fetch(u16, 2, ctxt->_eip);
+			modrm_ea += insn_fetch(u16, ctxt);
 			break;
 		}
 		switch (ctxt->modrm_rm) {
@@ -971,13 +977,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	} else {
 		/* 32/64-bit ModR/M decode. */
 		if ((ctxt->modrm_rm & 7) == 4) {
-			sib = insn_fetch(u8, 1, ctxt->_eip);
+			sib = insn_fetch(u8, ctxt);
 			index_reg |= (sib >> 3) & 7;
 			base_reg |= sib & 7;
 			scale = sib >> 6;
 
 			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
-				modrm_ea += insn_fetch(s32, 4, ctxt->_eip);
+				modrm_ea += insn_fetch(s32, ctxt);
 			else
 				modrm_ea += ctxt->regs[base_reg];
 			if (index_reg != 4)
@@ -990,13 +996,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		switch (ctxt->modrm_mod) {
 		case 0:
 			if (ctxt->modrm_rm == 5)
-				modrm_ea += insn_fetch(s32, 4, ctxt->_eip);
+				modrm_ea += insn_fetch(s32, ctxt);
 			break;
 		case 1:
-			modrm_ea += insn_fetch(s8, 1, ctxt->_eip);
+			modrm_ea += insn_fetch(s8, ctxt);
 			break;
 		case 2:
-			modrm_ea += insn_fetch(s32, 4, ctxt->_eip);
+			modrm_ea += insn_fetch(s32, ctxt);
 			break;
 		}
 	}
@@ -1013,13 +1019,13 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt,
 	op->type = OP_MEM;
 	switch (ctxt->ad_bytes) {
 	case 2:
-		op->addr.mem.ea = insn_fetch(u16, 2, ctxt->_eip);
+		op->addr.mem.ea = insn_fetch(u16, ctxt);
 		break;
 	case 4:
-		op->addr.mem.ea = insn_fetch(u32, 4, ctxt->_eip);
+		op->addr.mem.ea = insn_fetch(u32, ctxt);
 		break;
 	case 8:
-		op->addr.mem.ea = insn_fetch(u64, 8, ctxt->_eip);
+		op->addr.mem.ea = insn_fetch(u64, ctxt);
 		break;
 	}
 done:
@@ -1452,15 +1458,18 @@ static int em_popf(struct x86_emulate_ctxt *ctxt)
 	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
 }
 
-static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
+static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
 {
+	int seg = ctxt->src2.val;
+
 	ctxt->src.val = get_segment_selector(ctxt, seg);
 
 	return em_push(ctxt);
 }
 
-static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int seg)
+static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 {
+	int seg = ctxt->src2.val;
 	unsigned long selector;
 	int rc;
 
@@ -1674,64 +1683,74 @@ static int em_grp2(struct x86_emulate_ctxt *ctxt)
 {
 	switch (ctxt->modrm_reg) {
 	case 0:	/* rol */
-		emulate_2op_SrcB("rol", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "rol");
 		break;
 	case 1:	/* ror */
-		emulate_2op_SrcB("ror", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "ror");
 		break;
 	case 2:	/* rcl */
-		emulate_2op_SrcB("rcl", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "rcl");
 		break;
 	case 3:	/* rcr */
-		emulate_2op_SrcB("rcr", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "rcr");
 		break;
 	case 4:	/* sal/shl */
 	case 6:	/* sal/shl */
-		emulate_2op_SrcB("sal", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "sal");
 		break;
 	case 5:	/* shr */
-		emulate_2op_SrcB("shr", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "shr");
 		break;
 	case 7:	/* sar */
-		emulate_2op_SrcB("sar", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcB(ctxt, "sar");
 		break;
 	}
 	return X86EMUL_CONTINUE;
 }
 
-static int em_grp3(struct x86_emulate_ctxt *ctxt)
+static int em_not(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->dst.val = ~ctxt->dst.val;
+	return X86EMUL_CONTINUE;
+}
+
+static int em_neg(struct x86_emulate_ctxt *ctxt)
+{
+	emulate_1op(ctxt, "neg");
+	return X86EMUL_CONTINUE;
+}
+
+static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
+{
+	u8 ex = 0;
+
+	emulate_1op_rax_rdx(ctxt, "mul", ex);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
+{
+	u8 ex = 0;
+
+	emulate_1op_rax_rdx(ctxt, "imul", ex);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_div_ex(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned long *rax = &ctxt->regs[VCPU_REGS_RAX];
-	unsigned long *rdx = &ctxt->regs[VCPU_REGS_RDX];
 	u8 de = 0;
 
-	switch (ctxt->modrm_reg) {
-	case 0 ... 1:	/* test */
-		emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags);
-		break;
-	case 2:	/* not */
-		ctxt->dst.val = ~ctxt->dst.val;
-		break;
-	case 3:	/* neg */
-		emulate_1op("neg", ctxt->dst, ctxt->eflags);
-		break;
-	case 4: /* mul */
-		emulate_1op_rax_rdx("mul", ctxt->src, *rax, *rdx, ctxt->eflags);
-		break;
-	case 5: /* imul */
-		emulate_1op_rax_rdx("imul", ctxt->src, *rax, *rdx, ctxt->eflags);
-		break;
-	case 6: /* div */
-		emulate_1op_rax_rdx_ex("div", ctxt->src, *rax, *rdx,
-				       ctxt->eflags, de);
-		break;
-	case 7: /* idiv */
-		emulate_1op_rax_rdx_ex("idiv", ctxt->src, *rax, *rdx,
-				       ctxt->eflags, de);
-		break;
-	default:
-		return X86EMUL_UNHANDLEABLE;
-	}
+	emulate_1op_rax_rdx(ctxt, "div", de);
+	if (de)
+		return emulate_de(ctxt);
+	return X86EMUL_CONTINUE;
+}
+
+static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
+{
+	u8 de = 0;
+
+	emulate_1op_rax_rdx(ctxt, "idiv", de);
 	if (de)
 		return emulate_de(ctxt);
 	return X86EMUL_CONTINUE;
@@ -1743,10 +1762,10 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 
 	switch (ctxt->modrm_reg) {
 	case 0:	/* inc */
-		emulate_1op("inc", ctxt->dst, ctxt->eflags);
+		emulate_1op(ctxt, "inc");
 		break;
 	case 1:	/* dec */
-		emulate_1op("dec", ctxt->dst, ctxt->eflags);
+		emulate_1op(ctxt, "dec");
 		break;
 	case 2: /* call near abs */ {
 		long int old_eip;
@@ -1812,8 +1831,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, int seg)
+static int em_lseg(struct x86_emulate_ctxt *ctxt)
 {
+	int seg = ctxt->src2.val;
 	unsigned short sel;
 	int rc;
 
@@ -2452,7 +2472,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
 	ctxt->src.type = OP_IMM;
 	ctxt->src.val = 0;
 	ctxt->src.bytes = 1;
-	emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "or");
 	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
 	if (cf)
 		ctxt->eflags |= X86_EFLAGS_CF;
@@ -2502,49 +2522,49 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 
 static int em_add(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "add");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_or(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("or", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "or");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_adc(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("adc", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "adc");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_sbb(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("sbb", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "sbb");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_and(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("and", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "and");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_sub(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("sub", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "sub");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_xor(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("xor", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "xor");
 	return X86EMUL_CONTINUE;
 }
 
 static int em_cmp(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "cmp");
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
 	return X86EMUL_CONTINUE;
@@ -2552,7 +2572,9 @@ static int em_cmp(struct x86_emulate_ctxt *ctxt)
 
 static int em_test(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV("test", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV(ctxt, "test");
+	/* Disable writeback. */
+	ctxt->dst.type = OP_NONE;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2570,7 +2592,7 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt)
 
 static int em_imul(struct x86_emulate_ctxt *ctxt)
 {
-	emulate_2op_SrcV_nobyte("imul", ctxt->src, ctxt->dst, ctxt->eflags);
+	emulate_2op_SrcV_nobyte(ctxt, "imul");
 	return X86EMUL_CONTINUE;
 }
 
@@ -3025,9 +3047,14 @@ static struct opcode group1A[] = {
 };
 
 static struct opcode group3[] = {
-	D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
-	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
-	X4(D(SrcMem | ModRM)),
+	I(DstMem | SrcImm | ModRM, em_test),
+	I(DstMem | SrcImm | ModRM, em_test),
+	I(DstMem | SrcNone | ModRM | Lock, em_not),
+	I(DstMem | SrcNone | ModRM | Lock, em_neg),
+	I(SrcMem | ModRM, em_mul_ex),
+	I(SrcMem | ModRM, em_imul_ex),
+	I(SrcMem | ModRM, em_div_ex),
+	I(SrcMem | ModRM, em_idiv_ex),
 };
 
 static struct opcode group4[] = {
@@ -3090,16 +3117,20 @@ static struct gprefix pfx_0f_6f_0f_7f = {
 static struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	I6ALU(Lock, em_add),
-	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
+	I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
 	/* 0x08 - 0x0F */
 	I6ALU(Lock, em_or),
-	D(ImplicitOps | Stack | No64), N,
+	I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
+	N,
 	/* 0x10 - 0x17 */
 	I6ALU(Lock, em_adc),
-	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+	I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
+	I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
 	/* 0x18 - 0x1F */
 	I6ALU(Lock, em_sbb),
-	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
+	I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
+	I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
 	/* 0x20 - 0x27 */
 	I6ALU(Lock, em_and), N, N,
 	/* 0x28 - 0x2F */
@@ -3167,7 +3198,8 @@ static struct opcode opcode_table[256] = {
 	D2bv(DstMem | SrcImmByte | ModRM),
 	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
 	I(ImplicitOps | Stack, em_ret),
-	D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64),
+	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
+	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
 	G(ByteOp, group11), G(0, group11),
 	/* 0xC8 - 0xCF */
 	N, N, N, I(ImplicitOps | Stack, em_ret_far),
@@ -3242,20 +3274,22 @@ static struct opcode twobyte_table[256] = {
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
-	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
+	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
 	DI(ImplicitOps, cpuid), D(DstMem | SrcReg | ModRM | BitOp),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
 	/* 0xA8 - 0xAF */
-	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
+	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
 	DI(ImplicitOps, rsm), D(DstMem | SrcReg | ModRM | BitOp | Lock),
 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
 	D(DstMem | SrcReg | Src2CL | ModRM),
 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	D2bv(DstMem | SrcReg | ModRM | Lock),
-	D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock),
-	D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM),
+	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
+	D(DstMem | SrcReg | ModRM | BitOp | Lock),
+	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
+	I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xB8 - 0xBF */
 	N, N,
@@ -3309,13 +3343,13 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	/* NB. Immediates are sign-extended as necessary. */
 	switch (op->bytes) {
 	case 1:
-		op->val = insn_fetch(s8, 1, ctxt->_eip);
+		op->val = insn_fetch(s8, ctxt);
 		break;
 	case 2:
-		op->val = insn_fetch(s16, 2, ctxt->_eip);
+		op->val = insn_fetch(s16, ctxt);
 		break;
 	case 4:
-		op->val = insn_fetch(s32, 4, ctxt->_eip);
+		op->val = insn_fetch(s32, ctxt);
 		break;
 	}
 	if (!sign_extension) {
@@ -3335,6 +3369,125 @@ done:
 	return rc;
 }
 
+static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
+			  unsigned d)
+{
+	int rc = X86EMUL_CONTINUE;
+
+	switch (d) {
+	case OpReg:
+		decode_register_operand(ctxt, op,
+			 op == &ctxt->dst &&
+			 ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
+		break;
+	case OpImmUByte:
+		rc = decode_imm(ctxt, op, 1, false);
+		break;
+	case OpMem:
+		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+	mem_common:
+		*op = ctxt->memop;
+		ctxt->memopp = op;
+		if ((ctxt->d & BitOp) && op == &ctxt->dst)
+			fetch_bit_operand(ctxt);
+		op->orig_val = op->val;
+		break;
+	case OpMem64:
+		ctxt->memop.bytes = 8;
+		goto mem_common;
+	case OpAcc:
+		op->type = OP_REG;
+		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
+		fetch_register_operand(op);
+		op->orig_val = op->val;
+		break;
+	case OpDI:
+		op->type = OP_MEM;
+		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+		op->addr.mem.ea =
+			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
+		op->addr.mem.seg = VCPU_SREG_ES;
+		op->val = 0;
+		break;
+	case OpDX:
+		op->type = OP_REG;
+		op->bytes = 2;
+		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+		fetch_register_operand(op);
+		break;
+	case OpCL:
+		op->bytes = 1;
+		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
+		break;
+	case OpImmByte:
+		rc = decode_imm(ctxt, op, 1, true);
+		break;
+	case OpOne:
+		op->bytes = 1;
+		op->val = 1;
+		break;
+	case OpImm:
+		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
+		break;
+	case OpMem16:
+		ctxt->memop.bytes = 2;
+		goto mem_common;
+	case OpMem32:
+		ctxt->memop.bytes = 4;
+		goto mem_common;
+	case OpImmU16:
+		rc = decode_imm(ctxt, op, 2, false);
+		break;
+	case OpImmU:
+		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
+		break;
+	case OpSI:
+		op->type = OP_MEM;
+		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
+		op->addr.mem.ea =
+			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
+		op->addr.mem.seg = seg_override(ctxt);
+		op->val = 0;
+		break;
+	case OpImmFAddr:
+		op->type = OP_IMM;
+		op->addr.mem.ea = ctxt->_eip;
+		op->bytes = ctxt->op_bytes + 2;
+		insn_fetch_arr(op->valptr, op->bytes, ctxt);
+		break;
+	case OpMemFAddr:
+		ctxt->memop.bytes = ctxt->op_bytes + 2;
+		goto mem_common;
+	case OpES:
+		op->val = VCPU_SREG_ES;
+		break;
+	case OpCS:
+		op->val = VCPU_SREG_CS;
+		break;
+	case OpSS:
+		op->val = VCPU_SREG_SS;
+		break;
+	case OpDS:
+		op->val = VCPU_SREG_DS;
+		break;
+	case OpFS:
+		op->val = VCPU_SREG_FS;
+		break;
+	case OpGS:
+		op->val = VCPU_SREG_GS;
+		break;
+	case OpImplicit:
+		/* Special instructions do their own operand decoding. */
+	default:
+		op->type = OP_NONE; /* Disable writeback. */
+		break;
+	}
+
+done:
+	return rc;
+}
+
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 {
 	int rc = X86EMUL_CONTINUE;
@@ -3342,8 +3495,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
 	bool op_prefix = false;
 	struct opcode opcode;
-	struct operand memop = { .type = OP_NONE }, *memopp = NULL;
 
+	ctxt->memop.type = OP_NONE;
+	ctxt->memopp = NULL;
 	ctxt->_eip = ctxt->eip;
 	ctxt->fetch.start = ctxt->_eip;
 	ctxt->fetch.end = ctxt->fetch.start + insn_len;
@@ -3366,7 +3520,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 		break;
 #endif
 	default:
-		return -1;
+		return EMULATION_FAILED;
 	}
 
 	ctxt->op_bytes = def_op_bytes;
@@ -3374,7 +3528,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 
 	/* Legacy prefixes. */
 	for (;;) {
-		switch (ctxt->b = insn_fetch(u8, 1, ctxt->_eip)) {
+		switch (ctxt->b = insn_fetch(u8, ctxt)) {
 		case 0x66:	/* operand-size override */
 			op_prefix = true;
 			/* switch between 2/4 bytes */
@@ -3430,7 +3584,7 @@ done_prefixes:
 	/* Two-byte opcode? */
 	if (ctxt->b == 0x0f) {
 		ctxt->twobyte = 1;
-		ctxt->b = insn_fetch(u8, 1, ctxt->_eip);
+		ctxt->b = insn_fetch(u8, ctxt);
 		opcode = twobyte_table[ctxt->b];
 	}
 	ctxt->d = opcode.flags;
@@ -3438,13 +3592,13 @@ done_prefixes:
 	while (ctxt->d & GroupMask) {
 		switch (ctxt->d & GroupMask) {
 		case Group:
-			ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip);
+			ctxt->modrm = insn_fetch(u8, ctxt);
 			--ctxt->_eip;
 			goffset = (ctxt->modrm >> 3) & 7;
 			opcode = opcode.u.group[goffset];
 			break;
 		case GroupDual:
-			ctxt->modrm = insn_fetch(u8, 1, ctxt->_eip);
+			ctxt->modrm = insn_fetch(u8, ctxt);
 			--ctxt->_eip;
 			goffset = (ctxt->modrm >> 3) & 7;
 			if ((ctxt->modrm >> 6) == 3)
@@ -3458,7 +3612,7 @@ done_prefixes:
 			break;
 		case Prefix:
 			if (ctxt->rep_prefix && op_prefix)
-				return X86EMUL_UNHANDLEABLE;
+				return EMULATION_FAILED;
 			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
 			switch (simd_prefix) {
 			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
@@ -3468,10 +3622,10 @@ done_prefixes:
 			}
 			break;
 		default:
-			return X86EMUL_UNHANDLEABLE;
+			return EMULATION_FAILED;
 		}
 
-		ctxt->d &= ~GroupMask;
+		ctxt->d &= ~(u64)GroupMask;
 		ctxt->d |= opcode.flags;
 	}
 
@@ -3481,10 +3635,10 @@ done_prefixes:
 
 	/* Unrecognised? */
 	if (ctxt->d == 0 || (ctxt->d & Undefined))
-		return -1;
+		return EMULATION_FAILED;
 
 	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
-		return -1;
+		return EMULATION_FAILED;
 
 	if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
 		ctxt->op_bytes = 8;
@@ -3501,96 +3655,27 @@ done_prefixes:
 
 	/* ModRM and SIB bytes. */
 	if (ctxt->d & ModRM) {
-		rc = decode_modrm(ctxt, &memop);
+		rc = decode_modrm(ctxt, &ctxt->memop);
 		if (!ctxt->has_seg_override)
 			set_seg_override(ctxt, ctxt->modrm_seg);
 	} else if (ctxt->d & MemAbs)
-		rc = decode_abs(ctxt, &memop);
+		rc = decode_abs(ctxt, &ctxt->memop);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
 	if (!ctxt->has_seg_override)
 		set_seg_override(ctxt, VCPU_SREG_DS);
 
-	memop.addr.mem.seg = seg_override(ctxt);
+	ctxt->memop.addr.mem.seg = seg_override(ctxt);
 
-	if (memop.type == OP_MEM && ctxt->ad_bytes != 8)
-		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
+	if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
+		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
 
 	/*
 	 * Decode and fetch the source operand: register, memory
 	 * or immediate.
 	 */
-	switch (ctxt->d & SrcMask) {
-	case SrcNone:
-		break;
-	case SrcReg:
-		decode_register_operand(ctxt, &ctxt->src, 0);
-		break;
-	case SrcMem16:
-		memop.bytes = 2;
-		goto srcmem_common;
-	case SrcMem32:
-		memop.bytes = 4;
-		goto srcmem_common;
-	case SrcMem:
-		memop.bytes = (ctxt->d & ByteOp) ? 1 :
-							   ctxt->op_bytes;
-	srcmem_common:
-		ctxt->src = memop;
-		memopp = &ctxt->src;
-		break;
-	case SrcImmU16:
-		rc = decode_imm(ctxt, &ctxt->src, 2, false);
-		break;
-	case SrcImm:
-		rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), true);
-		break;
-	case SrcImmU:
-		rc = decode_imm(ctxt, &ctxt->src, imm_size(ctxt), false);
-		break;
-	case SrcImmByte:
-		rc = decode_imm(ctxt, &ctxt->src, 1, true);
-		break;
-	case SrcImmUByte:
-		rc = decode_imm(ctxt, &ctxt->src, 1, false);
-		break;
-	case SrcAcc:
-		ctxt->src.type = OP_REG;
-		ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RAX];
-		fetch_register_operand(&ctxt->src);
-		break;
-	case SrcOne:
-		ctxt->src.bytes = 1;
-		ctxt->src.val = 1;
-		break;
-	case SrcSI:
-		ctxt->src.type = OP_MEM;
-		ctxt->src.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		ctxt->src.addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
-		ctxt->src.addr.mem.seg = seg_override(ctxt);
-		ctxt->src.val = 0;
-		break;
-	case SrcImmFAddr:
-		ctxt->src.type = OP_IMM;
-		ctxt->src.addr.mem.ea = ctxt->_eip;
-		ctxt->src.bytes = ctxt->op_bytes + 2;
-		insn_fetch_arr(ctxt->src.valptr, ctxt->src.bytes, ctxt->_eip);
-		break;
-	case SrcMemFAddr:
-		memop.bytes = ctxt->op_bytes + 2;
-		goto srcmem_common;
-		break;
-	case SrcDX:
-		ctxt->src.type = OP_REG;
-		ctxt->src.bytes = 2;
-		ctxt->src.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
-		fetch_register_operand(&ctxt->src);
-		break;
-	}
-
+	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
@@ -3598,85 +3683,18 @@ done_prefixes:
 	 * Decode and fetch the second source operand: register, memory
 	 * or immediate.
 	 */
-	switch (ctxt->d & Src2Mask) {
-	case Src2None:
-		break;
-	case Src2CL:
-		ctxt->src2.bytes = 1;
-		ctxt->src2.val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
-		break;
-	case Src2ImmByte:
-		rc = decode_imm(ctxt, &ctxt->src2, 1, true);
-		break;
-	case Src2One:
-		ctxt->src2.bytes = 1;
-		ctxt->src2.val = 1;
-		break;
-	case Src2Imm:
-		rc = decode_imm(ctxt, &ctxt->src2, imm_size(ctxt), true);
-		break;
-	}
-
+	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
 	/* Decode and fetch the destination operand: register or memory. */
-	switch (ctxt->d & DstMask) {
-	case DstReg:
-		decode_register_operand(ctxt, &ctxt->dst,
-			 ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
-		break;
-	case DstImmUByte:
-		ctxt->dst.type = OP_IMM;
-		ctxt->dst.addr.mem.ea = ctxt->_eip;
-		ctxt->dst.bytes = 1;
-		ctxt->dst.val = insn_fetch(u8, 1, ctxt->_eip);
-		break;
-	case DstMem:
-	case DstMem64:
-		ctxt->dst = memop;
-		memopp = &ctxt->dst;
-		if ((ctxt->d & DstMask) == DstMem64)
-			ctxt->dst.bytes = 8;
-		else
-			ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		if (ctxt->d & BitOp)
-			fetch_bit_operand(ctxt);
-		ctxt->dst.orig_val = ctxt->dst.val;
-		break;
-	case DstAcc:
-		ctxt->dst.type = OP_REG;
-		ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RAX];
-		fetch_register_operand(&ctxt->dst);
-		ctxt->dst.orig_val = ctxt->dst.val;
-		break;
-	case DstDI:
-		ctxt->dst.type = OP_MEM;
-		ctxt->dst.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		ctxt->dst.addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
-		ctxt->dst.addr.mem.seg = VCPU_SREG_ES;
-		ctxt->dst.val = 0;
-		break;
-	case DstDX:
-		ctxt->dst.type = OP_REG;
-		ctxt->dst.bytes = 2;
-		ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
-		fetch_register_operand(&ctxt->dst);
-		break;
-	case ImplicitOps:
-		/* Special instructions do their own operand decoding. */
-	default:
-		ctxt->dst.type = OP_NONE; /* Disable writeback. */
-		break;
-	}
+	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 done:
-	if (memopp && memopp->type == OP_MEM && ctxt->rip_relative)
-		memopp->addr.mem.ea += ctxt->_eip;
+	if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
+		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
-	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
+	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
@@ -3825,32 +3843,11 @@ special_insn:
 		goto twobyte_insn;
 
 	switch (ctxt->b) {
-	case 0x06:		/* push es */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_ES);
-		break;
-	case 0x07:		/* pop es */
-		rc = emulate_pop_sreg(ctxt, VCPU_SREG_ES);
-		break;
-	case 0x0e:		/* push cs */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_CS);
-		break;
-	case 0x16:		/* push ss */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_SS);
-		break;
-	case 0x17:		/* pop ss */
-		rc = emulate_pop_sreg(ctxt, VCPU_SREG_SS);
-		break;
-	case 0x1e:		/* push ds */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_DS);
-		break;
-	case 0x1f:		/* pop ds */
-		rc = emulate_pop_sreg(ctxt, VCPU_SREG_DS);
-		break;
 	case 0x40 ... 0x47: /* inc r16/r32 */
-		emulate_1op("inc", ctxt->dst, ctxt->eflags);
+		emulate_1op(ctxt, "inc");
 		break;
 	case 0x48 ... 0x4f: /* dec r16/r32 */
-		emulate_1op("dec", ctxt->dst, ctxt->eflags);
+		emulate_1op(ctxt, "dec");
 		break;
 	case 0x63:		/* movsxd */
 		if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -3891,12 +3888,6 @@ special_insn:
 	case 0xc0 ... 0xc1:
 		rc = em_grp2(ctxt);
 		break;
-	case 0xc4:		/* les */
-		rc = emulate_load_segment(ctxt, VCPU_SREG_ES);
-		break;
-	case 0xc5:		/* lds */
-		rc = emulate_load_segment(ctxt, VCPU_SREG_DS);
-		break;
 	case 0xcc:		/* int3 */
 		rc = emulate_int(ctxt, 3);
 		break;
@@ -3953,9 +3944,6 @@ special_insn:
 		/* complement carry flag from eflags reg */
 		ctxt->eflags ^= EFLG_CF;
 		break;
-	case 0xf6 ... 0xf7:	/* Grp3 */
-		rc = em_grp3(ctxt);
-		break;
 	case 0xf8: /* clc */
 		ctxt->eflags &= ~EFLG_CF;
 		break;
@@ -4103,36 +4091,24 @@ twobyte_insn:
 	case 0x90 ... 0x9f:     /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xa0:	  /* push fs */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_FS);
-		break;
-	case 0xa1:	 /* pop fs */
-		rc = emulate_pop_sreg(ctxt, VCPU_SREG_FS);
-		break;
 	case 0xa3:
 	      bt:		/* bt */
 		ctxt->dst.type = OP_NONE;
 		/* only subword offset */
 		ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
-		emulate_2op_SrcV_nobyte("bt", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcV_nobyte(ctxt, "bt");
 		break;
 	case 0xa4: /* shld imm8, r, r/m */
 	case 0xa5: /* shld cl, r, r/m */
-		emulate_2op_cl("shld", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags);
-		break;
-	case 0xa8:	/* push gs */
-		rc = emulate_push_sreg(ctxt, VCPU_SREG_GS);
-		break;
-	case 0xa9:	/* pop gs */
-		rc = emulate_pop_sreg(ctxt, VCPU_SREG_GS);
+		emulate_2op_cl(ctxt, "shld");
 		break;
 	case 0xab:
 	      bts:		/* bts */
-		emulate_2op_SrcV_nobyte("bts", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcV_nobyte(ctxt, "bts");
 		break;
 	case 0xac: /* shrd imm8, r, r/m */
 	case 0xad: /* shrd cl, r, r/m */
-		emulate_2op_cl("shrd", ctxt->src2, ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_cl(ctxt, "shrd");
 		break;
 	case 0xae:              /* clflush */
 		break;
@@ -4143,7 +4119,7 @@ twobyte_insn:
 		 */
 		ctxt->src.orig_val = ctxt->src.val;
 		ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
-		emulate_2op_SrcV("cmp", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcV(ctxt, "cmp");
 		if (ctxt->eflags & EFLG_ZF) {
 			/* Success: write back to memory. */
 			ctxt->dst.val = ctxt->src.orig_val;
@@ -4153,18 +4129,9 @@ twobyte_insn:
 			ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
 		}
 		break;
-	case 0xb2:		/* lss */
-		rc = emulate_load_segment(ctxt, VCPU_SREG_SS);
-		break;
 	case 0xb3:
 	      btr:		/* btr */
-		emulate_2op_SrcV_nobyte("btr", ctxt->src, ctxt->dst, ctxt->eflags);
-		break;
-	case 0xb4:		/* lfs */
-		rc = emulate_load_segment(ctxt, VCPU_SREG_FS);
-		break;
-	case 0xb5:		/* lgs */
-		rc = emulate_load_segment(ctxt, VCPU_SREG_GS);
+		emulate_2op_SrcV_nobyte(ctxt, "btr");
 		break;
 	case 0xb6 ... 0xb7:	/* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
@@ -4185,7 +4152,7 @@ twobyte_insn:
 		break;
 	case 0xbb:
 	      btc:		/* btc */
-		emulate_2op_SrcV_nobyte("btc", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcV_nobyte(ctxt, "btc");
 		break;
 	case 0xbc: {		/* bsf */
 		u8 zf;
@@ -4217,7 +4184,7 @@ twobyte_insn:
 							(s16) ctxt->src.val;
 		break;
 	case 0xc0 ... 0xc1:	/* xadd */
-		emulate_2op_SrcV("add", ctxt->src, ctxt->dst, ctxt->eflags);
+		emulate_2op_SrcV(ctxt, "add");
 		/* Write back the register source. */
 		ctxt->src.val = ctxt->dst.orig_val;
 		write_register_operand(&ctxt->src);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad72385058..76e3f1cd0369 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,14 +713,16 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
+				      KVM_PIT_MEM_LENGTH, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
 		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
-						&pit->speaker_dev);
+					      KVM_SPEAKER_BASE_ADDRESS, 4,
+					      &pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
 	}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 19fe855e7953..cac4746d7ffb 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -34,6 +34,9 @@
 #include <linux/kvm_host.h>
 #include "trace.h"
 
+#define pr_pic_unimpl(fmt, ...)	\
+	pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__)
+
 static void pic_irq_request(struct kvm *kvm, int level);
 
 static void pic_lock(struct kvm_pic *s)
@@ -306,10 +309,10 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 			}
 			s->init_state = 1;
 			if (val & 0x02)
-				printk(KERN_ERR "single mode not supported");
+				pr_pic_unimpl("single mode not supported");
 			if (val & 0x08)
-				printk(KERN_ERR
-				       "level sensitive irq not supported");
+				pr_pic_unimpl(
+					"level sensitive irq not supported");
 		} else if (val & 0x08) {
 			if (val & 0x04)
 				s->poll = 1;
@@ -459,22 +462,15 @@ static int picdev_in_range(gpa_t addr)
 	}
 }
 
-static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
-{
-	return container_of(dev, struct kvm_pic, dev);
-}
-
-static int picdev_write(struct kvm_io_device *this,
+static int picdev_write(struct kvm_pic *s,
 			 gpa_t addr, int len, const void *val)
 {
-	struct kvm_pic *s = to_pic(this);
 	unsigned char data = *(unsigned char *)val;
 	if (!picdev_in_range(addr))
 		return -EOPNOTSUPP;
 
 	if (len != 1) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "PIC: non byte write\n");
+		pr_pic_unimpl("non byte write\n");
 		return 0;
 	}
 	pic_lock(s);
@@ -494,17 +490,15 @@ static int picdev_write(struct kvm_io_device *this,
 	return 0;
 }
 
-static int picdev_read(struct kvm_io_device *this,
+static int picdev_read(struct kvm_pic *s,
 		       gpa_t addr, int len, void *val)
 {
-	struct kvm_pic *s = to_pic(this);
 	unsigned char data = 0;
 	if (!picdev_in_range(addr))
 		return -EOPNOTSUPP;
 
 	if (len != 1) {
-		if (printk_ratelimit())
-			printk(KERN_ERR "PIC: non byte read\n");
+		pr_pic_unimpl("non byte read\n");
 		return 0;
 	}
 	pic_lock(s);
@@ -525,6 +519,48 @@ static int picdev_read(struct kvm_io_device *this,
 	return 0;
 }
 
+static int picdev_master_write(struct kvm_io_device *dev,
+			       gpa_t addr, int len, const void *val)
+{
+	return picdev_write(container_of(dev, struct kvm_pic, dev_master),
+			    addr, len, val);
+}
+
+static int picdev_master_read(struct kvm_io_device *dev,
+			      gpa_t addr, int len, void *val)
+{
+	return picdev_read(container_of(dev, struct kvm_pic, dev_master),
+			    addr, len, val);
+}
+
+static int picdev_slave_write(struct kvm_io_device *dev,
+			      gpa_t addr, int len, const void *val)
+{
+	return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
+			    addr, len, val);
+}
+
+static int picdev_slave_read(struct kvm_io_device *dev,
+			     gpa_t addr, int len, void *val)
+{
+	return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
+			    addr, len, val);
+}
+
+static int picdev_eclr_write(struct kvm_io_device *dev,
+			     gpa_t addr, int len, const void *val)
+{
+	return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
+			    addr, len, val);
+}
+
+static int picdev_eclr_read(struct kvm_io_device *dev,
+			    gpa_t addr, int len, void *val)
+{
+	return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
+			    addr, len, val);
+}
+
 /*
  * callback when PIC0 irq status changed
  */
@@ -537,9 +573,19 @@ static void pic_irq_request(struct kvm *kvm, int level)
 	s->output = level;
 }
 
-static const struct kvm_io_device_ops picdev_ops = {
-	.read     = picdev_read,
-	.write    = picdev_write,
+static const struct kvm_io_device_ops picdev_master_ops = {
+	.read     = picdev_master_read,
+	.write    = picdev_master_write,
+};
+
+static const struct kvm_io_device_ops picdev_slave_ops = {
+	.read     = picdev_slave_read,
+	.write    = picdev_slave_write,
+};
+
+static const struct kvm_io_device_ops picdev_eclr_ops = {
+	.read     = picdev_eclr_read,
+	.write    = picdev_eclr_write,
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
@@ -560,16 +606,39 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	/*
 	 * Initialize PIO device
 	 */
-	kvm_iodevice_init(&s->dev, &picdev_ops);
+	kvm_iodevice_init(&s->dev_master, &picdev_master_ops);
+	kvm_iodevice_init(&s->dev_slave, &picdev_slave_ops);
+	kvm_iodevice_init(&s->dev_eclr, &picdev_eclr_ops);
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,
+				      &s->dev_master);
+	if (ret < 0)
+		goto fail_unlock;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+	if (ret < 0)
+		goto fail_unreg_2;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+	if (ret < 0)
+		goto fail_unreg_1;
+
 	mutex_unlock(&kvm->slots_lock);
-	if (ret < 0) {
-		kfree(s);
-		return NULL;
-	}
 
 	return s;
+
+fail_unreg_1:
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+
+fail_unreg_2:
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+
+fail_unlock:
+	mutex_unlock(&kvm->slots_lock);
+
+	kfree(s);
+
+	return NULL;
 }
 
 void kvm_destroy_pic(struct kvm *kvm)
@@ -577,7 +646,9 @@ void kvm_destroy_pic(struct kvm *kvm)
 	struct kvm_pic *vpic = kvm->arch.vpic;
 
 	if (vpic) {
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
 		kvm->arch.vpic = NULL;
 		kfree(vpic);
 	}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 53e2d084bffb..2086f2bfba33 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -66,7 +66,9 @@ struct kvm_pic {
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
 	int output;		/* intr from master PIC */
-	struct kvm_io_device dev;
+	struct kvm_io_device dev_master;
+	struct kvm_io_device dev_slave;
+	struct kvm_io_device dev_eclr;
 	void (*ack_notifier)(void *opaque, int irq);
 	unsigned long irq_states[16];
 };
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3377d53fcd36..544076c4f44b 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
 	return vcpu->arch.walk_mmu->pdptrs[index];
 }
 
-static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index)
-{
-	load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu));
-
-	return mmu->pdptrs[index];
-}
-
 static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
 {
 	ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
index 64bc6ea78d90..497dbaa366d4 100644
--- a/arch/x86/kvm/kvm_timer.h
+++ b/arch/x86/kvm/kvm_timer.h
@@ -2,6 +2,8 @@
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
+	u32 timer_mode_mask;
+	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
 	bool reinject;
 	struct kvm_timer_ops *t_ops;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 57dcbd4308fa..54abb40199d6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -68,6 +68,9 @@
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
+static unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
 {
 	return *((u32 *) (apic->regs + reg_off));
@@ -135,9 +138,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
+static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
+{
+	return ((apic_get_reg(apic, APIC_LVTT) &
+		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+}
+
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
+	return ((apic_get_reg(apic, APIC_LVTT) &
+		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+}
+
+static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
+{
+	return ((apic_get_reg(apic, APIC_LVTT) &
+		apic->lapic_timer.timer_mode_mask) ==
+			APIC_LVT_TIMER_TSCDEADLINE);
 }
 
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -166,7 +183,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 }
 
 static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
-	LVT_MASK | APIC_LVT_TIMER_PERIODIC,	/* LVTT */
+	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
 	LINT_MASK, LINT_MASK,	/* LVT0-1 */
@@ -316,8 +333,8 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 			result = 1;
 		break;
 	default:
-		printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
-		       apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
+		apic_debug("Bad DFR vcpu %d: %08x\n",
+			   apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
 		break;
 	}
 
@@ -354,8 +371,8 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 		result = (target != source);
 		break;
 	default:
-		printk(KERN_WARNING "Bad dest shorthand value %x\n",
-		       short_hand);
+		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
+			   short_hand);
 		break;
 	}
 
@@ -401,11 +418,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		break;
 
 	case APIC_DM_REMRD:
-		printk(KERN_DEBUG "Ignoring delivery mode 3\n");
+		apic_debug("Ignoring delivery mode 3\n");
 		break;
 
 	case APIC_DM_SMI:
-		printk(KERN_DEBUG "Ignoring guest SMI\n");
+		apic_debug("Ignoring guest SMI\n");
 		break;
 
 	case APIC_DM_NMI:
@@ -565,11 +582,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 			val = kvm_apic_id(apic) << 24;
 		break;
 	case APIC_ARBPRI:
-		printk(KERN_WARNING "Access APIC ARBPRI register "
-		       "which is for P6\n");
+		apic_debug("Access APIC ARBPRI register which is for P6\n");
 		break;
 
 	case APIC_TMCCT:	/* Timer CCR */
+		if (apic_lvtt_tscdeadline(apic))
+			return 0;
+
 		val = apic_get_tmcct(apic);
 		break;
 
@@ -664,29 +683,40 @@ static void update_divide_count(struct kvm_lapic *apic)
 
 static void start_apic_timer(struct kvm_lapic *apic)
 {
-	ktime_t now = apic->lapic_timer.timer.base->get_time();
-
-	apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
-		    APIC_BUS_CYCLE_NS * apic->divide_count;
+	ktime_t now;
 	atomic_set(&apic->lapic_timer.pending, 0);
 
-	if (!apic->lapic_timer.period)
-		return;
-	/*
-	 * Do not allow the guest to program periodic timers with small
-	 * interval, since the hrtimers are not throttled by the host
-	 * scheduler.
-	 */
-	if (apic_lvtt_period(apic)) {
-		if (apic->lapic_timer.period < NSEC_PER_MSEC/2)
-			apic->lapic_timer.period = NSEC_PER_MSEC/2;
-	}
+	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
+		/* lapic timer in oneshot or peroidic mode */
+		now = apic->lapic_timer.timer.base->get_time();
+		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
+			    * APIC_BUS_CYCLE_NS * apic->divide_count;
+
+		if (!apic->lapic_timer.period)
+			return;
+		/*
+		 * Do not allow the guest to program periodic timers with small
+		 * interval, since the hrtimers are not throttled by the host
+		 * scheduler.
+		 */
+		if (apic_lvtt_period(apic)) {
+			s64 min_period = min_timer_period_us * 1000LL;
+
+			if (apic->lapic_timer.period < min_period) {
+				pr_info_ratelimited(
+				    "kvm: vcpu %i: requested %lld ns "
+				    "lapic timer period limited to %lld ns\n",
+				    apic->vcpu->vcpu_id,
+				    apic->lapic_timer.period, min_period);
+				apic->lapic_timer.period = min_period;
+			}
+		}
 
-	hrtimer_start(&apic->lapic_timer.timer,
-		      ktime_add_ns(now, apic->lapic_timer.period),
-		      HRTIMER_MODE_ABS);
+		hrtimer_start(&apic->lapic_timer.timer,
+			      ktime_add_ns(now, apic->lapic_timer.period),
+			      HRTIMER_MODE_ABS);
 
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+		apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
 			   PRIx64 ", "
 			   "timer initial count 0x%x, period %lldns, "
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
@@ -695,6 +725,30 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
+	} else if (apic_lvtt_tscdeadline(apic)) {
+		/* lapic timer in tsc deadline mode */
+		u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+		u64 ns = 0;
+		struct kvm_vcpu *vcpu = apic->vcpu;
+		unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
+		unsigned long flags;
+
+		if (unlikely(!tscdeadline || !this_tsc_khz))
+			return;
+
+		local_irq_save(flags);
+
+		now = apic->lapic_timer.timer.base->get_time();
+		guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+		if (likely(tscdeadline > guest_tsc)) {
+			ns = (tscdeadline - guest_tsc) * 1000000ULL;
+			do_div(ns, this_tsc_khz);
+		}
+		hrtimer_start(&apic->lapic_timer.timer,
+			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+
+		local_irq_restore(flags);
+	}
 }
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
@@ -782,7 +836,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LVT0:
 		apic_manage_nmi_watchdog(apic, val);
-	case APIC_LVTT:
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
 	case APIC_LVT1:
@@ -796,7 +849,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 		break;
 
+	case APIC_LVTT:
+		if ((apic_get_reg(apic, APIC_LVTT) &
+		    apic->lapic_timer.timer_mode_mask) !=
+		   (val & apic->lapic_timer.timer_mode_mask))
+			hrtimer_cancel(&apic->lapic_timer.timer);
+
+		if (!apic_sw_enabled(apic))
+			val |= APIC_LVT_MASKED;
+		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
+		apic_set_reg(apic, APIC_LVTT, val);
+		break;
+
 	case APIC_TMICT:
+		if (apic_lvtt_tscdeadline(apic))
+			break;
+
 		hrtimer_cancel(&apic->lapic_timer.timer);
 		apic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
@@ -804,14 +872,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_TDCR:
 		if (val & 4)
-			printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
+			apic_debug("KVM_WRITE:TDCR %x\n", val);
 		apic_set_reg(apic, APIC_TDCR, val);
 		update_divide_count(apic);
 		break;
 
 	case APIC_ESR:
 		if (apic_x2apic_mode(apic) && val != 0) {
-			printk(KERN_ERR "KVM_WRITE:ESR not zero %x\n", val);
+			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
 			ret = 1;
 		}
 		break;
@@ -864,6 +932,15 @@ static int apic_mmio_write(struct kvm_io_device *this,
 	return 0;
 }
 
+void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
+
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->arch.apic)
@@ -883,6 +960,32 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
  *----------------------------------------------------------------------
  */
 
+u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	if (!apic)
+		return 0;
+
+	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+		return 0;
+
+	return apic->lapic_timer.tscdeadline;
+}
+
+void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	if (!apic)
+		return;
+
+	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+		return;
+
+	hrtimer_cancel(&apic->lapic_timer.timer);
+	apic->lapic_timer.tscdeadline = data;
+	start_apic_timer(apic);
+}
+
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 52c9e6b9e725..138e8cc6fea6 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -26,6 +26,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
@@ -41,6 +42,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
 bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
+u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
+void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
+
 void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8e8da7960dbe..f1b36cf3e3d0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
 		ASSERT(!VALID_PAGE(root));
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
-			pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i);
+			pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
 			if (!is_present_gpte(pdptr)) {
 				vcpu->arch.mmu.pae_root[i] = 0;
 				continue;
@@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
 	context->get_cr3 = get_cr3;
+	context->get_pdptr = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
 	context->nx = is_nx(vcpu);
 
@@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.walk_mmu->set_cr3           = kvm_x86_ops->set_cr3;
 	vcpu->arch.walk_mmu->get_cr3           = get_cr3;
+	vcpu->arch.walk_mmu->get_pdptr         = kvm_pdptr_read;
 	vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
 
 	return r;
@@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
 	g_context->get_cr3           = get_cr3;
+	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
 
 	/*
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 2460a265be23..746ec259d024 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -121,16 +121,16 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 
 static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 {
+	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
 	unsigned long *rmapp;
 	struct kvm_mmu_page *rev_sp;
 	gfn_t gfn;
 
-
 	rev_sp = page_header(__pa(sptep));
 	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);
 
 	if (!gfn_to_memslot(kvm, gfn)) {
-		if (!printk_ratelimit())
+		if (!__ratelimit(&ratelimit_state))
 			return;
 		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
 		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
@@ -141,7 +141,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
 
 	rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
 	if (!*rmapp) {
-		if (!printk_ratelimit())
+		if (!__ratelimit(&ratelimit_state))
 			return;
 		audit_printk(kvm, "no rmap for writable spte %llx\n",
 			     *sptep);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 507e2b844cfa..92994100638b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -147,7 +147,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, uninitialized_var(pte_access);
 	gpa_t pte_gpa;
-	bool eperm;
+	bool eperm, last_gpte;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
@@ -163,7 +163,7 @@ retry_walk:
 
 #if PTTYPE == 64
 	if (walker->level == PT32E_ROOT_LEVEL) {
-		pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3);
+		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
 		trace_kvm_mmu_paging_element(pte, walker->level);
 		if (!is_present_gpte(pte))
 			goto error;
@@ -221,6 +221,17 @@ retry_walk:
 			eperm = true;
 #endif
 
+		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
+		if (last_gpte) {
+			pte_access = pt_access &
+				     FNAME(gpte_access)(vcpu, pte, true);
+			/* check if the kernel is fetching from user page */
+			if (unlikely(pte_access & PT_USER_MASK) &&
+			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
+				if (fetch_fault && !user_fault)
+					eperm = true;
+		}
+
 		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
 			int ret;
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
@@ -238,18 +249,12 @@ retry_walk:
 
 		walker->ptes[walker->level - 1] = pte;
 
-		if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) {
+		if (last_gpte) {
 			int lvl = walker->level;
 			gpa_t real_gpa;
 			gfn_t gfn;
 			u32 ac;
 
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-
 			gfn = gpte_to_gfn_lvl(pte, lvl);
 			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
 
@@ -295,7 +300,6 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	}
 
-	pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true);
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 475d1c948501..e32243eac2f4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 	if (npt_enabled) {
 		/* Setup VMCB for Nested Paging */
 		control->nested_ctl = 1;
-		clr_intercept(svm, INTERCEPT_TASK_SWITCH);
 		clr_intercept(svm, INTERCEPT_INVLPG);
 		clr_exception_intercept(svm, PF_VECTOR);
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
@@ -1844,6 +1843,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
 	return svm->nested.nested_cr3;
 }
 
+static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 cr3 = svm->nested.nested_cr3;
+	u64 pdpte;
+	int ret;
+
+	ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
+				  offset_in_page(cr3) + index * 8, 8);
+	if (ret)
+		return 0;
+	return pdpte;
+}
+
 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
 				   unsigned long root)
 {
@@ -1875,6 +1888,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
+	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
 	vcpu->arch.mmu.shadow_root_level = get_npt_level();
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
@@ -2182,7 +2196,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
 				       vmcb->control.exit_info_1,
 				       vmcb->control.exit_info_2,
 				       vmcb->control.exit_int_info,
-				       vmcb->control.exit_int_info_err);
+				       vmcb->control.exit_int_info_err,
+				       KVM_ISA_SVM);
 
 	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
 	if (!nested_vmcb)
@@ -2894,15 +2909,20 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	return 0;
 }
 
+u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
+	return vmcb->control.tsc_offset +
+		svm_scale_tsc(vcpu, native_read_tsc());
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	switch (ecx) {
 	case MSR_IA32_TSC: {
-		struct vmcb *vmcb = get_host_vmcb(svm);
-
-		*data = vmcb->control.tsc_offset +
+		*data = svm->vmcb->control.tsc_offset +
 			svm_scale_tsc(vcpu, native_read_tsc());
 
 		break;
@@ -3314,8 +3334,6 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	u32 exit_code = svm->vmcb->control.exit_code;
 
-	trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
-
 	if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
 	if (npt_enabled)
@@ -3335,7 +3353,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 					svm->vmcb->control.exit_info_1,
 					svm->vmcb->control.exit_info_2,
 					svm->vmcb->control.exit_int_info,
-					svm->vmcb->control.exit_int_info_err);
+					svm->vmcb->control.exit_int_info_err,
+					KVM_ISA_SVM);
 
 		vmexit = nested_svm_exit_special(svm);
 
@@ -3768,6 +3787,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
+	trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
+
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 		kvm_before_handle_nmi(&svm->vcpu);
 
@@ -3897,60 +3918,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 	}
 }
 
-static const struct trace_print_flags svm_exit_reasons_str[] = {
-	{ SVM_EXIT_READ_CR0,			"read_cr0" },
-	{ SVM_EXIT_READ_CR3,			"read_cr3" },
-	{ SVM_EXIT_READ_CR4,			"read_cr4" },
-	{ SVM_EXIT_READ_CR8,			"read_cr8" },
-	{ SVM_EXIT_WRITE_CR0,			"write_cr0" },
-	{ SVM_EXIT_WRITE_CR3,			"write_cr3" },
-	{ SVM_EXIT_WRITE_CR4,			"write_cr4" },
-	{ SVM_EXIT_WRITE_CR8,			"write_cr8" },
-	{ SVM_EXIT_READ_DR0,			"read_dr0" },
-	{ SVM_EXIT_READ_DR1,			"read_dr1" },
-	{ SVM_EXIT_READ_DR2,			"read_dr2" },
-	{ SVM_EXIT_READ_DR3,			"read_dr3" },
-	{ SVM_EXIT_WRITE_DR0,			"write_dr0" },
-	{ SVM_EXIT_WRITE_DR1,			"write_dr1" },
-	{ SVM_EXIT_WRITE_DR2,			"write_dr2" },
-	{ SVM_EXIT_WRITE_DR3,			"write_dr3" },
-	{ SVM_EXIT_WRITE_DR5,			"write_dr5" },
-	{ SVM_EXIT_WRITE_DR7,			"write_dr7" },
-	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" },
-	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" },
-	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" },
-	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" },
-	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" },
-	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" },
-	{ SVM_EXIT_INTR,			"interrupt" },
-	{ SVM_EXIT_NMI,				"nmi" },
-	{ SVM_EXIT_SMI,				"smi" },
-	{ SVM_EXIT_INIT,			"init" },
-	{ SVM_EXIT_VINTR,			"vintr" },
-	{ SVM_EXIT_CPUID,			"cpuid" },
-	{ SVM_EXIT_INVD,			"invd" },
-	{ SVM_EXIT_HLT,				"hlt" },
-	{ SVM_EXIT_INVLPG,			"invlpg" },
-	{ SVM_EXIT_INVLPGA,			"invlpga" },
-	{ SVM_EXIT_IOIO,			"io" },
-	{ SVM_EXIT_MSR,				"msr" },
-	{ SVM_EXIT_TASK_SWITCH,			"task_switch" },
-	{ SVM_EXIT_SHUTDOWN,			"shutdown" },
-	{ SVM_EXIT_VMRUN,			"vmrun" },
-	{ SVM_EXIT_VMMCALL,			"hypercall" },
-	{ SVM_EXIT_VMLOAD,			"vmload" },
-	{ SVM_EXIT_VMSAVE,			"vmsave" },
-	{ SVM_EXIT_STGI,			"stgi" },
-	{ SVM_EXIT_CLGI,			"clgi" },
-	{ SVM_EXIT_SKINIT,			"skinit" },
-	{ SVM_EXIT_WBINVD,			"wbinvd" },
-	{ SVM_EXIT_MONITOR,			"monitor" },
-	{ SVM_EXIT_MWAIT,			"mwait" },
-	{ SVM_EXIT_XSETBV,			"xsetbv" },
-	{ SVM_EXIT_NPF,				"npf" },
-	{ -1, NULL }
-};
-
 static int svm_get_lpage_level(void)
 {
 	return PT_PDPE_LEVEL;
@@ -4223,7 +4190,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_mt_mask = svm_get_mt_mask,
 
 	.get_exit_info = svm_get_exit_info,
-	.exit_reasons_str = svm_exit_reasons_str,
 
 	.get_lpage_level = svm_get_lpage_level,
 
@@ -4239,6 +4205,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.write_tsc_offset = svm_write_tsc_offset,
 	.adjust_tsc_offset = svm_adjust_tsc_offset,
 	.compute_tsc_offset = svm_compute_tsc_offset,
+	.read_l1_tsc = svm_read_l1_tsc,
 
 	.set_tdp_cr3 = set_tdp_cr3,
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 3ff898c104f7..911d2641f14c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -2,6 +2,8 @@
 #define _TRACE_KVM_H
 
 #include <linux/tracepoint.h>
+#include <asm/vmx.h>
+#include <asm/svm.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -181,6 +183,95 @@ TRACE_EVENT(kvm_apic,
 #define KVM_ISA_VMX   1
 #define KVM_ISA_SVM   2
 
+#define VMX_EXIT_REASONS \
+	{ EXIT_REASON_EXCEPTION_NMI,		"EXCEPTION_NMI" }, \
+	{ EXIT_REASON_EXTERNAL_INTERRUPT,	"EXTERNAL_INTERRUPT" }, \
+	{ EXIT_REASON_TRIPLE_FAULT,		"TRIPLE_FAULT" }, \
+	{ EXIT_REASON_PENDING_INTERRUPT,	"PENDING_INTERRUPT" }, \
+	{ EXIT_REASON_NMI_WINDOW,		"NMI_WINDOW" }, \
+	{ EXIT_REASON_TASK_SWITCH,		"TASK_SWITCH" }, \
+	{ EXIT_REASON_CPUID,			"CPUID" }, \
+	{ EXIT_REASON_HLT,			"HLT" }, \
+	{ EXIT_REASON_INVLPG,			"INVLPG" }, \
+	{ EXIT_REASON_RDPMC,			"RDPMC" }, \
+	{ EXIT_REASON_RDTSC,			"RDTSC" }, \
+	{ EXIT_REASON_VMCALL,			"VMCALL" }, \
+	{ EXIT_REASON_VMCLEAR,			"VMCLEAR" }, \
+	{ EXIT_REASON_VMLAUNCH,			"VMLAUNCH" }, \
+	{ EXIT_REASON_VMPTRLD,			"VMPTRLD" }, \
+	{ EXIT_REASON_VMPTRST,			"VMPTRST" }, \
+	{ EXIT_REASON_VMREAD,			"VMREAD" }, \
+	{ EXIT_REASON_VMRESUME,			"VMRESUME" }, \
+	{ EXIT_REASON_VMWRITE,			"VMWRITE" }, \
+	{ EXIT_REASON_VMOFF,			"VMOFF" }, \
+	{ EXIT_REASON_VMON,			"VMON" }, \
+	{ EXIT_REASON_CR_ACCESS,		"CR_ACCESS" }, \
+	{ EXIT_REASON_DR_ACCESS,		"DR_ACCESS" }, \
+	{ EXIT_REASON_IO_INSTRUCTION,		"IO_INSTRUCTION" }, \
+	{ EXIT_REASON_MSR_READ,			"MSR_READ" }, \
+	{ EXIT_REASON_MSR_WRITE,		"MSR_WRITE" }, \
+	{ EXIT_REASON_MWAIT_INSTRUCTION,	"MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_INSTRUCTION,	"MONITOR_INSTRUCTION" }, \
+	{ EXIT_REASON_PAUSE_INSTRUCTION,	"PAUSE_INSTRUCTION" }, \
+	{ EXIT_REASON_MCE_DURING_VMENTRY,	"MCE_DURING_VMENTRY" }, \
+	{ EXIT_REASON_TPR_BELOW_THRESHOLD,	"TPR_BELOW_THRESHOLD" },	\
+	{ EXIT_REASON_APIC_ACCESS,		"APIC_ACCESS" }, \
+	{ EXIT_REASON_EPT_VIOLATION,		"EPT_VIOLATION" }, \
+	{ EXIT_REASON_EPT_MISCONFIG,		"EPT_MISCONFIG" }, \
+	{ EXIT_REASON_WBINVD,			"WBINVD" }
+
+#define SVM_EXIT_REASONS \
+	{ SVM_EXIT_READ_CR0,			"read_cr0" }, \
+	{ SVM_EXIT_READ_CR3,			"read_cr3" }, \
+	{ SVM_EXIT_READ_CR4,			"read_cr4" }, \
+	{ SVM_EXIT_READ_CR8,			"read_cr8" }, \
+	{ SVM_EXIT_WRITE_CR0,			"write_cr0" }, \
+	{ SVM_EXIT_WRITE_CR3,			"write_cr3" }, \
+	{ SVM_EXIT_WRITE_CR4,			"write_cr4" }, \
+	{ SVM_EXIT_WRITE_CR8,			"write_cr8" }, \
+	{ SVM_EXIT_READ_DR0,			"read_dr0" }, \
+	{ SVM_EXIT_READ_DR1,			"read_dr1" }, \
+	{ SVM_EXIT_READ_DR2,			"read_dr2" }, \
+	{ SVM_EXIT_READ_DR3,			"read_dr3" }, \
+	{ SVM_EXIT_WRITE_DR0,			"write_dr0" }, \
+	{ SVM_EXIT_WRITE_DR1,			"write_dr1" }, \
+	{ SVM_EXIT_WRITE_DR2,			"write_dr2" }, \
+	{ SVM_EXIT_WRITE_DR3,			"write_dr3" }, \
+	{ SVM_EXIT_WRITE_DR5,			"write_dr5" }, \
+	{ SVM_EXIT_WRITE_DR7,			"write_dr7" }, \
+	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
+	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
+	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
+	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
+	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
+	{ SVM_EXIT_INTR,			"interrupt" }, \
+	{ SVM_EXIT_NMI,				"nmi" }, \
+	{ SVM_EXIT_SMI,				"smi" }, \
+	{ SVM_EXIT_INIT,			"init" }, \
+	{ SVM_EXIT_VINTR,			"vintr" }, \
+	{ SVM_EXIT_CPUID,			"cpuid" }, \
+	{ SVM_EXIT_INVD,			"invd" }, \
+	{ SVM_EXIT_HLT,				"hlt" }, \
+	{ SVM_EXIT_INVLPG,			"invlpg" }, \
+	{ SVM_EXIT_INVLPGA,			"invlpga" }, \
+	{ SVM_EXIT_IOIO,			"io" }, \
+	{ SVM_EXIT_MSR,				"msr" }, \
+	{ SVM_EXIT_TASK_SWITCH,			"task_switch" }, \
+	{ SVM_EXIT_SHUTDOWN,			"shutdown" }, \
+	{ SVM_EXIT_VMRUN,			"vmrun" }, \
+	{ SVM_EXIT_VMMCALL,			"hypercall" }, \
+	{ SVM_EXIT_VMLOAD,			"vmload" }, \
+	{ SVM_EXIT_VMSAVE,			"vmsave" }, \
+	{ SVM_EXIT_STGI,			"stgi" }, \
+	{ SVM_EXIT_CLGI,			"clgi" }, \
+	{ SVM_EXIT_SKINIT,			"skinit" }, \
+	{ SVM_EXIT_WBINVD,			"wbinvd" }, \
+	{ SVM_EXIT_MONITOR,			"monitor" }, \
+	{ SVM_EXIT_MWAIT,			"mwait" }, \
+	{ SVM_EXIT_XSETBV,			"xsetbv" }, \
+	{ SVM_EXIT_NPF,				"npf" }
+
 /*
  * Tracepoint for kvm guest exit:
  */
@@ -205,8 +296,9 @@ TRACE_EVENT(kvm_exit,
 	),
 
 	TP_printk("reason %s rip 0x%lx info %llx %llx",
-		 ftrace_print_symbols_seq(p, __entry->exit_reason,
-					  kvm_x86_ops->exit_reasons_str),
+		 (__entry->isa == KVM_ISA_VMX) ?
+		 __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
+		 __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
 		 __entry->guest_rip, __entry->info1, __entry->info2)
 );
 
@@ -486,9 +578,9 @@ TRACE_EVENT(kvm_nested_intercepts,
 TRACE_EVENT(kvm_nested_vmexit,
 	    TP_PROTO(__u64 rip, __u32 exit_code,
 		     __u64 exit_info1, __u64 exit_info2,
-		     __u32 exit_int_info, __u32 exit_int_info_err),
+		     __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
 	    TP_ARGS(rip, exit_code, exit_info1, exit_info2,
-		    exit_int_info, exit_int_info_err),
+		    exit_int_info, exit_int_info_err, isa),
 
 	TP_STRUCT__entry(
 		__field(	__u64,		rip			)
@@ -497,6 +589,7 @@ TRACE_EVENT(kvm_nested_vmexit,
 		__field(	__u64,		exit_info2		)
 		__field(	__u32,		exit_int_info		)
 		__field(	__u32,		exit_int_info_err	)
+		__field(	__u32,		isa			)
 	),
 
 	TP_fast_assign(
@@ -506,12 +599,14 @@ TRACE_EVENT(kvm_nested_vmexit,
 		__entry->exit_info2		= exit_info2;
 		__entry->exit_int_info		= exit_int_info;
 		__entry->exit_int_info_err	= exit_int_info_err;
+		__entry->isa			= isa;
 	),
 	TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx "
 		  "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
 		  __entry->rip,
-		  ftrace_print_symbols_seq(p, __entry->exit_code,
-					   kvm_x86_ops->exit_reasons_str),
+		 (__entry->isa == KVM_ISA_VMX) ?
+		 __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
+		 __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
 		  __entry->exit_info1, __entry->exit_info2,
 		  __entry->exit_int_info, __entry->exit_int_info_err)
 );
@@ -522,9 +617,9 @@ TRACE_EVENT(kvm_nested_vmexit,
 TRACE_EVENT(kvm_nested_vmexit_inject,
 	    TP_PROTO(__u32 exit_code,
 		     __u64 exit_info1, __u64 exit_info2,
-		     __u32 exit_int_info, __u32 exit_int_info_err),
+		     __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa),
 	    TP_ARGS(exit_code, exit_info1, exit_info2,
-		    exit_int_info, exit_int_info_err),
+		    exit_int_info, exit_int_info_err, isa),
 
 	TP_STRUCT__entry(
 		__field(	__u32,		exit_code		)
@@ -532,6 +627,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
 		__field(	__u64,		exit_info2		)
 		__field(	__u32,		exit_int_info		)
 		__field(	__u32,		exit_int_info_err	)
+		__field(	__u32,		isa			)
 	),
 
 	TP_fast_assign(
@@ -540,12 +636,14 @@ TRACE_EVENT(kvm_nested_vmexit_inject,
 		__entry->exit_info2		= exit_info2;
 		__entry->exit_int_info		= exit_int_info;
 		__entry->exit_int_info_err	= exit_int_info_err;
+		__entry->isa			= isa;
 	),
 
 	TP_printk("reason: %s ext_inf1: 0x%016llx "
 		  "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x",
-		  ftrace_print_symbols_seq(p, __entry->exit_code,
-					   kvm_x86_ops->exit_reasons_str),
+		 (__entry->isa == KVM_ISA_VMX) ?
+		 __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) :
+		 __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS),
 		__entry->exit_info1, __entry->exit_info2,
 		__entry->exit_int_info, __entry->exit_int_info_err)
 );
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e65a158dee64..a0d6bd9ad442 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -71,6 +71,9 @@ module_param(vmm_exclusive, bool, S_IRUGO);
 static int __read_mostly yield_on_hlt = 1;
 module_param(yield_on_hlt, bool, S_IRUGO);
 
+static int __read_mostly fasteoi = 1;
+module_param(fasteoi, bool, S_IRUGO);
+
 /*
  * If nested=1, nested virtualization is supported, i.e., guests may use
  * VMX and be a hypervisor for its own guests. If nested=0, guests may not
@@ -1748,6 +1751,21 @@ static u64 guest_read_tsc(void)
 }
 
 /*
+ * Like guest_read_tsc, but always returns L1's notion of the timestamp
+ * counter, even if a nested guest (L2) is currently running.
+ */
+u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
+{
+	u64 host_tsc, tsc_offset;
+
+	rdtscll(host_tsc);
+	tsc_offset = is_guest_mode(vcpu) ?
+		to_vmx(vcpu)->nested.vmcs01_tsc_offset :
+		vmcs_read64(TSC_OFFSET);
+	return host_tsc + tsc_offset;
+}
+
+/*
  * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
  * ioctl. In this case the call-back should update internal vmx state to make
  * the changes effective.
@@ -1762,15 +1780,23 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
  */
 static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-	vmcs_write64(TSC_OFFSET, offset);
-	if (is_guest_mode(vcpu))
+	if (is_guest_mode(vcpu)) {
 		/*
-		 * We're here if L1 chose not to trap the TSC MSR. Since
-		 * prepare_vmcs12() does not copy tsc_offset, we need to also
-		 * set the vmcs12 field here.
+		 * We're here if L1 chose not to trap WRMSR to TSC. According
+		 * to the spec, this should set L1's TSC; The offset that L1
+		 * set for L2 remains unchanged, and still needs to be added
+		 * to the newly set TSC to get L2's TSC.
 		 */
-		get_vmcs12(vcpu)->tsc_offset = offset -
-			to_vmx(vcpu)->nested.vmcs01_tsc_offset;
+		struct vmcs12 *vmcs12;
+		to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
+		/* recalculate vmcs02.TSC_OFFSET: */
+		vmcs12 = get_vmcs12(vcpu);
+		vmcs_write64(TSC_OFFSET, offset +
+			(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
+			 vmcs12->tsc_offset : 0));
+	} else {
+		vmcs_write64(TSC_OFFSET, offset);
+	}
 }
 
 static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -2736,8 +2762,8 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
-		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
-		       __func__);
+		pr_debug_ratelimited("%s: tss fixup for long mode. \n",
+				     __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
@@ -4115,8 +4141,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
 		/* EPT won't cause page fault directly */
-		if (enable_ept)
-			BUG();
+		BUG_ON(enable_ept);
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		trace_kvm_page_fault(cr2, error_code);
 
@@ -4518,6 +4543,24 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
 
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
+	if (likely(fasteoi)) {
+		unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+		int access_type, offset;
+
+		access_type = exit_qualification & APIC_ACCESS_TYPE;
+		offset = exit_qualification & APIC_ACCESS_OFFSET;
+		/*
+		 * Sane guest uses MOV to write EOI, with written value
+		 * not cared. So make a short-circuit here by avoiding
+		 * heavy instruction emulation.
+		 */
+		if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
+		    (offset == APIC_EOI)) {
+			kvm_lapic_set_eoi(vcpu);
+			skip_emulated_instruction(vcpu);
+			return 1;
+		}
+	}
 	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
@@ -5591,8 +5634,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return 0;
 
 	if (unlikely(vmx->fail)) {
-		printk(KERN_INFO "%s failed vm entry %x\n",
-		       __func__, vmcs_read32(VM_INSTRUCTION_ERROR));
+		pr_info_ratelimited("%s failed vm entry %x\n", __func__,
+				    vmcs_read32(VM_INSTRUCTION_ERROR));
 		return 1;
 	}
 
@@ -5696,8 +5739,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	u32 exit_reason = vmx->exit_reason;
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
-	trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
-
 	/* If guest state is invalid, start emulating */
 	if (vmx->emulation_required && emulate_invalid_guest_state)
 		return handle_invalid_guest_state(vcpu);
@@ -6101,6 +6142,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->loaded_vmcs->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
+	trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
 
 	vmx_complete_atomic_exit(vmx);
 	vmx_recover_nmi_blocking(vmx);
@@ -6241,49 +6283,6 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return ret;
 }
 
-#define _ER(x) { EXIT_REASON_##x, #x }
-
-static const struct trace_print_flags vmx_exit_reasons_str[] = {
-	_ER(EXCEPTION_NMI),
-	_ER(EXTERNAL_INTERRUPT),
-	_ER(TRIPLE_FAULT),
-	_ER(PENDING_INTERRUPT),
-	_ER(NMI_WINDOW),
-	_ER(TASK_SWITCH),
-	_ER(CPUID),
-	_ER(HLT),
-	_ER(INVLPG),
-	_ER(RDPMC),
-	_ER(RDTSC),
-	_ER(VMCALL),
-	_ER(VMCLEAR),
-	_ER(VMLAUNCH),
-	_ER(VMPTRLD),
-	_ER(VMPTRST),
-	_ER(VMREAD),
-	_ER(VMRESUME),
-	_ER(VMWRITE),
-	_ER(VMOFF),
-	_ER(VMON),
-	_ER(CR_ACCESS),
-	_ER(DR_ACCESS),
-	_ER(IO_INSTRUCTION),
-	_ER(MSR_READ),
-	_ER(MSR_WRITE),
-	_ER(MWAIT_INSTRUCTION),
-	_ER(MONITOR_INSTRUCTION),
-	_ER(PAUSE_INSTRUCTION),
-	_ER(MCE_DURING_VMENTRY),
-	_ER(TPR_BELOW_THRESHOLD),
-	_ER(APIC_ACCESS),
-	_ER(EPT_VIOLATION),
-	_ER(EPT_MISCONFIG),
-	_ER(WBINVD),
-	{ -1, NULL }
-};
-
-#undef _ER
-
 static int vmx_get_lpage_level(void)
 {
 	if (enable_ept && !cpu_has_vmx_ept_1g_page())
@@ -6514,8 +6513,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	set_cr4_guest_host_mask(vmx);
 
-	vmcs_write64(TSC_OFFSET,
-		vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+		vmcs_write64(TSC_OFFSET,
+			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+	else
+		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
 
 	if (enable_vpid) {
 		/*
@@ -6610,9 +6612,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (vmcs12->vm_entry_msr_load_count > 0 ||
 	    vmcs12->vm_exit_msr_load_count > 0 ||
 	    vmcs12->vm_exit_msr_store_count > 0) {
-		if (printk_ratelimit())
-			printk(KERN_WARNING
-			  "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
+		pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
+				    __func__);
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -6922,7 +6923,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 
 	load_vmcs12_host_state(vcpu, vmcs12);
 
-	/* Update TSC_OFFSET if vmx_adjust_tsc_offset() was used while L2 ran */
+	/* Update TSC_OFFSET if TSC was changed while L2 ran */
 	vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
 
 	/* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -7039,7 +7040,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_mt_mask = vmx_get_mt_mask,
 
 	.get_exit_info = vmx_get_exit_info,
-	.exit_reasons_str = vmx_exit_reasons_str,
 
 	.get_lpage_level = vmx_get_lpage_level,
 
@@ -7055,6 +7055,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset = vmx_adjust_tsc_offset,
 	.compute_tsc_offset = vmx_compute_tsc_offset,
+	.read_l1_tsc = vmx_read_l1_tsc,
 
 	.set_tdp_cr3 = vmx_set_cr3,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 84a28ea45fa4..c38efd7b792e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -44,6 +44,7 @@
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
 #include <linux/hash.h>
+#include <linux/pci.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -83,6 +84,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries);
+static void process_nmi(struct kvm_vcpu *vcpu);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -359,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 {
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	vcpu->arch.nmi_pending = 1;
+	atomic_inc(&vcpu->arch.nmi_queued);
+	kvm_make_request(KVM_REQ_NMI, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
@@ -599,6 +601,8 @@ static bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 static void update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 timer_mode_mask;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
 	if (!best)
@@ -610,6 +614,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
 			best->ecx |= bit(X86_FEATURE_OSXSAVE);
 	}
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+		best->function == 0x1) {
+		best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
+		timer_mode_mask = 3 << 17;
+	} else
+		timer_mode_mask = 1 << 17;
+
+	if (apic)
+		apic->lapic_timer.timer_mode_mask = timer_mode_mask;
 }
 
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -825,6 +839,7 @@ static u32 msrs_to_save[] = {
 static unsigned num_msrs_to_save;
 
 static u32 emulated_msrs[] = {
+	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
 	MSR_IA32_MCG_CTL,
@@ -1000,7 +1015,7 @@ static inline int kvm_tsc_changes_freq(void)
 	return ret;
 }
 
-static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.virtual_tsc_khz)
 		return vcpu->arch.virtual_tsc_khz;
@@ -1098,7 +1113,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
+	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
 	kernel_ns = get_kernel_ns();
 	this_tsc_khz = vcpu_tsc_khz(v);
 	if (unlikely(this_tsc_khz == 0)) {
@@ -1564,6 +1579,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		break;
 	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
 		return kvm_x2apic_msr_write(vcpu, msr, data);
+	case MSR_IA32_TSCDEADLINE:
+		kvm_set_lapic_tscdeadline_msr(vcpu, data);
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
@@ -1825,6 +1843,9 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
 	case HV_X64_MSR_TPR:
 		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+	case HV_X64_MSR_APIC_ASSIST_PAGE:
+		data = vcpu->arch.hv_vapic;
+		break;
 	default:
 		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -1839,7 +1860,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 
 	switch (msr) {
 	case MSR_IA32_PLATFORM_ID:
-	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_EBL_CR_POWERON:
 	case MSR_IA32_DEBUGCTLMSR:
 	case MSR_IA32_LASTBRANCHFROMIP:
@@ -1860,6 +1880,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_FAM10H_MMIO_CONF_BASE:
 		data = 0;
 		break;
+	case MSR_IA32_UCODE_REV:
+		data = 0x100000000ULL;
+		break;
 	case MSR_MTRRcap:
 		data = 0x500 | KVM_NR_VAR_MTRR;
 		break;
@@ -1888,6 +1911,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
 		return kvm_x2apic_msr_read(vcpu, msr, pdata);
 		break;
+	case MSR_IA32_TSCDEADLINE:
+		data = kvm_get_lapic_tscdeadline_msr(vcpu);
+		break;
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
@@ -2086,6 +2112,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
 		break;
 	case KVM_CAP_NR_VCPUS:
+		r = KVM_SOFT_MAX_VCPUS;
+		break;
+	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_NR_MEMSLOTS:
@@ -2095,7 +2124,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = 0;
 		break;
 	case KVM_CAP_IOMMU:
-		r = iommu_found();
+		r = iommu_present(&pci_bus_type);
 		break;
 	case KVM_CAP_MCE:
 		r = KVM_MAX_MCE_BANKS;
@@ -2210,7 +2239,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		s64 tsc_delta;
 		u64 tsc;
 
-		kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
+		tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 		tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
 			     tsc - vcpu->arch.last_guest_tsc;
 
@@ -2234,7 +2263,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
-	kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -2819,6 +2848,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 					       struct kvm_vcpu_events *events)
 {
+	process_nmi(vcpu);
 	events->exception.injected =
 		vcpu->arch.exception.pending &&
 		!kvm_exception_is_soft(vcpu->arch.exception.nr);
@@ -2836,7 +2866,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
 
 	events->nmi.injected = vcpu->arch.nmi_injected;
-	events->nmi.pending = vcpu->arch.nmi_pending;
+	events->nmi.pending = vcpu->arch.nmi_pending != 0;
 	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
 	events->nmi.pad = 0;
 
@@ -2856,6 +2886,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 			      | KVM_VCPUEVENT_VALID_SHADOW))
 		return -EINVAL;
 
+	process_nmi(vcpu);
 	vcpu->arch.exception.pending = events->exception.injected;
 	vcpu->arch.exception.nr = events->exception.nr;
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -3556,7 +3587,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			if (r) {
 				mutex_lock(&kvm->slots_lock);
 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev);
+							  &vpic->dev_master);
+				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+							  &vpic->dev_slave);
+				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+							  &vpic->dev_eclr);
 				mutex_unlock(&kvm->slots_lock);
 				kfree(vpic);
 				goto create_irqchip_unlock;
@@ -4045,84 +4080,105 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 	return 0;
 }
 
-static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
-				  unsigned long addr,
-				  void *val,
-				  unsigned int bytes,
-				  struct x86_exception *exception)
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+			const void *val, int bytes)
 {
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-	gpa_t gpa;
-	int handled, ret;
+	int ret;
 
+	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
+	if (ret < 0)
+		return 0;
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
+	return 1;
+}
+
+struct read_write_emulator_ops {
+	int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
+				  int bytes);
+	int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
+				  void *val, int bytes);
+	int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
+			       int bytes, void *val);
+	int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
+				    void *val, int bytes);
+	bool write;
+};
+
+static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
+{
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
 			       vcpu->mmio_phys_addr, *(u64 *)val);
 		vcpu->mmio_read_completed = 0;
-		return X86EMUL_CONTINUE;
+		return 1;
 	}
 
-	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, false);
-
-	if (ret < 0)
-		return X86EMUL_PROPAGATE_FAULT;
-
-	if (ret)
-		goto mmio;
-
-	if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
-	    == X86EMUL_CONTINUE)
-		return X86EMUL_CONTINUE;
+	return 0;
+}
 
-mmio:
-	/*
-	 * Is this MMIO handled locally?
-	 */
-	handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
+			void *val, int bytes)
+{
+	return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
+}
 
-	if (handled == bytes)
-		return X86EMUL_CONTINUE;
+static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
+			 void *val, int bytes)
+{
+	return emulator_write_phys(vcpu, gpa, val, bytes);
+}
 
-	gpa += handled;
-	bytes -= handled;
-	val += handled;
+static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
+{
+	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+	return vcpu_mmio_write(vcpu, gpa, bytes, val);
+}
 
+static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
+			  void *val, int bytes)
+{
 	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
-
-	vcpu->mmio_needed = 1;
-	vcpu->run->exit_reason = KVM_EXIT_MMIO;
-	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->mmio_size = bytes;
-	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
-	vcpu->mmio_index = 0;
-
 	return X86EMUL_IO_NEEDED;
 }
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
-			const void *val, int bytes)
+static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
+			   void *val, int bytes)
 {
-	int ret;
-
-	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
-	if (ret < 0)
-		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
-	return 1;
+	memcpy(vcpu->mmio_data, val, bytes);
+	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+	return X86EMUL_CONTINUE;
 }
 
-static int emulator_write_emulated_onepage(unsigned long addr,
-					   const void *val,
-					   unsigned int bytes,
-					   struct x86_exception *exception,
-					   struct kvm_vcpu *vcpu)
+static struct read_write_emulator_ops read_emultor = {
+	.read_write_prepare = read_prepare,
+	.read_write_emulate = read_emulate,
+	.read_write_mmio = vcpu_mmio_read,
+	.read_write_exit_mmio = read_exit_mmio,
+};
+
+static struct read_write_emulator_ops write_emultor = {
+	.read_write_emulate = write_emulate,
+	.read_write_mmio = write_mmio,
+	.read_write_exit_mmio = write_exit_mmio,
+	.write = true,
+};
+
+static int emulator_read_write_onepage(unsigned long addr, void *val,
+				       unsigned int bytes,
+				       struct x86_exception *exception,
+				       struct kvm_vcpu *vcpu,
+				       struct read_write_emulator_ops *ops)
 {
 	gpa_t gpa;
 	int handled, ret;
+	bool write = ops->write;
 
-	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, true);
+	if (ops->read_write_prepare &&
+		  ops->read_write_prepare(vcpu, val, bytes))
+		return X86EMUL_CONTINUE;
+
+	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
 
 	if (ret < 0)
 		return X86EMUL_PROPAGATE_FAULT;
@@ -4131,15 +4187,14 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	if (ret)
 		goto mmio;
 
-	if (emulator_write_phys(vcpu, gpa, val, bytes))
+	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
 mmio:
-	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+	handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
 	if (handled == bytes)
 		return X86EMUL_CONTINUE;
 
@@ -4148,23 +4203,20 @@ mmio:
 	val += handled;
 
 	vcpu->mmio_needed = 1;
-	memcpy(vcpu->mmio_data, val, bytes);
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
 	vcpu->mmio_size = bytes;
 	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+	vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
 	vcpu->mmio_index = 0;
 
-	return X86EMUL_CONTINUE;
+	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
-			    unsigned long addr,
-			    const void *val,
-			    unsigned int bytes,
-			    struct x86_exception *exception)
+int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+			void *val, unsigned int bytes,
+			struct x86_exception *exception,
+			struct read_write_emulator_ops *ops)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
@@ -4173,16 +4225,38 @@ int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
 		int rc, now;
 
 		now = -addr & ~PAGE_MASK;
-		rc = emulator_write_emulated_onepage(addr, val, now, exception,
-						     vcpu);
+		rc = emulator_read_write_onepage(addr, val, now, exception,
+						 vcpu, ops);
+
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		addr += now;
 		val += now;
 		bytes -= now;
 	}
-	return emulator_write_emulated_onepage(addr, val, bytes, exception,
-					       vcpu);
+
+	return emulator_read_write_onepage(addr, val, bytes, exception,
+					   vcpu, ops);
+}
+
+static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
+				  unsigned long addr,
+				  void *val,
+				  unsigned int bytes,
+				  struct x86_exception *exception)
+{
+	return emulator_read_write(ctxt, addr, val, bytes,
+				   exception, &read_emultor);
+}
+
+int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+			    unsigned long addr,
+			    const void *val,
+			    unsigned int bytes,
+			    struct x86_exception *exception)
+{
+	return emulator_read_write(ctxt, addr, (void *)val, bytes,
+				   exception, &write_emultor);
 }
 
 #define CMPXCHG_TYPE(t, ptr, old, new) \
@@ -4712,7 +4786,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
 	if (irq == NMI_VECTOR)
-		vcpu->arch.nmi_pending = false;
+		vcpu->arch.nmi_pending = 0;
 	else
 		vcpu->arch.interrupt.pending = false;
 
@@ -4788,7 +4862,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
 		trace_kvm_emulate_insn_start(vcpu);
 		++vcpu->stat.insn_emulation;
-		if (r)  {
+		if (r != EMULATION_OK)  {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
 			if (reexecute_instruction(vcpu, cr2))
@@ -5521,7 +5595,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 	/* try to inject new event if pending */
 	if (vcpu->arch.nmi_pending) {
 		if (kvm_x86_ops->nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
+			--vcpu->arch.nmi_pending;
 			vcpu->arch.nmi_injected = true;
 			kvm_x86_ops->set_nmi(vcpu);
 		}
@@ -5553,10 +5627,26 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 	}
 }
 
+static void process_nmi(struct kvm_vcpu *vcpu)
+{
+	unsigned limit = 2;
+
+	/*
+	 * x86 is limited to one NMI running, and one NMI pending after it.
+	 * If an NMI is already in progress, limit further NMIs to just one.
+	 * Otherwise, allow two (and we'll inject the first one immediately).
+	 */
+	if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
+		limit = 1;
+
+	vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
+	vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-	bool nmi_pending;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
 
@@ -5596,6 +5686,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
+		if (kvm_check_request(KVM_REQ_NMI, vcpu))
+			process_nmi(vcpu);
 
 	}
 
@@ -5603,19 +5695,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (unlikely(r))
 		goto out;
 
-	/*
-	 * An NMI can be injected between local nmi_pending read and
-	 * vcpu->arch.nmi_pending read inside inject_pending_event().
-	 * But in that case, KVM_REQ_EVENT will be set, which makes
-	 * the race described above benign.
-	 */
-	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
-
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
-		if (nmi_pending)
+		if (vcpu->arch.nmi_pending)
 			kvm_x86_ops->enable_nmi_window(vcpu);
 		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
@@ -5678,7 +5762,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (hw_breakpoint_active())
 		hw_breakpoint_restore();
 
-	kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+	vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
@@ -6323,7 +6407,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.nmi_pending = false;
+	atomic_set(&vcpu->arch.nmi_queued, 0);
+	vcpu->arch.nmi_pending = 0;
 	vcpu->arch.nmi_injected = false;
 
 	vcpu->arch.switch_db_regs = 0;
@@ -6598,7 +6683,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
-		|| vcpu->arch.nmi_pending ||
+		|| atomic_read(&vcpu->arch.nmi_queued) ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
 		 kvm_cpu_has_interrupt(vcpu));
 }
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 13ee258442ae..f63da5ef217c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -70,6 +70,7 @@
 #include <asm/i387.h>
 #include <asm/stackprotector.h>
 #include <asm/reboot.h>		/* for struct machine_ops */
+#include <asm/kvm_para.h>
 
 /*G:010
  * Welcome to the Guest!
@@ -455,6 +456,15 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
 		*ax &= 0xFFFFF0FF;
 		*ax |= 0x00000500;
 		break;
+
+	/*
+	 * This is used to detect if we're running under KVM.  We might be,
+	 * but that's a Host matter, not us.  So say we're not.
+	 */
+	case KVM_CPUID_SIGNATURE:
+		*bx = *cx = *dx = 0;
+		break;
+
 	/*
 	 * 0x80000000 returns the highest Extended Function, so we futureproof
 	 * like we do above by limiting it to known fields.
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 9f33b984d0ef..374562ed6704 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -22,14 +22,23 @@
 #include <asm/inat.h>
 #include <asm/insn.h>
 
-#define get_next(t, insn)	\
-	({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+/* Verify next sizeof(t) bytes can be on the same instruction */
+#define validate_next(t, insn, n)	\
+	((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE)
+
+#define __get_next(t, insn)	\
+	({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n)	\
+	({ t r = *(t*)((insn)->next_byte + n); r; })
 
-#define peek_next(t, insn)	\
-	({t r; r = *(t*)insn->next_byte; r; })
+#define get_next(t, insn)	\
+	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
 
 #define peek_nbyte_next(t, insn, n)	\
-	({t r; r = *(t*)((insn)->next_byte + n); r; })
+	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
 
 /**
  * insn_init() - initialize struct insn
@@ -158,6 +167,8 @@ vex_end:
 	insn->vex_prefix.got = 1;
 
 	prefixes->got = 1;
+
+err_out:
 	return;
 }
 
@@ -208,6 +219,9 @@ void insn_get_opcode(struct insn *insn)
 		insn->attr = 0;	/* This instruction is bad */
 end:
 	opcode->got = 1;
+
+err_out:
+	return;
 }
 
 /**
@@ -241,6 +255,9 @@ void insn_get_modrm(struct insn *insn)
 	if (insn->x86_64 && inat_is_force64(insn->attr))
 		insn->opnd_bytes = 8;
 	modrm->got = 1;
+
+err_out:
+	return;
 }
 
 
@@ -290,6 +307,9 @@ void insn_get_sib(struct insn *insn)
 		}
 	}
 	insn->sib.got = 1;
+
+err_out:
+	return;
 }
 
 
@@ -351,6 +371,9 @@ void insn_get_displacement(struct insn *insn)
 	}
 out:
 	insn->displacement.got = 1;
+
+err_out:
+	return;
 }
 
 /* Decode moffset16/32/64 */
@@ -373,6 +396,9 @@ static void __get_moffset(struct insn *insn)
 		break;
 	}
 	insn->moffset1.got = insn->moffset2.got = 1;
+
+err_out:
+	return;
 }
 
 /* Decode imm v32(Iz) */
@@ -389,6 +415,9 @@ static void __get_immv32(struct insn *insn)
 		insn->immediate.nbytes = 4;
 		break;
 	}
+
+err_out:
+	return;
 }
 
 /* Decode imm v64(Iv/Ov) */
@@ -411,6 +440,9 @@ static void __get_immv(struct insn *insn)
 		break;
 	}
 	insn->immediate1.got = insn->immediate2.got = 1;
+
+err_out:
+	return;
 }
 
 /* Decode ptr16:16/32(Ap) */
@@ -432,6 +464,9 @@ static void __get_immptr(struct insn *insn)
 	insn->immediate2.value = get_next(unsigned short, insn);
 	insn->immediate2.nbytes = 2;
 	insn->immediate1.got = insn->immediate2.got = 1;
+
+err_out:
+	return;
 }
 
 /**
@@ -496,6 +531,9 @@ void insn_get_immediate(struct insn *insn)
 	}
 done:
 	insn->immediate.got = 1;
+
+err_out:
+	return;
 }
 
 /**
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 0d17c8c50acd..5db0490deb07 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,7 +17,7 @@
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
-#include <asm/vsyscall.h>
+#include <asm/fixmap.h>			/* VSYSCALL_START		*/
 
 /*
  * Page fault error code bits:
@@ -420,12 +420,14 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
 	return 0;
 }
 
+#ifdef CONFIG_CPU_SUP_AMD
 static const char errata93_warning[] =
 KERN_ERR 
 "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 "******* Working around it, but it may cause SEGVs or burn power.\n"
 "******* Please consider a BIOS update.\n"
 "******* Disabling USB legacy in the BIOS may also help.\n";
+#endif
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -505,7 +507,11 @@ bad:
  */
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD
+	    || boot_cpu_data.x86 != 0xf)
+		return 0;
+
 	if (address != regs->ip)
 		return 0;
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 30326443ab81..87488b93a65c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -63,9 +63,8 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
 #ifdef CONFIG_X86_32
 	/* for fixmap */
 	tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-
-	good_end = max_pfn_mapped << PAGE_SHIFT;
 #endif
+	good_end = max_pfn_mapped << PAGE_SHIFT;
 
 	base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
 	if (base == MEMBLOCK_ERROR)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1dab5194fd9d..4b5ba85eb5c9 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -31,6 +31,10 @@
 #include <linux/sched.h>
 #include <asm/elf.h>
 
+struct __read_mostly va_alignment va_align = {
+	.flags = -1,
+};
+
 static unsigned int stack_maxrandom_size(void)
 {
 	unsigned int max = 0;
@@ -42,7 +46,6 @@ static unsigned int stack_maxrandom_size(void)
 	return max;
 }
 
-
 /*
  * Top of mmap area (just below the process stack).
  *
@@ -51,21 +54,6 @@ static unsigned int stack_maxrandom_size(void)
 #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
 #define MAX_GAP (TASK_SIZE/6*5)
 
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static int mmap_is_ia32(void)
-{
-#ifdef CONFIG_X86_32
-	return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		return 1;
-#endif
-	return 0;
-}
-
 static int mmap_is_legacy(void)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 67421f38a215..de54b9b278a7 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -29,7 +29,6 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
-#include <linux/version.h>
 #include <linux/kallsyms.h>
 #include <asm/pgtable.h>
 #include <linux/mmiotrace.h>
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 68894fdc034b..75f9528e0372 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -61,26 +61,15 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
 }
 
 
-static int profile_exceptions_notify(struct notifier_block *self,
-				     unsigned long val, void *data)
+static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs)
 {
-	struct die_args *args = (struct die_args *)data;
-	int ret = NOTIFY_DONE;
-
-	switch (val) {
-	case DIE_NMI:
-		if (ctr_running)
-			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
-		else if (!nmi_enabled)
-			break;
-		else
-			model->stop(&__get_cpu_var(cpu_msrs));
-		ret = NOTIFY_STOP;
-		break;
-	default:
-		break;
-	}
-	return ret;
+	if (ctr_running)
+		model->check_ctrs(regs, &__get_cpu_var(cpu_msrs));
+	else if (!nmi_enabled)
+		return NMI_DONE;
+	else
+		model->stop(&__get_cpu_var(cpu_msrs));
+	return NMI_HANDLED;
 }
 
 static void nmi_cpu_save_registers(struct op_msrs *msrs)
@@ -355,20 +344,14 @@ static void nmi_cpu_setup(void *dummy)
 	int cpu = smp_processor_id();
 	struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 	nmi_cpu_save_registers(msrs);
-	spin_lock(&oprofilefs_lock);
+	raw_spin_lock(&oprofilefs_lock);
 	model->setup_ctrs(model, msrs);
 	nmi_cpu_setup_mux(cpu, msrs);
-	spin_unlock(&oprofilefs_lock);
+	raw_spin_unlock(&oprofilefs_lock);
 	per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
-static struct notifier_block profile_exceptions_nb = {
-	.notifier_call = profile_exceptions_notify,
-	.next = NULL,
-	.priority = NMI_LOCAL_LOW_PRIOR,
-};
-
 static void nmi_cpu_restore_registers(struct op_msrs *msrs)
 {
 	struct op_msr *counters = msrs->counters;
@@ -402,8 +385,6 @@ static void nmi_cpu_shutdown(void *dummy)
 	apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 	apic_write(APIC_LVTERR, v);
 	nmi_cpu_restore_registers(msrs);
-	if (model->cpu_down)
-		model->cpu_down();
 }
 
 static void nmi_cpu_up(void *dummy)
@@ -508,7 +489,8 @@ static int nmi_setup(void)
 	ctr_running = 0;
 	/* make variables visible to the nmi handler: */
 	smp_mb();
-	err = register_die_notifier(&profile_exceptions_nb);
+	err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify,
+					0, "oprofile");
 	if (err)
 		goto fail;
 
@@ -538,7 +520,7 @@ static void nmi_shutdown(void)
 	put_online_cpus();
 	/* make variables visible to the nmi handler: */
 	smp_mb();
-	unregister_die_notifier(&profile_exceptions_nb);
+	unregister_nmi_handler(NMI_LOCAL, "oprofile");
 	msrs = &get_cpu_var(cpu_msrs);
 	model->shutdown(msrs);
 	free_msrs();
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index 720bf5a53c51..7f8052cd6620 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -18,32 +18,16 @@
 #include <asm/apic.h>
 #include <asm/ptrace.h>
 
-static int profile_timer_exceptions_notify(struct notifier_block *self,
-					   unsigned long val, void *data)
+static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
 {
-	struct die_args *args = (struct die_args *)data;
-	int ret = NOTIFY_DONE;
-
-	switch (val) {
-	case DIE_NMI:
-		oprofile_add_sample(args->regs, 0);
-		ret = NOTIFY_STOP;
-		break;
-	default:
-		break;
-	}
-	return ret;
+	oprofile_add_sample(regs, 0);
+	return NMI_HANDLED;
 }
 
-static struct notifier_block profile_timer_exceptions_nb = {
-	.notifier_call = profile_timer_exceptions_notify,
-	.next = NULL,
-	.priority = NMI_LOW_PRIOR,
-};
-
 static int timer_start(void)
 {
-	if (register_die_notifier(&profile_timer_exceptions_nb))
+	if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
+					0, "oprofile-timer"))
 		return 1;
 	return 0;
 }
@@ -51,7 +35,7 @@ static int timer_start(void)
 
 static void timer_stop(void)
 {
-	unregister_die_notifier(&profile_timer_exceptions_nb);
+	unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
 	synchronize_sched();  /* Allow already-started NMIs to complete. */
 }
 
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 9cbb710dc94b..303f08637826 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,8 +29,6 @@
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS		4
-#define NUM_COUNTERS_F15H	6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 #define NUM_VIRT_COUNTERS	32
 #else
@@ -70,62 +68,12 @@ static struct ibs_config ibs_config;
 static struct ibs_state ibs_state;
 
 /*
- * IBS cpuid feature detection
- */
-
-#define IBS_CPUID_FEATURES		0x8000001b
-
-/*
- * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
- * bit 0 is used to indicate the existence of IBS.
- */
-#define IBS_CAPS_AVAIL			(1U<<0)
-#define IBS_CAPS_FETCHSAM		(1U<<1)
-#define IBS_CAPS_OPSAM			(1U<<2)
-#define IBS_CAPS_RDWROPCNT		(1U<<3)
-#define IBS_CAPS_OPCNT			(1U<<4)
-#define IBS_CAPS_BRNTRGT		(1U<<5)
-#define IBS_CAPS_OPCNTEXT		(1U<<6)
-
-#define IBS_CAPS_DEFAULT		(IBS_CAPS_AVAIL		\
-					 | IBS_CAPS_FETCHSAM	\
-					 | IBS_CAPS_OPSAM)
-
-/*
- * IBS APIC setup
- */
-#define IBSCTL				0x1cc
-#define IBSCTL_LVT_OFFSET_VALID		(1ULL<<8)
-#define IBSCTL_LVT_OFFSET_MASK		0x0F
-
-/*
  * IBS randomization macros
  */
 #define IBS_RANDOM_BITS			12
 #define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
 #define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
 
-static u32 get_ibs_caps(void)
-{
-	u32 ibs_caps;
-	unsigned int max_level;
-
-	if (!boot_cpu_has(X86_FEATURE_IBS))
-		return 0;
-
-	/* check IBS cpuid feature flags */
-	max_level = cpuid_eax(0x80000000);
-	if (max_level < IBS_CPUID_FEATURES)
-		return IBS_CAPS_DEFAULT;
-
-	ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
-	if (!(ibs_caps & IBS_CAPS_AVAIL))
-		/* cpuid flags not valid */
-		return IBS_CAPS_DEFAULT;
-
-	return ibs_caps;
-}
-
 /*
  * 16-bit Linear Feedback Shift Register (LFSR)
  *
@@ -316,81 +264,6 @@ static void op_amd_stop_ibs(void)
 		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 }
 
-static inline int get_eilvt(int offset)
-{
-	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
-	return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-static inline int ibs_eilvt_valid(void)
-{
-	int offset;
-	u64 val;
-	int valid = 0;
-
-	preempt_disable();
-
-	rdmsrl(MSR_AMD64_IBSCTL, val);
-	offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
-		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		goto out;
-	}
-
-	if (!get_eilvt(offset)) {
-		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
-		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-		goto out;
-	}
-
-	valid = 1;
-out:
-	preempt_enable();
-
-	return valid;
-}
-
-static inline int get_ibs_offset(void)
-{
-	u64 val;
-
-	rdmsrl(MSR_AMD64_IBSCTL, val);
-	if (!(val & IBSCTL_LVT_OFFSET_VALID))
-		return -EINVAL;
-
-	return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void)
-{
-	int offset;
-
-	offset = get_ibs_offset();
-	if (offset < 0)
-		goto failed;
-
-	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
-		return;
-failed:
-	pr_warn("oprofile: IBS APIC setup failed on cpu #%d\n",
-		smp_processor_id());
-}
-
-static void clear_APIC_ibs(void)
-{
-	int offset;
-
-	offset = get_ibs_offset();
-	if (offset >= 0)
-		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 
 static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
@@ -439,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 			goto fail;
 		}
 		/* both registers must be reserved */
-		if (num_counters == NUM_COUNTERS_F15H) {
+		if (num_counters == AMD64_NUM_COUNTERS_F15H) {
 			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
 			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
 		} else {
@@ -504,15 +377,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
 		val |= op_x86_get_ctrl(model, &counter_config[virt]);
 		wrmsrl(msrs->controls[i].addr, val);
 	}
-
-	if (ibs_caps)
-		setup_APIC_ibs();
-}
-
-static void op_amd_cpu_shutdown(void)
-{
-	if (ibs_caps)
-		clear_APIC_ibs();
 }
 
 static int op_amd_check_ctrs(struct pt_regs * const regs,
@@ -575,86 +439,6 @@ static void op_amd_stop(struct op_msrs const * const msrs)
 	op_amd_stop_ibs();
 }
 
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
-	struct pci_dev *cpu_cfg;
-	int nodes;
-	u32 value = 0;
-
-	nodes = 0;
-	cpu_cfg = NULL;
-	do {
-		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
-					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
-					 cpu_cfg);
-		if (!cpu_cfg)
-			break;
-		++nodes;
-		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
-				       | IBSCTL_LVT_OFFSET_VALID);
-		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
-		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
-			pci_dev_put(cpu_cfg);
-			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-			       "IBSCTL = 0x%08x\n", value);
-			return -EINVAL;
-		}
-	} while (1);
-
-	if (!nodes) {
-		printk(KERN_DEBUG "No CPU node configured for IBS\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-/*
- * This runs only on the current cpu. We try to find an LVT offset and
- * setup the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset. This updates then
- * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
- * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_-
- * amd_cpu_shutdown() using the new offset.
- */
-static int force_ibs_eilvt_setup(void)
-{
-	int offset;
-	int ret;
-
-	preempt_disable();
-	/* find the next free available EILVT entry, skip offset 0 */
-	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
-		if (get_eilvt(offset))
-			break;
-	}
-	preempt_enable();
-
-	if (offset == APIC_EILVT_NR_MAX) {
-		printk(KERN_DEBUG "No EILVT entry available\n");
-		return -EBUSY;
-	}
-
-	ret = setup_ibs_ctl(offset);
-	if (ret)
-		goto out;
-
-	if (!ibs_eilvt_valid()) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
-	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
-
-	return 0;
-out:
-	preempt_disable();
-	put_eilvt(offset);
-	preempt_enable();
-	return ret;
-}
-
 /*
  * check and reserve APIC extended interrupt LVT offset for IBS if
  * available
@@ -667,17 +451,6 @@ static void init_ibs(void)
 	if (!ibs_caps)
 		return;
 
-	if (ibs_eilvt_valid())
-		goto out;
-
-	if (!force_ibs_eilvt_setup())
-		goto out;
-
-	/* Failed to setup ibs */
-	ibs_caps = 0;
-	return;
-
-out:
 	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 }
 
@@ -741,9 +514,9 @@ static int op_amd_init(struct oprofile_operations *ops)
 	ops->create_files = setup_ibs_files;
 
 	if (boot_cpu_data.x86 == 0x15) {
-		num_counters = NUM_COUNTERS_F15H;
+		num_counters = AMD64_NUM_COUNTERS_F15H;
 	} else {
-		num_counters = NUM_COUNTERS;
+		num_counters = AMD64_NUM_COUNTERS;
 	}
 
 	op_amd_spec.num_counters = num_counters;
@@ -760,7 +533,6 @@ struct op_x86_model_spec op_amd_spec = {
 	.init			= op_amd_init,
 	.fill_in_addresses	= &op_amd_fill_in_addresses,
 	.setup_ctrs		= &op_amd_setup_ctrs,
-	.cpu_down		= &op_amd_cpu_shutdown,
 	.check_ctrs		= &op_amd_check_ctrs,
 	.start			= &op_amd_start,
 	.stop			= &op_amd_stop,
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 94b745045e45..d90528ea5412 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -28,7 +28,7 @@ static int counter_width = 32;
 
 #define MSR_PPRO_EVENTSEL_RESERVED	((0xFFFFFFFFULL<<32)|(1ULL<<21))
 
-static u64 *reset_value;
+static u64 reset_value[OP_MAX_COUNTER];
 
 static void ppro_shutdown(struct op_msrs const * const msrs)
 {
@@ -40,10 +40,6 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
 		release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
 		release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
 	}
-	if (reset_value) {
-		kfree(reset_value);
-		reset_value = NULL;
-	}
 }
 
 static int ppro_fill_in_addresses(struct op_msrs * const msrs)
@@ -79,13 +75,6 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
 	u64 val;
 	int i;
 
-	if (!reset_value) {
-		reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
-					GFP_ATOMIC);
-		if (!reset_value)
-			return;
-	}
-
 	if (cpu_has_arch_perfmon) {
 		union cpuid10_eax eax;
 		eax.full = cpuid_eax(0xa);
@@ -141,13 +130,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 	u64 val;
 	int i;
 
-	/*
-	 * This can happen if perf counters are in use when
-	 * we steal the die notifier NMI.
-	 */
-	if (unlikely(!reset_value))
-		goto out;
-
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
@@ -158,7 +140,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 		wrmsrl(msrs->counters[i].addr, -reset_value[i]);
 	}
 
-out:
 	/* Only P6 based Pentium M need to re-unmask the apic vector but it
 	 * doesn't hurt other P6 variant */
 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
@@ -179,8 +160,6 @@ static void ppro_start(struct op_msrs const * const msrs)
 	u64 val;
 	int i;
 
-	if (!reset_value)
-		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			rdmsrl(msrs->controls[i].addr, val);
@@ -196,8 +175,6 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	u64 val;
 	int i;
 
-	if (!reset_value)
-		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
@@ -242,7 +219,7 @@ static void arch_perfmon_setup_counters(void)
 		eax.split.bit_width = 40;
 	}
 
-	num_counters = eax.split.num_counters;
+	num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER);
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 89017fa1fd63..71e8a67337e2 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -43,7 +43,6 @@ struct op_x86_model_spec {
 	int		(*fill_in_addresses)(struct op_msrs * const msrs);
 	void		(*setup_ctrs)(struct op_x86_model_spec const *model,
 				      struct op_msrs const * const msrs);
-	void		(*cpu_down)(void);
 	int		(*check_ctrs)(struct pt_regs * const regs,
 				      struct op_msrs const * const msrs);
 	void		(*start)(struct op_msrs const * const msrs);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 039d91315bc5..404f21a3ff9e 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -43,6 +43,17 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"),
                 },
         },
+	/* https://bugzilla.kernel.org/show_bug.cgi?id=30552 */
+	/* 2006 AMD HT/VIA system with two host bridges */
+	{
+		.callback = set_use_crs,
+		.ident = "ASUS M2V-MX SE",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+			DMI_MATCH(DMI_BOARD_NAME, "M2V-MX SE"),
+			DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
+		},
+	},
 	{}
 };
 
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 99176094500b..41bd2a2d2c50 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -304,7 +304,7 @@ static int ce4100_conf_write(unsigned int seg, unsigned int bus,
 	return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
 }
 
-struct pci_raw_ops ce4100_pci_conf = {
+static const struct pci_raw_ops ce4100_pci_conf = {
 	.read =	ce4100_conf_read,
 	.write = ce4100_conf_write,
 };
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 92df322e0b57..7962ccb4d9b2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -33,8 +33,8 @@ int noioapicreroute = 1;
 int pcibios_last_bus = -1;
 unsigned long pirq_table_addr;
 struct pci_bus *pci_root_bus;
-struct pci_raw_ops *raw_pci_ops;
-struct pci_raw_ops *raw_pci_ext_ops;
+const struct pci_raw_ops *__read_mostly raw_pci_ops;
+const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
 
 int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val)
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 4f2c70439d7f..15460590b8c5 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -79,7 +79,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus,
 
 #undef PCI_CONF1_ADDRESS
 
-struct pci_raw_ops pci_direct_conf1 = {
+const struct pci_raw_ops pci_direct_conf1 = {
 	.read =		pci_conf1_read,
 	.write =	pci_conf1_write,
 };
@@ -175,7 +175,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
 
 #undef PCI_CONF2_ADDRESS
 
-struct pci_raw_ops pci_direct_conf2 = {
+static const struct pci_raw_ops pci_direct_conf2 = {
 	.read =		pci_conf2_read,
 	.write =	pci_conf2_write,
 };
@@ -191,7 +191,7 @@ struct pci_raw_ops pci_direct_conf2 = {
  * This should be close to trivial, but it isn't, because there are buggy
  * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
  */
-static int __init pci_sanity_check(struct pci_raw_ops *o)
+static int __init pci_sanity_check(const struct pci_raw_ops *o)
 {
 	u32 x = 0;
 	int year, devfn;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index a3d9c54792ae..5372e86834c0 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -117,7 +117,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 	return 0;
 }
 
-static struct pci_raw_ops pci_mmcfg = {
+static const struct pci_raw_ops pci_mmcfg = {
 	.read =		pci_mmcfg_read,
 	.write =	pci_mmcfg_write,
 };
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index e783841bd1d7..915a493502cb 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -81,7 +81,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 	return 0;
 }
 
-static struct pci_raw_ops pci_mmcfg = {
+static const struct pci_raw_ops pci_mmcfg = {
 	.read =		pci_mmcfg_read,
 	.write =	pci_mmcfg_write,
 };
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 512a88c41501..51abf02f9226 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -110,7 +110,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
 
 #undef PCI_CONF1_MQ_ADDRESS
 
-static struct pci_raw_ops pci_direct_conf1_mq = {
+static const struct pci_raw_ops pci_direct_conf1_mq = {
 	.read	= pci_conf1_mq_read,
 	.write	= pci_conf1_mq_write
 };
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index 5262603b04d9..7043a4f0e98a 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -301,7 +301,7 @@ static int pci_olpc_write(unsigned int seg, unsigned int bus,
 	return 0;
 }
 
-static struct pci_raw_ops pci_olpc_conf = {
+static const struct pci_raw_ops pci_olpc_conf = {
 	.read =	pci_olpc_read,
 	.write = pci_olpc_write,
 };
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index f68553551467..db0e9a51e611 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -303,7 +303,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus,
  * Function table for BIOS32 access
  */
 
-static struct pci_raw_ops pci_bios_access = {
+static const struct pci_raw_ops pci_bios_access = {
 	.read =		pci_bios_read,
 	.write =	pci_bios_write
 };
@@ -312,7 +312,7 @@ static struct pci_raw_ops pci_bios_access = {
  * Try to find PCI BIOS.
  */
 
-static struct pci_raw_ops * __devinit pci_find_bios(void)
+static const struct pci_raw_ops * __devinit pci_find_bios(void)
 {
 	union bios32 *check;
 	unsigned char sum;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 1017c7bee388..492ade8c978e 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -175,8 +175,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 					       "pcifront-msi-x" :
 					       "pcifront-msi",
 						DOMID_SELF);
-		if (irq < 0)
+		if (irq < 0) {
+			ret = irq;
 			goto free;
+		}
 		i++;
 	}
 	kfree(v);
@@ -221,8 +223,10 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		if (msg.data != XEN_PIRQ_MSI_DATA ||
 		    xen_irq_from_pirq(pirq) < 0) {
 			pirq = xen_allocate_pirq_msi(dev, msidesc);
-			if (pirq < 0)
+			if (pirq < 0) {
+				irq = -ENODEV;
 				goto error;
+			}
 			xen_msi_compose_msg(dev, pirq, &msg);
 			__write_msi_msg(msidesc, &msg);
 			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
@@ -244,10 +248,12 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 error:
 	dev_err(&dev->dev,
 		"Xen PCI frontend has not registered MSI/MSI-X support!\n");
-	return -ENODEV;
+	return irq;
 }
 
 #ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	int ret = 0;
@@ -265,10 +271,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 
 		memset(&map_irq, 0, sizeof(map_irq));
 		map_irq.domid = domid;
-		map_irq.type = MAP_PIRQ_TYPE_MSI;
+		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
-		map_irq.bus = dev->bus->number;
+		map_irq.bus = dev->bus->number |
+			      (pci_domain_nr(dev->bus) << 16);
 		map_irq.devfn = dev->devfn;
 
 		if (type == PCI_CAP_ID_MSIX) {
@@ -285,7 +292,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 		}
 
-		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+		ret = -EINVAL;
+		if (pci_seg_supported)
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+			map_irq.type = MAP_PIRQ_TYPE_MSI;
+			map_irq.index = -1;
+			map_irq.pirq = -1;
+			map_irq.bus = dev->bus->number;
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+			if (ret != -EINVAL)
+				pci_seg_supported = false;
+		}
 		if (ret) {
 			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 				 ret, domid);
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 021eee91c056..8d874396cb29 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,6 +1,7 @@
 # Platform specific code goes here
 obj-y	+= ce4100/
 obj-y	+= efi/
+obj-y	+= geode/
 obj-y	+= iris/
 obj-y	+= mrst/
 obj-y	+= olpc/
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
new file mode 100644
index 000000000000..07c9cd05021a
--- /dev/null
+++ b/arch/x86/platform/geode/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ALIX)		+= alix.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
new file mode 100644
index 000000000000..ca1973699d3d
--- /dev/null
+++ b/arch/x86/platform/geode/alix.c
@@ -0,0 +1,142 @@
+/*
+ * System Specific setup for PCEngines ALIX.
+ * At the moment this means setup of GPIO control of LEDs
+ * on Alix.2/3/6 boards.
+ *
+ *
+ * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
+ * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ *
+ * TODO: There are large similarities with leds-net5501.c
+ * by Alessandro Zummo <a.zummo@towertech.it>
+ * In the future leds-net5501.c should be migrated over to platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <asm/geode.h>
+
+static int force = 0;
+module_param(force, bool, 0444);
+/* FIXME: Award bios is not automatically detected as Alix platform */
+MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
+
+static struct gpio_led alix_leds[] = {
+	{
+		.name = "alix:1",
+		.gpio = 6,
+		.default_trigger = "default-on",
+		.active_low = 1,
+	},
+	{
+		.name = "alix:2",
+		.gpio = 25,
+		.default_trigger = "default-off",
+		.active_low = 1,
+	},
+	{
+		.name = "alix:3",
+		.gpio = 27,
+		.default_trigger = "default-off",
+		.active_low = 1,
+	},
+};
+
+static struct gpio_led_platform_data alix_leds_data = {
+	.num_leds = ARRAY_SIZE(alix_leds),
+	.leds = alix_leds,
+};
+
+static struct platform_device alix_leds_dev = {
+	.name = "leds-gpio",
+	.id = -1,
+	.dev.platform_data = &alix_leds_data,
+};
+
+static void __init register_alix(void)
+{
+	/* Setup LED control through leds-gpio driver */
+	platform_device_register(&alix_leds_dev);
+}
+
+static int __init alix_present(unsigned long bios_phys,
+				const char *alix_sig,
+				size_t alix_sig_len)
+{
+	const size_t bios_len = 0x00010000;
+	const char *bios_virt;
+	const char *scan_end;
+	const char *p;
+	char name[64];
+
+	if (force) {
+		printk(KERN_NOTICE "%s: forced to skip BIOS test, "
+		       "assume system is ALIX.2/ALIX.3\n",
+		       KBUILD_MODNAME);
+		return 1;
+	}
+
+	bios_virt = phys_to_virt(bios_phys);
+	scan_end = bios_virt + bios_len - (alix_sig_len + 2);
+	for (p = bios_virt; p < scan_end; p++) {
+		const char *tail;
+		char *a;
+
+		if (memcmp(p, alix_sig, alix_sig_len) != 0)
+			continue;
+
+		memcpy(name, p, sizeof(name));
+
+		/* remove the first \0 character from string */
+		a = strchr(name, '\0');
+		if (a)
+			*a = ' ';
+
+		/* cut the string at a newline */
+		a = strchr(name, '\r');
+		if (a)
+			*a = '\0';
+
+		tail = p + alix_sig_len;
+		if ((tail[0] == '2' || tail[0] == '3')) {
+			printk(KERN_INFO
+			       "%s: system is recognized as \"%s\"\n",
+			       KBUILD_MODNAME, name);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int __init alix_init(void)
+{
+	const char tinybios_sig[] = "PC Engines ALIX.";
+	const char coreboot_sig[] = "PC Engines\0ALIX.";
+
+	if (!is_geode())
+		return 0;
+
+	if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) ||
+	    alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1))
+		register_alix();
+
+	return 0;
+}
+
+module_init(alix_init);
+
+MODULE_AUTHOR("Ed Wildgoose <kernel@wildgooses.com>");
+MODULE_DESCRIPTION("PCEngines ALIX System Setup");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 58425adc22c6..e6379526675b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -14,6 +14,8 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
 #include <linux/sfi.h>
 #include <linux/intel_pmic_gpio.h>
 #include <linux/spi/spi.h>
@@ -392,6 +394,7 @@ static void __init *max3111_platform_data(void *info)
 	struct spi_board_info *spi_info = info;
 	int intr = get_gpio_by_name("max3111_int");
 
+	spi_info->mode = SPI_MODE_0;
 	if (intr == -1)
 		return NULL;
 	spi_info->irq = intr + MRST_IRQ_OFFSET;
@@ -678,38 +681,40 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 	pentry = (struct sfi_device_table_entry *)sb->pentry;
 
 	for (i = 0; i < num; i++, pentry++) {
-		if (pentry->irq != (u8)0xff) { /* native RTE case */
+		int irq = pentry->irq;
+
+		if (irq != (u8)0xff) { /* native RTE case */
 			/* these SPI2 devices are not exposed to system as PCI
 			 * devices, but they have separate RTE entry in IOAPIC
 			 * so we have to enable them one by one here
 			 */
-			ioapic = mp_find_ioapic(pentry->irq);
+			ioapic = mp_find_ioapic(irq);
 			irq_attr.ioapic = ioapic;
-			irq_attr.ioapic_pin = pentry->irq;
+			irq_attr.ioapic_pin = irq;
 			irq_attr.trigger = 1;
 			irq_attr.polarity = 1;
-			io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+			io_apic_set_pci_routing(NULL, irq, &irq_attr);
 		} else
-			pentry->irq = 0; /* No irq */
+			irq = 0; /* No irq */
 
 		switch (pentry->type) {
 		case SFI_DEV_TYPE_IPC:
 			/* ID as IRQ is a hack that will go away */
-			pdev = platform_device_alloc(pentry->name, pentry->irq);
+			pdev = platform_device_alloc(pentry->name, irq);
 			if (pdev == NULL) {
 				pr_err("out of memory for SFI platform device '%s'.\n",
 							pentry->name);
 				continue;
 			}
-			install_irq_resource(pdev, pentry->irq);
+			install_irq_resource(pdev, irq);
 			pr_debug("info[%2d]: IPC bus, name = %16.16s, "
-				"irq = 0x%2x\n", i, pentry->name, pentry->irq);
+				"irq = 0x%2x\n", i, pentry->name, irq);
 			sfi_handle_ipc_dev(pdev);
 			break;
 		case SFI_DEV_TYPE_SPI:
 			memset(&spi_info, 0, sizeof(spi_info));
 			strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
-			spi_info.irq = pentry->irq;
+			spi_info.irq = irq;
 			spi_info.bus_num = pentry->host_num;
 			spi_info.chip_select = pentry->addr;
 			spi_info.max_speed_hz = pentry->max_freq;
@@ -726,7 +731,7 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
 			memset(&i2c_info, 0, sizeof(i2c_info));
 			bus = pentry->host_num;
 			strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
-			i2c_info.irq = pentry->irq;
+			i2c_info.irq = irq;
 			i2c_info.addr = pentry->addr;
 			pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
 				"irq = 0x%2x, addr = 0x%x\n", i, bus,
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index db8b915f54bc..5b552198f774 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -115,9 +115,6 @@ early_param("nobau", setup_nobau);
 
 /* base pnode in this partition */
 static int uv_base_pnode __read_mostly;
-/* position of pnode (which is nasid>>1): */
-static int uv_nshift __read_mostly;
-static unsigned long uv_mmask __read_mostly;
 
 static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
@@ -1435,7 +1432,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 {
 	int i;
 	int cpu;
-	unsigned long pa;
+	unsigned long gpa;
 	unsigned long m;
 	unsigned long n;
 	size_t dsize;
@@ -1451,9 +1448,9 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 	bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
 	BUG_ON(!bau_desc);
 
-	pa = uv_gpa(bau_desc); /* need the real nasid*/
-	n = pa >> uv_nshift;
-	m = pa & uv_mmask;
+	gpa = uv_gpa(bau_desc);
+	n = uv_gpa_to_gnode(gpa);
+	m = uv_gpa_to_offset(gpa);
 
 	/* the 14-bit pnode */
 	write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1525,9 +1522,9 @@ static void pq_init(int node, int pnode)
 		bcp->queue_last		= pqp + (DEST_Q_SIZE - 1);
 	}
 	/*
-	 * need the pnode of where the memory was really allocated
+	 * need the gnode of where the memory was really allocated
 	 */
-	pn = uv_gpa(pqp) >> uv_nshift;
+	pn = uv_gpa_to_gnode(uv_gpa(pqp));
 	first = uv_physnodeaddr(pqp);
 	pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
 	last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
@@ -1837,8 +1834,6 @@ static int __init uv_bau_init(void)
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
-	uv_nshift = uv_hub_info->m_val;
-	uv_mmask = (1UL << uv_hub_info->m_val) - 1;
 	nuvhubs = uv_num_possible_blades();
 	spin_lock_init(&disable_lock);
 	congested_cycles = usec_2_cycles(congested_respns_us);
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 316fbca3490e..153407c35b75 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -89,6 +89,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 	addr = start + (offset << PAGE_SHIFT);
 	if (addr >= end)
 		addr = end;
+
+	/*
+	 * page-align it here so that get_unmapped_area doesn't
+	 * align it wrongfully again to the next page. addr can come in 4K
+	 * unaligned here as a result of stack start randomization.
+	 */
+	addr = PAGE_ALIGN(addr);
+	addr = align_addr(addr, NULL, ALIGN_VDSO);
+
 	return addr;
 }
 
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821cb2e09..26c731a106af 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST
 
 config XEN_PVHVM
 	def_bool y
-	depends on XEN
-	depends on X86_LOCAL_APIC
+	depends on XEN && PCI && X86_LOCAL_APIC
 
 config XEN_MAX_DOMAIN_MEMORY
        int
@@ -49,11 +48,3 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
-	bool "Enable Xen debug checks"
-	depends on XEN
-	default n
-	help
-	  Enable various WARN_ON checks in the Xen MMU code.
-	  Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2d69617950f7..da8afd576a6b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -251,6 +251,7 @@ static void __init xen_init_cpuid_mask(void)
 			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
 			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
 	ax = 1;
+	cx = 0;
 	xen_cpuid(&ax, &bx, &cx, &dx);
 
 	xsave_mask =
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3dd53f997b11..87f6673b1207 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -495,41 +495,6 @@ static pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
-	phys_addr_t other_addr;
-	bool io_page = false;
-	pte_t _pte;
-
-	if (pte & _PAGE_IOMAP)
-		io_page = true;
-
-	_pte = xen_make_pte(pte);
-
-	if (!addr)
-		return _pte;
-
-	if (io_page &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-		WARN_ONCE(addr != other_addr,
-			"0x%lx is using VM_IO, but it is 0x%lx!\n",
-			(unsigned long)addr, (unsigned long)other_addr);
-	} else {
-		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
-		other_addr = (_pte.pte & PTE_PFN_MASK);
-		WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
-			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
-			(unsigned long)addr);
-	}
-
-	return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
 static pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
@@ -1992,9 +1957,6 @@ void __init xen_ident_map_ISA(void)
 
 static void __init xen_post_allocator_init(void)
 {
-#ifdef CONFIG_XEN_DEBUG
-	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2404,17 +2366,3 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
-	.open		= p2m_dump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9d5440..1b267e75158d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@ static unsigned long mfn_hash(unsigned long mfn)
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -692,16 +695,28 @@ int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
 	}
-
-	page->private = mfn;
+	WARN_ON(PagePrivate(page));
+	SetPagePrivate(page);
+	set_page_private(page, mfn);
 	page->index = pfn_to_mfn(pfn);
 
 	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 		return -ENOMEM;
 
-	if (clear_pte && !PageHighMem(page))
-		/* Just zap old mapping for now */
-		pte_clear(&init_mm, address, ptep);
+	if (kmap_op != NULL) {
+		if (!PageHighMem(page)) {
+			struct multicall_space mcs =
+				xen_mc_entry(sizeof(*kmap_op));
+
+			MULTI_grant_table_op(mcs.mc,
+					GNTTABOP_map_grant_ref, kmap_op, 1);
+
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+		}
+		/* let's use dev_bus_addr to record the old mfn instead */
+		kmap_op->dev_bus_addr = page->index;
+		page->index = (unsigned long) kmap_op;
+	}
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_del(&page->lru);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
-	set_phys_to_machine(pfn, page->index);
+	WARN_ON(!PagePrivate(page));
+	ClearPagePrivate(page);
 
-	if (clear_pte && !PageHighMem(page))
-		set_pte_at(&init_mm, address, ptep,
-				pfn_pte(pfn, PAGE_KERNEL));
-		/* No tlb flush necessary because the caller already
-		 * left the pte unmapped. */
+	if (clear_pte) {
+		struct gnttab_map_grant_ref *map_op =
+			(struct gnttab_map_grant_ref *) page->index;
+		set_phys_to_machine(pfn, map_op->dev_bus_addr);
+		if (!PageHighMem(page)) {
+			struct multicall_space mcs;
+			struct gnttab_unmap_grant_ref *unmap_op;
+
+			/*
+			 * It might be that we queued all the m2p grant table
+			 * hypercalls in a multicall, then m2p_remove_override
+			 * get called before the multicall has actually been
+			 * issued. In this case handle is going to -1 because
+			 * it hasn't been modified yet.
+			 */
+			if (map_op->handle == -1)
+				xen_mc_flush();
+			/*
+			 * Now if map_op->handle is negative it means that the
+			 * hypercall actually returned an error.
+			 */
+			if (map_op->handle == GNTST_general_error) {
+				printk(KERN_WARNING "m2p_remove_override: "
+						"pfn %lx mfn %lx, failed to modify kernel mappings",
+						pfn, mfn);
+				return -1;
+			}
+
+			mcs = xen_mc_entry(
+					sizeof(struct gnttab_unmap_grant_ref));
+			unmap_op = mcs.args;
+			unmap_op->host_addr = map_op->host_addr;
+			unmap_op->handle = map_op->handle;
+			unmap_op->dev_bus_addr = 0;
+
+			MULTI_grant_table_op(mcs.mc,
+					GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+			set_pte_at(&init_mm, address, ptep,
+					pfn_pte(pfn, PAGE_KERNEL));
+			__flush_tlb_single(address);
+			map_op->host_addr = 0;
+		}
+	} else
+		set_phys_to_machine(pfn, page->index);
 
 	return 0;
 }
@@ -758,7 +816,7 @@ struct page *m2p_find_override(unsigned long mfn)
 	spin_lock_irqsave(&m2p_override_lock, flags);
 
 	list_for_each_entry(p, bucket, lru) {
-		if (p->private == mfn) {
+		if (page_private(p) == mfn) {
 			ret = p;
 			break;
 		}
@@ -782,17 +840,21 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
 	static const char * const level_name[] = { "top", "middle",
-						"entry", "abnormal" };
-	static const char * const type_name[] = { "identity", "missing",
-						"pfn", "abnormal"};
+						"entry", "abnormal", "error"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+	static const char * const type_name[] = {
+				[TYPE_IDENTITY] = "identity",
+				[TYPE_MISSING] = "missing",
+				[TYPE_PFN] = "pfn",
+				[TYPE_UNKNOWN] = "abnormal"};
 	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
 	unsigned int uninitialized_var(prev_level);
 	unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@ int p2m_dump_show(struct seq_file *m, void *v)
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+	.open		= p2m_dump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+	return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 46d6d21dbdbe..38d0af4fefec 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@ extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
 /* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
 
 /* 
  * The maximum amount of extra memory compared to the base size.  The
@@ -51,48 +54,47 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
-
-	if (!pages)
-		return;
-
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size  = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	xen_extra_mem_size += size;
+	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
 
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-					      phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+					      unsigned long end)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	unsigned long start, end;
 	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
-	start = PFN_UP(start_addr);
-	end = PFN_DOWN(end_addr);
-
-	if (end <= start)
-		return 0;
-
 	for(pfn = start; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
 
@@ -117,72 +119,52 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						     const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
 {
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = ISA_END_ADDRESS;
+	phys_addr_t start = 0;
 	unsigned long released = 0;
-	int i;
-
-	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
-		end = min(max_addr, end);
-
-		if (last_end < end)
-			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
-	}
-
-	if (last_end < max_addr)
-		released += xen_release_chunk(last_end, max_addr);
-
-	printk(KERN_INFO "released %lu pages of unused memory\n", released);
-	return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
-					     ssize_t map_size)
-{
-	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-	phys_addr_t start_pci = last;
-	const struct e820entry *entry;
 	unsigned long identity = 0;
+	const struct e820entry *entry;
 	int i;
 
+	/*
+	 * Combine non-RAM regions and gaps until a RAM region (or the
+	 * end of the map) is reached, then set the 1:1 map and
+	 * release the pages (if available) in those non-RAM regions.
+	 *
+	 * The combined non-RAM regions are rounded to a whole number
+	 * of pages so any partial pages are accessible via the 1:1
+	 * mapping.  This is needed for some BIOSes that put (for
+	 * example) the DMI tables in a reserved region that begins on
+	 * a non-page boundary.
+	 */
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		phys_addr_t start = entry->addr;
-		phys_addr_t end = start + entry->size;
+		phys_addr_t end = entry->addr + entry->size;
 
-		if (start < last)
-			start = last;
+		if (entry->type == E820_RAM || i == map_size - 1) {
+			unsigned long start_pfn = PFN_DOWN(start);
+			unsigned long end_pfn = PFN_UP(end);
 
-		if (end <= start)
-			continue;
+			if (entry->type == E820_RAM)
+				end_pfn = PFN_UP(entry->addr);
 
-		/* Skip over the 1MB region. */
-		if (last > end)
-			continue;
+			if (start_pfn < end_pfn) {
+				if (start_pfn < nr_pages)
+					released += xen_release_chunk(
+						start_pfn, min(end_pfn, nr_pages));
 
-		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
-			if (start > start_pci)
 				identity += set_phys_range_identity(
-						PFN_UP(start_pci), PFN_DOWN(start));
-
-			/* Without saving 'last' we would gooble RAM too
-			 * at the end of the loop. */
-			last = end;
-			start_pci = end;
-			continue;
+					start_pfn, end_pfn);
+			}
+			start = end;
 		}
-		start_pci = min(start, start_pci);
-		last = end;
 	}
-	if (last > start_pci)
-		identity += set_phys_range_identity(
-					PFN_UP(start_pci), PFN_DOWN(last));
-	return identity;
+
+	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+	return released;
 }
 
 static unsigned long __init xen_get_max_pages(void)
@@ -197,21 +179,32 @@ static unsigned long __init xen_get_max_pages(void)
 	return min(max_pages, MAX_DOMAIN_PAGES);
 }
 
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
+
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
+	}
+
+	e820_add_region(start, end - start, type);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
-	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
@@ -237,58 +230,65 @@ char * __init xen_memory_setup(void)
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
-
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
-
-			map[i].size -= delta;
-			end -= delta;
-
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-				(xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
+
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
+
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs.  Any RAM pages that would be made inaccesible by
+	 * this are first released.
+	 */
+	xen_released_pages = xen_set_identity_and_release(
+		map, memmap.nr_entries, max_pfn);
+	extra_pages += xen_released_pages;
+
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
+
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem_start)
-			xen_extra_mem_start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that xen_extra_mem_start gets set in the loop above to be
-	 * past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
 	 * reserve ISA memory anyway because too many things poke
 	 * about in there.
-	 *
-	 * In Dom0, the host E820 information can leave gaps in the
-	 * ISA range, which would cause us to release those pages.  To
-	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
@@ -305,44 +305,6 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_limit = xen_get_max_pages();
-	if (max_pfn + extra_pages > extra_limit) {
-		if (extra_limit > max_pfn)
-			extra_pages = extra_limit - max_pfn;
-		else
-			extra_pages = 0;
-	}
-
-	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size.  On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
-	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
-	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index 0234cd198c54..f932b30b47fb 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -15,7 +15,6 @@ CONFIG_GENERIC_GPIO=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_NO_IOPORT=y
 CONFIG_HZ=100
-CONFIG_GENERIC_TIME=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 CONFIG_CONSTRUCTORS=y
 
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
index 4891abbf16bc..550e8ed5b5c6 100644
--- a/arch/xtensa/configs/s6105_defconfig
+++ b/arch/xtensa/configs/s6105_defconfig
@@ -15,7 +15,6 @@ CONFIG_GENERIC_GPIO=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_NO_IOPORT=y
 CONFIG_HZ=100
-CONFIG_GENERIC_TIME=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index f717e20d961b..7dde24456427 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -633,7 +633,7 @@ static const struct net_device_ops iss_netdev_ops = {
 	.ndo_set_mac_address	= iss_net_set_mac,
 	//.ndo_do_ioctl		= iss_net_ioctl,
 	.ndo_tx_timeout		= iss_net_tx_timeout,
-	.ndo_set_multicast_list = iss_net_set_multicast_list,
+	.ndo_set_rx_mode	= iss_net_set_multicast_list,
 };
 
 static int iss_net_configure(int index, char *init)