283 files changed, 5076 insertions, 2857 deletions
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts
index 757e0c62c4f9..3b076fbd8366 100644
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts
@@ -64,7 +64,7 @@
 		};
 
 		arcpmu0: pmu {
-			compatible = "snps,arc700-pmu";
+			compatible = "snps,arc700-pct";
 		};
 	};
 };
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 278dacf2a3f9..d2ac4e56ba1d 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -2,6 +2,9 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -9,7 +12,7 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs"
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_SLUB_DEBUG is not set
@@ -21,12 +24,9 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_IDE is not set
-# CONFIG_ARCTANGENT_EMAC is not set
 # CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -39,23 +39,23 @@ CONFIG_INET=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
 # CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_CYPRESS is not set
 # CONFIG_MOUSE_PS2_TRACKPOINT is not set
 CONFIG_MOUSE_PS2_TOUCHKIT=y
-# CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
 CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
@@ -72,4 +72,3 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index be33db8a2ee3..e2b1b1211b0d 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -30,6 +30,7 @@
 #define ARC_REG_D_UNCACH_BCR	0x6A
 #define ARC_REG_BPU_BCR		0xc0
 #define ARC_REG_ISA_CFG_BCR	0xc1
+#define ARC_REG_RTT_BCR		0xF2
 #define ARC_REG_SMART_BCR	0xFF
 
 /* status32 Bits Positions */
@@ -50,11 +51,7 @@
  * [15: 8] = Exception Cause Code
  * [ 7: 0] = Exception Parameters (for certain types only)
  */
-#define ECR_VEC_MASK			0xff0000
-#define ECR_CODE_MASK			0x00ff00
-#define ECR_PARAM_MASK			0x0000ff
-
-/* Exception Cause Vector Values */
+#define ECR_V_MEM_ERR			0x01
 #define ECR_V_INSN_ERR			0x02
 #define ECR_V_MACH_CHK			0x20
 #define ECR_V_ITLB_MISS			0x21
@@ -62,7 +59,8 @@
 #define ECR_V_PROTV			0x23
 #define ECR_V_TRAP			0x25
 
-/* Protection Violation Exception Cause Code Values */
+/* DTLB Miss and Protection Violation Cause Codes */
+
 #define ECR_C_PROTV_INST_FETCH		0x00
 #define ECR_C_PROTV_LOAD		0x01
 #define ECR_C_PROTV_STORE		0x02
@@ -173,11 +171,11 @@
 	}						\
 }
 
-#define WRITE_BCR(reg, into)				\
+#define WRITE_AUX(reg, into)				\
 {							\
 	unsigned int tmp;				\
 	if (sizeof(tmp) == sizeof(into)) {		\
-		tmp = (*(unsigned int *)(into));	\
+		tmp = (*(unsigned int *)&(into));	\
 		write_aux_reg(reg, tmp);		\
 	} else  {					\
 		extern void bogus_undefined(void);	\
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 1a5bf07eefe2..4051e9525939 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -32,6 +32,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *m)
 
 	m += nr >> 5;
 
+	/*
+	 * ARC ISA micro-optimization:
+	 *
+	 * Instructions dealing with bitpos only consider lower 5 bits (0-31)
+	 * e.g (x << 33) is handled like (x << 1) by ASL instruction
+	 *  (mem pointer still needs adjustment to point to next word)
+	 *
+	 * Hence the masking to clamp @nr arg can be elided in general.
+	 *
+	 * However if @nr is a constant (above assumed it in a register),
+	 * and greater than 31, gcc can optimize away (x << 33) to 0,
+	 * as overflow, given the 32-bit ISA. Thus masking needs to be done
+	 * for constant @nr, but no code is generated due to const prop.
+	 */
 	if (__builtin_constant_p(nr))
 		nr &= 0x1f;
 
@@ -374,29 +388,20 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *m)
  * This routine doesn't need to be atomic.
  */
 static inline int
-__constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
-	return ((1UL << (nr & 31)) &
-		(((const volatile unsigned int *)addr)[nr >> 5])) != 0;
-}
-
-static inline int
-__test_bit(unsigned int nr, const volatile unsigned long *addr)
+test_bit(unsigned int nr, const volatile unsigned long *addr)
 {
 	unsigned long mask;
 
 	addr += nr >> 5;
 
-	/* ARC700 only considers 5 bits in bit-fiddling insn */
+	if (__builtin_constant_p(nr))
+		nr &= 0x1f;
+
 	mask = 1 << nr;
 
 	return ((mask & *addr) != 0);
 }
 
-#define test_bit(nr, addr)	(__builtin_constant_p(nr) ? \
-					__constant_test_bit((nr), (addr)) : \
-					__test_bit((nr), (addr)))
-
 /*
  * Count the number of zeros, starting from MSB
  * Helper for fls( ) friends
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index cbf755e32a03..2b8880e953a2 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -54,29 +54,13 @@ struct arc_reg_cc_build {
 #define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
 #define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
 #define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
-#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_LDC	(PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_STC	(PERF_COUNT_HW_MAX + 7)
+
+#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 8)
 
 /*
- * The "generalized" performance events seem to really be a copy
- * of the available events on x86 processors; the mapping to ARC
- * events is not always possible 1-to-1. Fortunately, there doesn't
- * seem to be an exact definition for these events, so we can cheat
- * a bit where necessary.
- *
- * In particular, the following PERF events may behave a bit differently
- * compared to other architectures:
- *
- * PERF_COUNT_HW_CPU_CYCLES
- *	Cycles not in halted state
- *
- * PERF_COUNT_HW_REF_CPU_CYCLES
- *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
- *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
- *
- * PERF_COUNT_HW_BUS_CYCLES
- *	Unclear what this means, Intel uses 0x013c, which according to
- *	their datasheet means "unhalted reference cycles". It sounds similar
- *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ * Some ARC pct quirks:
  *
  * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
@@ -91,21 +75,38 @@ struct arc_reg_cc_build {
  *	Note that I$ cache misses aren't counted by either of the two!
  */
 
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
 static const char * const arc_pmu_ev_hw_map[] = {
+	/* count cycles */
 	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
 	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
 	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
-	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-	[PERF_COUNT_ARC_DCLM] = "dclm",
-	[PERF_COUNT_ARC_DCSM] = "dcsm",
-	[PERF_COUNT_ARC_ICM] = "icm",
-	[PERF_COUNT_ARC_BPOK] = "bpok",
-	[PERF_COUNT_ARC_EDTLB] = "edtlb",
-	[PERF_COUNT_ARC_EITLB] = "eitlb",
+
+	/* counts condition */
+	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+
+	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
+	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */
+
+	[PERF_COUNT_ARC_DCLM] = "dclm",		/* D-cache Load Miss */
+	[PERF_COUNT_ARC_DCSM] = "dcsm",		/* D-cache Store Miss */
+	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
+	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
+	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */
 };
 
 #define C(_x)			PERF_COUNT_HW_CACHE_##_x
@@ -114,11 +115,11 @@ static const char * const arc_pmu_ev_hw_map[] = {
 static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {
 		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
 		},
 		[C(OP_WRITE)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_STC,
 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
 		},
 		[C(OP_PREFETCH)] = {
@@ -128,7 +129,7 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 	[C(L1I)] = {
 		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_INSTRUCTIONS,
 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
 		},
 		[C(OP_WRITE)] = {
@@ -156,9 +157,10 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 	[C(DTLB)] = {
 		[C(OP_READ)] = {
-			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_ACCESS)]	= PERF_COUNT_ARC_LDC,
 			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
 		},
+			/* DTLB LD/ST Miss not segregated by h/w*/
 		[C(OP_WRITE)] = {
 			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
 			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index ae1c485cbc68..fd2ec50102f2 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <asm/arcregs.h>
+#include <asm/stacktrace.h>
 
 struct arc_pmu {
 	struct pmu	pmu;
@@ -25,6 +26,46 @@ struct arc_pmu {
 	int		ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
 };
 
+struct arc_callchain_trace {
+	int depth;
+	void *perf_stuff;
+};
+
+static int callchain_trace(unsigned int addr, void *data)
+{
+	struct arc_callchain_trace *ctrl = data;
+	struct perf_callchain_entry *entry = ctrl->perf_stuff;
+	perf_callchain_store(entry, addr);
+
+	if (ctrl->depth++ < 3)
+		return 0;
+
+	return -1;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct arc_callchain_trace ctrl = {
+		.depth = 0,
+		.perf_stuff = entry,
+	};
+
+	arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	/*
+	 * User stack can't be unwound trivially with kernel dwarf unwinder
+	 * So for now just record the user PC
+	 */
+	perf_callchain_store(entry, instruction_pointer(regs));
+}
+
+static struct arc_pmu *arc_pmu;
+
 /* read counter #idx; note that counter# != event# on ARC! */
 static uint64_t arc_pmu_read_counter(int idx)
 {
@@ -47,7 +88,6 @@ static uint64_t arc_pmu_read_counter(int idx)
 static void arc_perf_event_update(struct perf_event *event,
 				  struct hw_perf_event *hwc, int idx)
 {
-	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
 	uint64_t prev_raw_count, new_raw_count;
 	int64_t delta;
 
@@ -89,13 +129,16 @@ static int arc_pmu_cache_event(u64 config)
 	if (ret == CACHE_OP_UNSUPPORTED)
 		return -ENOENT;
 
+	pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+		 cache_type, cache_op, cache_result, ret,
+		 arc_pmu_ev_hw_map[ret]);
+
 	return ret;
 }
 
 /* initializes hw_perf_event structure if event is supported */
 static int arc_pmu_event_init(struct perf_event *event)
 {
-	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int ret;
 
@@ -106,8 +149,9 @@ static int arc_pmu_event_init(struct perf_event *event)
 		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
 			return -ENOENT;
 		hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
-		pr_debug("initializing event %d with cfg %d\n",
-			 (int) event->attr.config, (int) hwc->config);
+		pr_debug("init event %d with h/w %d \'%s\'\n",
+			 (int) event->attr.config, (int) hwc->config,
+			 arc_pmu_ev_hw_map[event->attr.config]);
 		return 0;
 	case PERF_TYPE_HW_CACHE:
 		ret = arc_pmu_cache_event(event->attr.config);
@@ -183,8 +227,6 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
 
 static void arc_pmu_del(struct perf_event *event, int flags)
 {
-	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
-
 	arc_pmu_stop(event, PERF_EF_UPDATE);
 	__clear_bit(event->hw.idx, arc_pmu->used_mask);
 
@@ -194,7 +236,6 @@ static void arc_pmu_del(struct perf_event *event, int flags)
 /* allocate hardware counter and optionally start counting */
 static int arc_pmu_add(struct perf_event *event, int flags)
 {
-	struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
@@ -247,10 +288,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 	BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
 
 	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
-	if (!cc_bcr.v) {
-		pr_err("Performance counters exist, but no countable conditions?\n");
-		return -ENODEV;
-	}
+	BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
 
 	arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
 	if (!arc_pmu)
@@ -263,19 +301,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
 
 	cc_name.str[8] = 0;
-	for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+	for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
 		arc_pmu->ev_hw_idx[i] = -1;
 
+	/* loop thru all available h/w condition indexes */
 	for (j = 0; j < cc_bcr.c; j++) {
 		write_aux_reg(ARC_REG_CC_INDEX, j);
 		cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
 		cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+
+		/* See if it has been mapped to a perf event_id */
 		for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
 			if (arc_pmu_ev_hw_map[i] &&
 			    !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
 			    strlen(arc_pmu_ev_hw_map[i])) {
-				pr_debug("mapping %d to idx %d with name %s\n",
-					 i, j, cc_name.str);
+				pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+					 i, cc_name.str, j);
 				arc_pmu->ev_hw_idx[i] = j;
 			}
 		}
@@ -302,7 +343,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 #ifdef CONFIG_OF
 static const struct of_device_id arc_pmu_match[] = {
-	{ .compatible = "snps,arc700-pmu" },
+	{ .compatible = "snps,arc700-pct" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, arc_pmu_match);
@@ -310,7 +351,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match);
 
 static struct platform_driver arc_pmu_driver = {
 	.driver	= {
-		.name		= "arc700-pmu",
+		.name		= "arc700-pct",
 		.of_match_table = of_match_ptr(arc_pmu_match),
 	},
 	.probe		= arc_pmu_device_probe,
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index f46efd14059d..e095c557afdd 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -49,7 +49,10 @@ void arch_cpu_idle(void)
 
 asmlinkage void ret_from_fork(void);
 
-/* Layout of Child kernel mode stack as setup at the end of this function is
+/*
+ * Copy architecture-specific thread state
+ *
+ * Layout of Child kernel mode stack as setup at the end of this function is
  *
  * |     ...        |
  * |     ...        |
@@ -81,7 +84,7 @@ asmlinkage void ret_from_fork(void);
  * ------------------  <===== END of PAGE
  */
 int copy_thread(unsigned long clone_flags,
-		unsigned long usp, unsigned long arg,
+		unsigned long usp, unsigned long kthread_arg,
 		struct task_struct *p)
 {
 	struct pt_regs *c_regs;        /* child's pt_regs */
@@ -112,7 +115,7 @@ int copy_thread(unsigned long clone_flags,
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		memset(c_regs, 0, sizeof(struct pt_regs));
 
-		c_callee->r13 = arg; /* argument to kernel thread */
+		c_callee->r13 = kthread_arg;
 		c_callee->r14 = usp;  /* function */
 
 		return 0;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 900f68a70088..1d167c6df8ca 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -120,7 +120,10 @@ static void read_arc_build_cfg_regs(void)
 	READ_BCR(ARC_REG_SMART_BCR, bcr);
 	cpu->extn.smart = bcr.ver ? 1 : 0;
 
-	cpu->extn.debug = cpu->extn.ap | cpu->extn.smart;
+	READ_BCR(ARC_REG_RTT_BCR, bcr);
+	cpu->extn.rtt = bcr.ver ? 1 : 0;
+
+	cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
 }
 
 static const struct cpuinfo_data arc_cpu_tbl[] = {
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 3eadfdabc322..c927aa84e652 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -42,7 +42,7 @@ void die(const char *str, struct pt_regs *regs, unsigned long address)
  *  -for kernel, chk if due to copy_(to|from)_user, otherwise die()
  */
 static noinline int
-handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
+unhandled_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
 {
 	if (user_mode(regs)) {
 		struct task_struct *tsk = current;
@@ -71,7 +71,7 @@ int name(unsigned long address, struct pt_regs *regs) \
 		.si_code  = sicode,		\
 		.si_addr = (void __user *)address,	\
 	};					\
-	return handle_exception(str, regs, &info);\
+	return unhandled_exception(str, regs, &info);\
 }
 
 /*
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 523412369f70..d44eedd8c322 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -71,7 +71,7 @@ early_param("initrd", early_initrd);
  */
 void __init setup_arch_memory(void)
 {
-	unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 };
+	unsigned long zones_size[MAX_NR_ZONES];
 	unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
 
 	init_mm.start_code = (unsigned long)_text;
@@ -90,7 +90,7 @@ void __init setup_arch_memory(void)
 	/*------------- externs in mm need setting up ---------------*/
 
 	/* first page of system - kernel .vector starts here */
-	min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE);
+	min_low_pfn = ARCH_PFN_OFFSET;
 
 	/* Last usable page of low mem (no HIGHMEM yet for ARC port) */
 	max_low_pfn = max_pfn = PFN_DOWN(end_mem);
@@ -111,7 +111,7 @@ void __init setup_arch_memory(void)
 
 	/*-------------- node setup --------------------------------*/
 	memset(zones_size, 0, sizeof(zones_size));
-	zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
+	zones_size[ZONE_NORMAL] = max_mapnr;
 
 	/*
 	 * We can't use the helper free_area_init(zones[]) because it uses
@@ -123,6 +123,8 @@ void __init setup_arch_memory(void)
 			    zones_size,		/* num pages per zone */
 			    min_low_pfn,	/* first pfn of node */
 			    NULL);		/* NO holes */
+
+	high_memory = (void *)end_mem;
 }
 
 /*
@@ -133,7 +135,6 @@ void __init setup_arch_memory(void)
  */
 void __init mem_init(void)
 {
-	high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
 	free_all_bootmem();
 	mem_init_print_info(NULL);
 }
diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 8ae29c955c11..c17097d2c167 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -49,7 +49,7 @@
 		pinctrl-0 = <&matrix_keypad_pins>;
 
 		debounce-delay-ms = <5>;
-		col-scan-delay-us = <1500>;
+		col-scan-delay-us = <5>;
 
 		row-gpios = <&gpio5 5 GPIO_ACTIVE_HIGH		/* Bank5, pin5 */
 				&gpio5 6 GPIO_ACTIVE_HIGH>;	/* Bank5, pin6 */
@@ -473,7 +473,7 @@
 		interrupt-parent = <&gpio0>;
 		interrupts = <31 0>;
 
-		wake-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&gpio1 28 GPIO_ACTIVE_LOW>;
 
 		touchscreen-size-x = <480>;
 		touchscreen-size-y = <272>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 15f198e4864d..7128fad991ac 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -18,6 +18,7 @@
 	aliases {
 		rtc0 = &mcp_rtc;
 		rtc1 = &tps659038_rtc;
+		rtc2 = &rtc;
 	};
 
 	memory {
@@ -83,7 +84,7 @@
 	gpio_fan: gpio_fan {
 		/* Based on 5v 500mA AFB02505HHB */
 		compatible = "gpio-fan";
-		gpios =  <&tps659038_gpio 1 GPIO_ACTIVE_HIGH>;
+		gpios =  <&tps659038_gpio 2 GPIO_ACTIVE_HIGH>;
 		gpio-fan,speed-map = <0     0>,
 				     <13000 1>;
 		#cooling-cells = <2>;
@@ -130,8 +131,8 @@
 
 	uart3_pins_default: uart3_pins_default {
 		pinctrl-single,pins = <
-			0x248 (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_rxd.rxd */
-			0x24c (PIN_INPUT_SLEW | MUX_MODE0) /* uart3_txd.txd */
+			0x3f8 (PIN_INPUT_SLEW | MUX_MODE2) /* uart2_ctsn.uart3_rxd */
+			0x3fc (PIN_INPUT_SLEW | MUX_MODE1) /* uart2_rtsn.uart3_txd */
 		>;
 	};
 
@@ -455,7 +456,7 @@
 	mcp_rtc: rtc@6f {
 		compatible = "microchip,mcp7941x";
 		reg = <0x6f>;
-		interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_LOW>;  /* IRQ_SYS_1N */
+		interrupts = <GIC_SPI 2 IRQ_TYPE_EDGE_RISING>;  /* IRQ_SYS_1N */
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&mcp79410_pins_default>;
@@ -478,7 +479,7 @@
 &uart3 {
 	status = "okay";
 	interrupts-extended = <&crossbar_mpu GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>,
-			      <&dra7_pmx_core 0x248>;
+			      <&dra7_pmx_core 0x3f8>;
 
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart3_pins_default>;
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index e3b08fb959e5..990e8a2100f0 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -105,6 +105,10 @@
 		};
 
 		internal-regs {
+			rtc@10300 {
+				/* No crystal connected to the internal RTC */
+				status = "disabled";
+			};
 			serial@12000 {
 				status = "okay";
 			};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 5332b57b4950..f03a091cd076 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -911,7 +911,7 @@
 			ti,clock-cycles = <16>;
 
 			reg = <0x4ae07ddc 0x4>, <0x4ae07de0 0x4>,
-			      <0x4ae06014 0x4>, <0x4a003b20 0x8>,
+			      <0x4ae06014 0x4>, <0x4a003b20 0xc>,
 			      <0x4ae0c158 0x4>;
 			reg-names = "setup-address", "control-address",
 				    "int-address", "efuse-address",
@@ -944,7 +944,7 @@
 			ti,clock-cycles = <16>;
 
 			reg = <0x4ae07e34 0x4>, <0x4ae07e24 0x4>,
-			      <0x4ae06010 0x4>, <0x4a0025cc 0x8>,
+			      <0x4ae06010 0x4>, <0x4a0025cc 0xc>,
 			      <0x4a002470 0x4>;
 			reg-names = "setup-address", "control-address",
 				    "int-address", "efuse-address",
@@ -977,7 +977,7 @@
 			ti,clock-cycles = <16>;
 
 			reg = <0x4ae07e30 0x4>, <0x4ae07e20 0x4>,
-			      <0x4ae06010 0x4>, <0x4a0025e0 0x8>,
+			      <0x4ae06010 0x4>, <0x4a0025e0 0xc>,
 			      <0x4a00246c 0x4>;
 			reg-names = "setup-address", "control-address",
 				    "int-address", "efuse-address",
@@ -1010,7 +1010,7 @@
 			ti,clock-cycles = <16>;
 
 			reg = <0x4ae07de4 0x4>, <0x4ae07de8 0x4>,
-			      <0x4ae06010 0x4>, <0x4a003b08 0x8>,
+			      <0x4ae06010 0x4>, <0x4a003b08 0xc>,
 			      <0x4ae0c154 0x4>;
 			reg-names = "setup-address", "control-address",
 				    "int-address", "efuse-address",
@@ -1203,7 +1203,7 @@
 			status = "disabled";
 		};
 
-		rtc@48838000 {
+		rtc: rtc@48838000 {
 			compatible = "ti,am3352-rtc";
 			reg = <0x48838000 0x100>;
 			interrupts = <GIC_SPI 217 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index 8de12af7c276..d6b49e5b32e9 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -9,6 +9,7 @@
 
 #include <dt-bindings/sound/samsung-i2s.h>
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/clock/maxim,max77686.h>
 #include "exynos4412.dtsi"
 
 / {
@@ -105,6 +106,8 @@
 
 	rtc@10070000 {
 		status = "okay";
+		clocks = <&clock CLK_RTC>, <&max77686 MAX77686_CLK_AP>;
+		clock-names = "rtc", "rtc_src";
 	};
 
 	g2d@10800000 {
diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts
index 2657e842e5a5..1eca97ee4bd6 100644
--- a/arch/arm/boot/dts/exynos5250-snow.dts
+++ b/arch/arm/boot/dts/exynos5250-snow.dts
@@ -567,6 +567,7 @@
 	num-slots = <1>;
 	broken-cd;
 	cap-sdio-irq;
+	keep-power-in-suspend;
 	card-detect-delay = <200>;
 	samsung,dw-mshc-ciu-div = <3>;
 	samsung,dw-mshc-sdr-timing = <2 3>;
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
index 5d31fc140823..2180a0152c9b 100644
--- a/arch/arm/boot/dts/exynos5420-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
@@ -28,7 +28,7 @@ trips {
 		type = "active";
 	};
 	cpu-crit-0 {
-		temperature = <1200000>; /* millicelsius */
+		temperature = <120000>; /* millicelsius */
 		hysteresis = <0>; /* millicelsius */
 		type = "critical";
 	};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index f67b23f303c3..45317538bbae 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -536,6 +536,7 @@
 		clock-names = "dp";
 		phys = <&dp_phy>;
 		phy-names = "dp";
+		power-domains = <&disp_pd>;
 	};
 
 	mipi_phy: video-phy@10040714 {
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
index 48adfa8f4300..356e963edf11 100644
--- a/arch/arm/boot/dts/exynos5440-trip-points.dtsi
+++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
@@ -18,7 +18,7 @@ trips {
 		type = "active";
 	};
 	cpu-crit-0 {
-		temperature = <1050000>; /* millicelsius */
+		temperature = <105000>; /* millicelsius */
 		hysteresis = <0>; /* millicelsius */
 		type = "critical";
 	};
diff --git a/arch/arm/boot/dts/imx23-olinuxino.dts b/arch/arm/boot/dts/imx23-olinuxino.dts
index 7e6eef2488e8..82045398bf1f 100644
--- a/arch/arm/boot/dts/imx23-olinuxino.dts
+++ b/arch/arm/boot/dts/imx23-olinuxino.dts
@@ -12,6 +12,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 #include "imx23.dtsi"
 
 / {
@@ -93,6 +94,7 @@
 
 	ahb@80080000 {
 		usb0: usb@80080000 {
+			dr_mode = "host";
 			vbus-supply = <&reg_usb0_vbus>;
 			status = "okay";
 		};
@@ -122,7 +124,7 @@
 
 		user {
 			label = "green";
-			gpios = <&gpio2 1 1>;
+			gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi
index e4d3aecc4ed2..677f81d9dcd5 100644
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@ -428,6 +428,7 @@
 
 			pwm4: pwm@53fc8000 {
 				compatible = "fsl,imx25-pwm", "fsl,imx27-pwm";
+				#pwm-cells = <2>;
 				reg = <0x53fc8000 0x4000>;
 				clocks = <&clks 108>, <&clks 52>;
 				clock-names = "ipg", "per";
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 25e25f82fbae..4e073e854742 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -913,7 +913,7 @@
 					      80 81 68 69
 					      70 71 72 73
 					      74 75 76 77>;
-				interrupt-names = "auart4-rx", "aurat4-tx", "spdif-tx", "empty",
+				interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
 						  "saif0", "saif1", "i2c0", "i2c1",
 						  "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
 						  "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
index 19cc269a08d4..1ce6133b67f5 100644
--- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
@@ -31,6 +31,7 @@
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio4 15 0>;
+			enable-active-high;
 		};
 
 		reg_usb_h1_vbus: regulator@1 {
@@ -40,6 +41,7 @@
 			regulator-min-microvolt = <5000000>;
 			regulator-max-microvolt = <5000000>;
 			gpio = <&gpio1 0 0>;
+			enable-active-high;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index 46b2fed7c319..3b24b12651b2 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -185,7 +185,6 @@
 &i2c3 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_i2c3>;
-	pinctrl-assert-gpios = <&gpio5 4 GPIO_ACTIVE_HIGH>;
 	status = "okay";
 
 	max7310_a: gpio@30 {
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index a29315833ecd..5c16145920ea 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -498,6 +498,8 @@
 		DRVDD-supply = <&vmmc2>;
 		IOVDD-supply = <&vio>;
 		DVDD-supply = <&vio>;
+
+		ai3x-micbias-vg = <1>;
 	};
 
 	tlv320aic3x_aux: tlv320aic3x@19 {
@@ -509,6 +511,8 @@
 		DRVDD-supply = <&vmmc2>;
 		IOVDD-supply = <&vio>;
 		DVDD-supply = <&vio>;
+
+		ai3x-micbias-vg = <2>;
 	};
 
 	tsl2563: tsl2563@29 {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index d18a90f5eca3..69a40cfc1f29 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -456,6 +456,7 @@
 		};
 
 		mmu_isp: mmu@480bd400 {
+			#iommu-cells = <0>;
 			compatible = "ti,omap2-iommu";
 			reg = <0x480bd400 0x80>;
 			interrupts = <24>;
@@ -464,6 +465,7 @@
 		};
 
 		mmu_iva: mmu@5d000000 {
+			#iommu-cells = <0>;
 			compatible = "ti,omap2-iommu";
 			reg = <0x5d000000 0x80>;
 			interrupts = <28>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index efe5f737f39b..7d24ae0306b5 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -128,7 +128,7 @@
 	 * hierarchy.
 	 */
 	ocp {
-		compatible = "ti,omap4-l3-noc", "simple-bus";
+		compatible = "ti,omap5-l3-noc", "simple-bus";
 		#address-cells = <1>;
 		#size-cells = <1>;
 		ranges;
diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts
index 74c3212f1f11..824ddab9c3ad 100644
--- a/arch/arm/boot/dts/r8a7791-koelsch.dts
+++ b/arch/arm/boot/dts/r8a7791-koelsch.dts
@@ -545,7 +545,7 @@
 		compatible = "adi,adv7511w";
 		reg = <0x39>;
 		interrupt-parent = <&gpio3>;
-		interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+		interrupts = <29 IRQ_TYPE_LEVEL_LOW>;
 
 		adi,input-depth = <8>;
 		adi,input-colorspace = "rgb";
diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi
index bfd3f1c734b8..2201cd5da3bb 100644
--- a/arch/arm/boot/dts/ste-dbx5x0.dtsi
+++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi
@@ -1017,23 +1017,6 @@
 			status = "disabled";
 		};
 
-		vmmci: regulator-gpio {
-			compatible = "regulator-gpio";
-
-			regulator-min-microvolt = <1800000>;
-			regulator-max-microvolt = <2900000>;
-			regulator-name = "mmci-reg";
-			regulator-type = "voltage";
-
-			startup-delay-us = <100>;
-			enable-active-high;
-
-			states = <1800000 0x1
-				  2900000 0x0>;
-
-			status = "disabled";
-		};
-
 		mcde@a0350000 {
 			compatible = "stericsson,mcde";
 			reg = <0xa0350000 0x1000>, /* MCDE */
diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi
index bf8f0eddc2c0..744c1e3a744d 100644
--- a/arch/arm/boot/dts/ste-href.dtsi
+++ b/arch/arm/boot/dts/ste-href.dtsi
@@ -111,6 +111,21 @@
 			pinctrl-1 = <&i2c3_sleep_mode>;
 		};
 
+		vmmci: regulator-gpio {
+			compatible = "regulator-gpio";
+
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <2900000>;
+			regulator-name = "mmci-reg";
+			regulator-type = "voltage";
+
+			startup-delay-us = <100>;
+			enable-active-high;
+
+			states = <1800000 0x1
+				  2900000 0x0>;
+		};
+
 		// External Micro SD slot
 		sdi0_per1@80126000 {
 			arm,primecell-periphid = <0x10480180>;
diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts
index 206826a855c0..1bc84ebdccaa 100644
--- a/arch/arm/boot/dts/ste-snowball.dts
+++ b/arch/arm/boot/dts/ste-snowball.dts
@@ -146,8 +146,21 @@
 		};
 
 		vmmci: regulator-gpio {
+			compatible = "regulator-gpio";
+
 			gpios = <&gpio7 4 0x4>;
 			enable-gpio = <&gpio6 25 0x4>;
+
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <2900000>;
+			regulator-name = "mmci-reg";
+			regulator-type = "voltage";
+
+			startup-delay-us = <100>;
+			enable-active-high;
+
+			states = <1800000 0x1
+				  2900000 0x0>;
 		};
 
 		// External Micro SD slot
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ab86655c1f4b..0ca4a3eaf65d 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -39,11 +39,14 @@ CONFIG_ARCH_HIP04=y
 CONFIG_ARCH_KEYSTONE=y
 CONFIG_ARCH_MESON=y
 CONFIG_ARCH_MXC=y
+CONFIG_SOC_IMX50=y
 CONFIG_SOC_IMX51=y
 CONFIG_SOC_IMX53=y
 CONFIG_SOC_IMX6Q=y
 CONFIG_SOC_IMX6SL=y
+CONFIG_SOC_IMX6SX=y
 CONFIG_SOC_VF610=y
+CONFIG_SOC_LS1021A=y
 CONFIG_ARCH_OMAP3=y
 CONFIG_ARCH_OMAP4=y
 CONFIG_SOC_OMAP5=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 9ff7b54b2a83..3743ca221d40 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -393,7 +393,7 @@ CONFIG_TI_EDMA=y
 CONFIG_DMA_OMAP=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_EXTCON=m
-CONFIG_EXTCON_GPIO=m
+CONFIG_EXTCON_USB_GPIO=m
 CONFIG_EXTCON_PALMAS=m
 CONFIG_TI_EMIF=m
 CONFIG_PWM=y
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
index 8e3fcb924db6..2ef282f96651 100644
--- a/arch/arm/include/asm/dma-iommu.h
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -25,7 +25,7 @@ struct dma_iommu_mapping {
 };
 
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size);
 
 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
 
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 2f7e6ff67d51..0b579b2f4e0e 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -110,5 +110,6 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 bool xen_arch_need_swiotlb(struct device *dev,
 			   unsigned long pfn,
 			   unsigned long mfn);
+unsigned long xen_get_swiotlb_free_pages(unsigned int order);
 
 #endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 2499867dd0d8..df3f60cb1168 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -195,8 +195,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ		0
 #define KVM_ARM_IRQ_CPU_FIQ		1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX		127
+#endif
 
 /* One single KVM irqchip, ie. the VGIC */
 #define KVM_NR_IRQCHIPS          1
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index cc176b67c134..aebfbf79a1a3 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -80,9 +80,9 @@ ENTRY(stext)
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ initialising sctlr
 	adr	lr, BSYM(1f)			@ return (PIC) address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
  1:	b	__after_proc_init
 ENDPROC(stext)
 
@@ -117,9 +117,9 @@ ENTRY(secondary_startup)
 
 	adr	lr, BSYM(__after_proc_init)	@ return address
 	mov	r13, r12			@ __secondary_switched address
- ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
- THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
- THUMB(	ret	r12				)
+	ldr	r12, [r10, #PROCINFO_INITFUNC]
+	add	r12, r12, r10
+	ret	r12
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 91c7ba182dcd..213919ba326f 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -303,12 +303,17 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 
 static int of_pmu_irq_cfg(struct platform_device *pdev)
 {
-	int i;
+	int i, irq;
 	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 
 	if (!irqs)
 		return -ENOMEM;
 
+	/* Don't bother with PPIs; they're already affine */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq))
+		return 0;
+
 	for (i = 0; i < pdev->num_resources; ++i) {
 		struct device_node *dn;
 		int cpu;
@@ -317,7 +322,7 @@ static int of_pmu_irq_cfg(struct platform_device *pdev)
 				      i);
 		if (!dn) {
 			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(dn), i);
+				of_node_full_name(pdev->dev.of_node), i);
 			break;
 		}
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 6f536451ab78..d9631ecddd56 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -671,8 +671,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 		if (!irqchip_in_kernel(kvm))
 			return -ENXIO;
 
-		if (irq_num < VGIC_NR_PRIVATE_IRQS ||
-		    irq_num > KVM_ARM_IRQ_GIC_MAX)
+		if (irq_num < VGIC_NR_PRIVATE_IRQS)
 			return -EINVAL;
 
 		return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
diff --git a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
index fb8d4a2ad48c..a5edd7d60266 100644
--- a/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
+++ b/arch/arm/mach-imx/devices/platform-sdhci-esdhc-imx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 Pengutronix, Wolfram Sang <w.sang@pengutronix.de>
+ * Copyright (C) 2010 Pengutronix, Wolfram Sang <kernel@pengutronix.de>
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License version 2 as published by the
diff --git a/arch/arm/mach-omap2/prm-regbits-34xx.h b/arch/arm/mach-omap2/prm-regbits-34xx.h
index cbefbd7cfdb5..661d753df584 100644
--- a/arch/arm/mach-omap2/prm-regbits-34xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-34xx.h
@@ -112,6 +112,7 @@
 #define OMAP3430_VC_CMD_ONLP_SHIFT			16
 #define OMAP3430_VC_CMD_RET_SHIFT			8
 #define OMAP3430_VC_CMD_OFF_SHIFT			0
+#define OMAP3430_SREN_MASK				(1 << 4)
 #define OMAP3430_HSEN_MASK				(1 << 3)
 #define OMAP3430_MCODE_MASK				(0x7 << 0)
 #define OMAP3430_VALID_MASK				(1 << 24)
diff --git a/arch/arm/mach-omap2/prm-regbits-44xx.h b/arch/arm/mach-omap2/prm-regbits-44xx.h
index b1c7a33e00e7..e794828dee55 100644
--- a/arch/arm/mach-omap2/prm-regbits-44xx.h
+++ b/arch/arm/mach-omap2/prm-regbits-44xx.h
@@ -35,6 +35,7 @@
 #define OMAP4430_GLOBAL_WARM_SW_RST_SHIFT				1
 #define OMAP4430_GLOBAL_WUEN_MASK					(1 << 16)
 #define OMAP4430_HSMCODE_MASK						(0x7 << 0)
+#define OMAP4430_SRMODEEN_MASK						(1 << 4)
 #define OMAP4430_HSMODEEN_MASK						(1 << 3)
 #define OMAP4430_HSSCLL_SHIFT						24
 #define OMAP4430_ICEPICK_RST_SHIFT					9
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index be9ef834fa81..076fd20d7e5a 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -316,7 +316,8 @@ static void __init omap3_vc_init_pmic_signaling(struct voltagedomain *voltdm)
 	 * idle. And we can also scale voltages to zero for off-idle.
 	 * Note that no actual voltage scaling during off-idle will
 	 * happen unless the board specific twl4030 PMIC scripts are
-	 * loaded.
+	 * loaded. See also omap_vc_i2c_init for comments regarding
+	 * erratum i531.
 	 */
 	val = voltdm->read(OMAP3_PRM_VOLTCTRL_OFFSET);
 	if (!(val & OMAP3430_PRM_VOLTCTRL_SEL_OFF)) {
@@ -704,9 +705,16 @@ static void __init omap_vc_i2c_init(struct voltagedomain *voltdm)
 		return;
 	}
 
+	/*
+	 * Note that for omap3 OMAP3430_SREN_MASK clears SREN to work around
+	 * erratum i531 "Extra Power Consumed When Repeated Start Operation
+	 * Mode Is Enabled on I2C Interface Dedicated for Smart Reflex (I2C4)".
+	 * Otherwise I2C4 eventually leads into about 23mW extra power being
+	 * consumed even during off idle using VMODE.
+	 */
 	i2c_high_speed = voltdm->pmic->i2c_high_speed;
 	if (i2c_high_speed)
-		voltdm->rmw(vc->common->i2c_cfg_hsen_mask,
+		voltdm->rmw(vc->common->i2c_cfg_clear_mask,
 			    vc->common->i2c_cfg_hsen_mask,
 			    vc->common->i2c_cfg_reg);
 
diff --git a/arch/arm/mach-omap2/vc.h b/arch/arm/mach-omap2/vc.h
index cdbdd78e755e..89b83b7ff3ec 100644
--- a/arch/arm/mach-omap2/vc.h
+++ b/arch/arm/mach-omap2/vc.h
@@ -34,6 +34,7 @@ struct voltagedomain;
  * @cmd_ret_shift: RET field shift in PRM_VC_CMD_VAL_* register
  * @cmd_off_shift: OFF field shift in PRM_VC_CMD_VAL_* register
  * @i2c_cfg_reg: I2C configuration register offset
+ * @i2c_cfg_clear_mask: high-speed mode bit clear mask in I2C config register
  * @i2c_cfg_hsen_mask: high-speed mode bit field mask in I2C config register
  * @i2c_mcode_mask: MCODE field mask for I2C config register
  *
@@ -52,6 +53,7 @@ struct omap_vc_common {
 	u8 cmd_ret_shift;
 	u8 cmd_off_shift;
 	u8 i2c_cfg_reg;
+	u8 i2c_cfg_clear_mask;
 	u8 i2c_cfg_hsen_mask;
 	u8 i2c_mcode_mask;
 };
diff --git a/arch/arm/mach-omap2/vc3xxx_data.c b/arch/arm/mach-omap2/vc3xxx_data.c
index 75bc4aa22b3a..71d74c9172c1 100644
--- a/arch/arm/mach-omap2/vc3xxx_data.c
+++ b/arch/arm/mach-omap2/vc3xxx_data.c
@@ -40,6 +40,7 @@ static struct omap_vc_common omap3_vc_common = {
 	.cmd_onlp_shift	 = OMAP3430_VC_CMD_ONLP_SHIFT,
 	.cmd_ret_shift	 = OMAP3430_VC_CMD_RET_SHIFT,
 	.cmd_off_shift	 = OMAP3430_VC_CMD_OFF_SHIFT,
+	.i2c_cfg_clear_mask = OMAP3430_SREN_MASK | OMAP3430_HSEN_MASK,
 	.i2c_cfg_hsen_mask = OMAP3430_HSEN_MASK,
 	.i2c_cfg_reg	 = OMAP3_PRM_VC_I2C_CFG_OFFSET,
 	.i2c_mcode_mask	 = OMAP3430_MCODE_MASK,
diff --git a/arch/arm/mach-omap2/vc44xx_data.c b/arch/arm/mach-omap2/vc44xx_data.c
index 085e5d6a04fd..2abd5fa8a697 100644
--- a/arch/arm/mach-omap2/vc44xx_data.c
+++ b/arch/arm/mach-omap2/vc44xx_data.c
@@ -42,6 +42,7 @@ static const struct omap_vc_common omap4_vc_common = {
 	.cmd_ret_shift = OMAP4430_RET_SHIFT,
 	.cmd_off_shift = OMAP4430_OFF_SHIFT,
 	.i2c_cfg_reg = OMAP4_PRM_VC_CFG_I2C_MODE_OFFSET,
+	.i2c_cfg_clear_mask = OMAP4430_SRMODEEN_MASK | OMAP4430_HSMODEEN_MASK,
 	.i2c_cfg_hsen_mask = OMAP4430_HSMODEEN_MASK,
 	.i2c_mcode_mask	 = OMAP4430_HSMCODE_MASK,
 };
diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig
index 8896e71586f5..f09683687963 100644
--- a/arch/arm/mach-pxa/Kconfig
+++ b/arch/arm/mach-pxa/Kconfig
@@ -691,4 +691,13 @@ config SHARPSL_PM_MAX1111
 config PXA310_ULPI
 	bool
 
+config PXA_SYSTEMS_CPLDS
+	tristate "Motherboard cplds"
+	default ARCH_LUBBOCK || MACH_MAINSTONE
+	help
+	  This driver supports the Lubbock and Mainstone multifunction chip
+	  found on the pxa25x development platform system (Lubbock) and pxa27x
+	  development platform system (Mainstone). This IO board supports the
+	  interrupts handling, ethernet controller, flash chips, etc ...
+
 endif
diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile
index eb0bf7678a99..4087d334ecdf 100644
--- a/arch/arm/mach-pxa/Makefile
+++ b/arch/arm/mach-pxa/Makefile
@@ -90,4 +90,5 @@ obj-$(CONFIG_MACH_RAUMFELD_CONNECTOR)	+= raumfeld.o
 obj-$(CONFIG_MACH_RAUMFELD_SPEAKER)	+= raumfeld.o
 obj-$(CONFIG_MACH_ZIPIT2)	+= z2.o
 
+obj-$(CONFIG_PXA_SYSTEMS_CPLDS)	+= pxa_cplds_irqs.o
 obj-$(CONFIG_TOSA_BT)		+= tosa-bt.o
diff --git a/arch/arm/mach-pxa/include/mach/lubbock.h b/arch/arm/mach-pxa/include/mach/lubbock.h
index 958cd6af9384..1eecf794acd2 100644
--- a/arch/arm/mach-pxa/include/mach/lubbock.h
+++ b/arch/arm/mach-pxa/include/mach/lubbock.h
@@ -37,7 +37,9 @@
 #define LUB_GP			__LUB_REG(LUBBOCK_FPGA_PHYS + 0x100)
 
 /* Board specific IRQs */
-#define LUBBOCK_IRQ(x)		(IRQ_BOARD_START + (x))
+#define LUBBOCK_NR_IRQS		IRQ_BOARD_START
+
+#define LUBBOCK_IRQ(x)		(LUBBOCK_NR_IRQS + (x))
 #define LUBBOCK_SD_IRQ		LUBBOCK_IRQ(0)
 #define LUBBOCK_SA1111_IRQ	LUBBOCK_IRQ(1)
 #define LUBBOCK_USB_IRQ		LUBBOCK_IRQ(2)  /* usb connect */
@@ -47,8 +49,7 @@
 #define LUBBOCK_USB_DISC_IRQ	LUBBOCK_IRQ(6)  /* usb disconnect */
 #define LUBBOCK_LAST_IRQ	LUBBOCK_IRQ(6)
 
-#define LUBBOCK_SA1111_IRQ_BASE	(IRQ_BOARD_START + 16)
-#define LUBBOCK_NR_IRQS		(IRQ_BOARD_START + 16 + 55)
+#define LUBBOCK_SA1111_IRQ_BASE	(LUBBOCK_NR_IRQS + 32)
 
 #ifndef __ASSEMBLY__
 extern void lubbock_set_misc_wr(unsigned int mask, unsigned int set);
diff --git a/arch/arm/mach-pxa/include/mach/mainstone.h b/arch/arm/mach-pxa/include/mach/mainstone.h
index 1bfc4e822a41..e82a7d31104e 100644
--- a/arch/arm/mach-pxa/include/mach/mainstone.h
+++ b/arch/arm/mach-pxa/include/mach/mainstone.h
@@ -120,7 +120,9 @@
 #define MST_PCMCIA_PWR_VCC_50   0x4	   /* voltage VCC = 5.0V */
 
 /* board specific IRQs */
-#define MAINSTONE_IRQ(x)	(IRQ_BOARD_START + (x))
+#define MAINSTONE_NR_IRQS	IRQ_BOARD_START
+
+#define MAINSTONE_IRQ(x)	(MAINSTONE_NR_IRQS + (x))
 #define MAINSTONE_MMC_IRQ	MAINSTONE_IRQ(0)
 #define MAINSTONE_USIM_IRQ	MAINSTONE_IRQ(1)
 #define MAINSTONE_USBC_IRQ	MAINSTONE_IRQ(2)
@@ -136,6 +138,4 @@
 #define MAINSTONE_S1_STSCHG_IRQ	MAINSTONE_IRQ(14)
 #define MAINSTONE_S1_IRQ	MAINSTONE_IRQ(15)
 
-#define MAINSTONE_NR_IRQS	(IRQ_BOARD_START + 16)
-
 #endif
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index d8a1be619f21..4ac9ab80d24b 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -12,6 +12,7 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -123,84 +124,6 @@ void lubbock_set_misc_wr(unsigned int mask, unsigned int set)
 }
 EXPORT_SYMBOL(lubbock_set_misc_wr);
 
-static unsigned long lubbock_irq_enabled;
-
-static void lubbock_mask_irq(struct irq_data *d)
-{
-	int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
-	LUB_IRQ_MASK_EN = (lubbock_irq_enabled &= ~(1 << lubbock_irq));
-}
-
-static void lubbock_unmask_irq(struct irq_data *d)
-{
-	int lubbock_irq = (d->irq - LUBBOCK_IRQ(0));
-	/* the irq can be acknowledged only if deasserted, so it's done here */
-	LUB_IRQ_SET_CLR &= ~(1 << lubbock_irq);
-	LUB_IRQ_MASK_EN = (lubbock_irq_enabled |= (1 << lubbock_irq));
-}
-
-static struct irq_chip lubbock_irq_chip = {
-	.name		= "FPGA",
-	.irq_ack	= lubbock_mask_irq,
-	.irq_mask	= lubbock_mask_irq,
-	.irq_unmask	= lubbock_unmask_irq,
-};
-
-static void lubbock_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-	unsigned long pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
-	do {
-		/* clear our parent irq */
-		desc->irq_data.chip->irq_ack(&desc->irq_data);
-		if (likely(pending)) {
-			irq = LUBBOCK_IRQ(0) + __ffs(pending);
-			generic_handle_irq(irq);
-		}
-		pending = LUB_IRQ_SET_CLR & lubbock_irq_enabled;
-	} while (pending);
-}
-
-static void __init lubbock_init_irq(void)
-{
-	int irq;
-
-	pxa25x_init_irq();
-
-	/* setup extra lubbock irqs */
-	for (irq = LUBBOCK_IRQ(0); irq <= LUBBOCK_LAST_IRQ; irq++) {
-		irq_set_chip_and_handler(irq, &lubbock_irq_chip,
-					 handle_level_irq);
-		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
-
-	irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), lubbock_irq_handler);
-	irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void lubbock_irq_resume(void)
-{
-	LUB_IRQ_MASK_EN = lubbock_irq_enabled;
-}
-
-static struct syscore_ops lubbock_irq_syscore_ops = {
-	.resume = lubbock_irq_resume,
-};
-
-static int __init lubbock_irq_device_init(void)
-{
-	if (machine_is_lubbock()) {
-		register_syscore_ops(&lubbock_irq_syscore_ops);
-		return 0;
-	}
-	return -ENODEV;
-}
-
-device_initcall(lubbock_irq_device_init);
-
-#endif
-
 static int lubbock_udc_is_connected(void)
 {
 	return (LUB_MISC_RD & (1 << 9)) == 0;
@@ -383,11 +306,38 @@ static struct platform_device lubbock_flash_device[2] = {
 	},
 };
 
+static struct resource lubbock_cplds_resources[] = {
+	[0] = {
+		.start	= LUBBOCK_FPGA_PHYS + 0xc0,
+		.end	= LUBBOCK_FPGA_PHYS + 0xe0 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= PXA_GPIO_TO_IRQ(0),
+		.end	= PXA_GPIO_TO_IRQ(0),
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+	},
+	[2] = {
+		.start	= LUBBOCK_IRQ(0),
+		.end	= LUBBOCK_IRQ(6),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device lubbock_cplds_device = {
+	.name		= "pxa_cplds_irqs",
+	.id		= -1,
+	.resource	= &lubbock_cplds_resources[0],
+	.num_resources	= 3,
+};
+
+
 static struct platform_device *devices[] __initdata = {
 	&sa1111_device,
 	&smc91x_device,
 	&lubbock_flash_device[0],
 	&lubbock_flash_device[1],
+	&lubbock_cplds_device,
 };
 
 static struct pxafb_mode_info sharp_lm8v31_mode = {
@@ -648,7 +598,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)")
 	/* Maintainer: MontaVista Software Inc. */
 	.map_io		= lubbock_map_io,
 	.nr_irqs	= LUBBOCK_NR_IRQS,
-	.init_irq	= lubbock_init_irq,
+	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
 	.init_time	= pxa_timer_init,
 	.init_machine	= lubbock_init,
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index 78b84c0dfc79..2c0658cf6be2 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -13,6 +13,7 @@
  *  published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
@@ -122,92 +123,6 @@ static unsigned long mainstone_pin_config[] = {
 	GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
 };
 
-static unsigned long mainstone_irq_enabled;
-
-static void mainstone_mask_irq(struct irq_data *d)
-{
-	int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
-	MST_INTMSKENA = (mainstone_irq_enabled &= ~(1 << mainstone_irq));
-}
-
-static void mainstone_unmask_irq(struct irq_data *d)
-{
-	int mainstone_irq = (d->irq - MAINSTONE_IRQ(0));
-	/* the irq can be acknowledged only if deasserted, so it's done here */
-	MST_INTSETCLR &= ~(1 << mainstone_irq);
-	MST_INTMSKENA = (mainstone_irq_enabled |= (1 << mainstone_irq));
-}
-
-static struct irq_chip mainstone_irq_chip = {
-	.name		= "FPGA",
-	.irq_ack	= mainstone_mask_irq,
-	.irq_mask	= mainstone_mask_irq,
-	.irq_unmask	= mainstone_unmask_irq,
-};
-
-static void mainstone_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-	unsigned long pending = MST_INTSETCLR & mainstone_irq_enabled;
-	do {
-		/* clear useless edge notification */
-		desc->irq_data.chip->irq_ack(&desc->irq_data);
-		if (likely(pending)) {
-			irq = MAINSTONE_IRQ(0) + __ffs(pending);
-			generic_handle_irq(irq);
-		}
-		pending = MST_INTSETCLR & mainstone_irq_enabled;
-	} while (pending);
-}
-
-static void __init mainstone_init_irq(void)
-{
-	int irq;
-
-	pxa27x_init_irq();
-
-	/* setup extra Mainstone irqs */
-	for(irq = MAINSTONE_IRQ(0); irq <= MAINSTONE_IRQ(15); irq++) {
-		irq_set_chip_and_handler(irq, &mainstone_irq_chip,
-					 handle_level_irq);
-		if (irq == MAINSTONE_IRQ(10) || irq == MAINSTONE_IRQ(14))
-			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE | IRQF_NOAUTOEN);
-		else
-			set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-	}
-	set_irq_flags(MAINSTONE_IRQ(8), 0);
-	set_irq_flags(MAINSTONE_IRQ(12), 0);
-
-	MST_INTMSKENA = 0;
-	MST_INTSETCLR = 0;
-
-	irq_set_chained_handler(PXA_GPIO_TO_IRQ(0), mainstone_irq_handler);
-	irq_set_irq_type(PXA_GPIO_TO_IRQ(0), IRQ_TYPE_EDGE_FALLING);
-}
-
-#ifdef CONFIG_PM
-
-static void mainstone_irq_resume(void)
-{
-	MST_INTMSKENA = mainstone_irq_enabled;
-}
-
-static struct syscore_ops mainstone_irq_syscore_ops = {
-	.resume = mainstone_irq_resume,
-};
-
-static int __init mainstone_irq_device_init(void)
-{
-	if (machine_is_mainstone())
-		register_syscore_ops(&mainstone_irq_syscore_ops);
-
-	return 0;
-}
-
-device_initcall(mainstone_irq_device_init);
-
-#endif
-
-
 static struct resource smc91x_resources[] = {
 	[0] = {
 		.start	= (MST_ETH_PHYS + 0x300),
@@ -487,11 +402,37 @@ static struct platform_device mst_gpio_keys_device = {
 	},
 };
 
+static struct resource mst_cplds_resources[] = {
+	[0] = {
+		.start	= MST_FPGA_PHYS + 0xc0,
+		.end	= MST_FPGA_PHYS + 0xe0 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= PXA_GPIO_TO_IRQ(0),
+		.end	= PXA_GPIO_TO_IRQ(0),
+		.flags	= IORESOURCE_IRQ | IORESOURCE_IRQ_LOWEDGE,
+	},
+	[2] = {
+		.start	= MAINSTONE_IRQ(0),
+		.end	= MAINSTONE_IRQ(15),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device mst_cplds_device = {
+	.name		= "pxa_cplds_irqs",
+	.id		= -1,
+	.resource	= &mst_cplds_resources[0],
+	.num_resources	= 3,
+};
+
 static struct platform_device *platform_devices[] __initdata = {
 	&smc91x_device,
 	&mst_flash_device[0],
 	&mst_flash_device[1],
 	&mst_gpio_keys_device,
+	&mst_cplds_device,
 };
 
 static struct pxaohci_platform_data mainstone_ohci_platform_data = {
@@ -718,7 +659,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)")
 	.atag_offset	= 0x100,	/* BLOB boot parameter setting */
 	.map_io		= mainstone_map_io,
 	.nr_irqs	= MAINSTONE_NR_IRQS,
-	.init_irq	= mainstone_init_irq,
+	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
 	.init_time	= pxa_timer_init,
 	.init_machine	= mainstone_init,
diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
new file mode 100644
index 000000000000..f1aeb54fabe3
--- /dev/null
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -0,0 +1,200 @@
+/*
+ * Intel Reference Systems cplds
+ *
+ * Copyright (C) 2014 Robert Jarzmik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Cplds motherboard driver, supporting lubbock and mainstone SoC board.
+ */
+
+#include <linux/bitops.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#define FPGA_IRQ_MASK_EN 0x0
+#define FPGA_IRQ_SET_CLR 0x10
+
+#define CPLDS_NB_IRQ	32
+
+struct cplds {
+	void __iomem *base;
+	int irq;
+	unsigned int irq_mask;
+	struct gpio_desc *gpio0;
+	struct irq_domain *irqdomain;
+};
+
+static irqreturn_t cplds_irq_handler(int in_irq, void *d)
+{
+	struct cplds *fpga = d;
+	unsigned long pending;
+	unsigned int bit;
+
+	pending = readl(fpga->base + FPGA_IRQ_SET_CLR) & fpga->irq_mask;
+	for_each_set_bit(bit, &pending, CPLDS_NB_IRQ)
+		generic_handle_irq(irq_find_mapping(fpga->irqdomain, bit));
+
+	return IRQ_HANDLED;
+}
+
+static void cplds_irq_mask_ack(struct irq_data *d)
+{
+	struct cplds *fpga = irq_data_get_irq_chip_data(d);
+	unsigned int cplds_irq = irqd_to_hwirq(d);
+	unsigned int set, bit = BIT(cplds_irq);
+
+	fpga->irq_mask &= ~bit;
+	writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+	set = readl(fpga->base + FPGA_IRQ_SET_CLR);
+	writel(set & ~bit, fpga->base + FPGA_IRQ_SET_CLR);
+}
+
+static void cplds_irq_unmask(struct irq_data *d)
+{
+	struct cplds *fpga = irq_data_get_irq_chip_data(d);
+	unsigned int cplds_irq = irqd_to_hwirq(d);
+	unsigned int bit = BIT(cplds_irq);
+
+	fpga->irq_mask |= bit;
+	writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+}
+
+static struct irq_chip cplds_irq_chip = {
+	.name		= "pxa_cplds",
+	.irq_mask_ack	= cplds_irq_mask_ack,
+	.irq_unmask	= cplds_irq_unmask,
+	.flags		= IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
+};
+
+static int cplds_irq_domain_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hwirq)
+{
+	struct cplds *fpga = d->host_data;
+
+	irq_set_chip_and_handler(irq, &cplds_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, fpga);
+
+	return 0;
+}
+
+static const struct irq_domain_ops cplds_irq_domain_ops = {
+	.xlate = irq_domain_xlate_twocell,
+	.map = cplds_irq_domain_map,
+};
+
+static int cplds_resume(struct platform_device *pdev)
+{
+	struct cplds *fpga = platform_get_drvdata(pdev);
+
+	writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+
+	return 0;
+}
+
+static int cplds_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct cplds *fpga;
+	int ret;
+	unsigned int base_irq = 0;
+	unsigned long irqflags = 0;
+
+	fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
+	if (!fpga)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (res) {
+		fpga->irq = (unsigned int)res->start;
+		irqflags = res->flags;
+	}
+	if (!fpga->irq)
+		return -ENODEV;
+
+	base_irq = platform_get_irq(pdev, 1);
+	if (base_irq < 0)
+		base_irq = 0;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fpga->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fpga->base))
+		return PTR_ERR(fpga->base);
+
+	platform_set_drvdata(pdev, fpga);
+
+	writel(fpga->irq_mask, fpga->base + FPGA_IRQ_MASK_EN);
+	writel(0, fpga->base + FPGA_IRQ_SET_CLR);
+
+	ret = devm_request_irq(&pdev->dev, fpga->irq, cplds_irq_handler,
+			       irqflags, dev_name(&pdev->dev), fpga);
+	if (ret == -ENOSYS)
+		return -EPROBE_DEFER;
+
+	if (ret) {
+		dev_err(&pdev->dev, "couldn't request main irq%d: %d\n",
+			fpga->irq, ret);
+		return ret;
+	}
+
+	irq_set_irq_wake(fpga->irq, 1);
+	fpga->irqdomain = irq_domain_add_linear(pdev->dev.of_node,
+					       CPLDS_NB_IRQ,
+					       &cplds_irq_domain_ops, fpga);
+	if (!fpga->irqdomain)
+		return -ENODEV;
+
+	if (base_irq) {
+		ret = irq_create_strict_mappings(fpga->irqdomain, base_irq, 0,
+						 CPLDS_NB_IRQ);
+		if (ret) {
+			dev_err(&pdev->dev, "couldn't create the irq mapping %d..%d\n",
+				base_irq, base_irq + CPLDS_NB_IRQ);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int cplds_remove(struct platform_device *pdev)
+{
+	struct cplds *fpga = platform_get_drvdata(pdev);
+
+	irq_set_chip_and_handler(fpga->irq, NULL, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id cplds_id_table[] = {
+	{ .compatible = "intel,lubbock-cplds-irqs", },
+	{ .compatible = "intel,mainstone-cplds-irqs", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, cplds_id_table);
+
+static struct platform_driver cplds_driver = {
+	.driver		= {
+		.name	= "pxa_cplds_irqs",
+		.of_match_table = of_match_ptr(cplds_id_table),
+	},
+	.probe		= cplds_probe,
+	.remove		= cplds_remove,
+	.resume		= cplds_resume,
+};
+
+module_platform_driver(cplds_driver);
+
+MODULE_DESCRIPTION("PXA Cplds interrupts driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c
index b07d88602073..22812fe06460 100644
--- a/arch/arm/mach-rockchip/pm.c
+++ b/arch/arm/mach-rockchip/pm.c
@@ -44,9 +44,11 @@ static void __iomem *rk3288_bootram_base;
 static phys_addr_t rk3288_bootram_phy;
 
 static struct regmap *pmu_regmap;
+static struct regmap *grf_regmap;
 static struct regmap *sgrf_regmap;
 
 static u32 rk3288_pmu_pwr_mode_con;
+static u32 rk3288_grf_soc_con0;
 static u32 rk3288_sgrf_soc_con0;
 
 static inline u32 rk3288_l2_config(void)
@@ -70,12 +72,26 @@ static void rk3288_slp_mode_set(int level)
 {
 	u32 mode_set, mode_set1;
 
+	regmap_read(grf_regmap, RK3288_GRF_SOC_CON0, &rk3288_grf_soc_con0);
+
 	regmap_read(sgrf_regmap, RK3288_SGRF_SOC_CON0, &rk3288_sgrf_soc_con0);
 
 	regmap_read(pmu_regmap, RK3288_PMU_PWRMODE_CON,
 		    &rk3288_pmu_pwr_mode_con);
 
 	/*
+	 * We need set this bit GRF_FORCE_JTAG here, for the debug module,
+	 * otherwise, it may become inaccessible after resume.
+	 * This creates a potential security issue, as the sdmmc pins may
+	 * accept jtag data for a short time during resume if no card is
+	 * inserted.
+	 * But this is of course also true for the regular boot, before we
+	 * turn of the jtag/sdmmc autodetect.
+	 */
+	regmap_write(grf_regmap, RK3288_GRF_SOC_CON0, GRF_FORCE_JTAG |
+		     GRF_FORCE_JTAG_WRITE);
+
+	/*
 	 * SGRF_FAST_BOOT_EN - system to boot from FAST_BOOT_ADDR
 	 * PCLK_WDT_GATE - disable WDT during suspend.
 	 */
@@ -83,6 +99,13 @@ static void rk3288_slp_mode_set(int level)
 		     SGRF_PCLK_WDT_GATE | SGRF_FAST_BOOT_EN
 		     | SGRF_PCLK_WDT_GATE_WRITE | SGRF_FAST_BOOT_EN_WRITE);
 
+	/*
+	 * The dapswjdp can not auto reset before resume, that cause it may
+	 * access some illegal address during resume. Let's disable it before
+	 * suspend, and the MASKROM will enable it back.
+	 */
+	regmap_write(sgrf_regmap, RK3288_SGRF_CPU_CON0, SGRF_DAPDEVICEEN_WRITE);
+
 	/* booting address of resuming system is from this register value */
 	regmap_write(sgrf_regmap, RK3288_SGRF_FAST_BOOT_ADDR,
 		     rk3288_bootram_phy);
@@ -128,6 +151,9 @@ static void rk3288_slp_mode_set_resume(void)
 	regmap_write(sgrf_regmap, RK3288_SGRF_SOC_CON0,
 		     rk3288_sgrf_soc_con0 | SGRF_PCLK_WDT_GATE_WRITE
 		     | SGRF_FAST_BOOT_EN_WRITE);
+
+	regmap_write(grf_regmap, RK3288_GRF_SOC_CON0, rk3288_grf_soc_con0 |
+		     GRF_FORCE_JTAG_WRITE);
 }
 
 static int rockchip_lpmode_enter(unsigned long arg)
@@ -186,6 +212,13 @@ static int rk3288_suspend_init(struct device_node *np)
 		return PTR_ERR(pmu_regmap);
 	}
 
+	grf_regmap = syscon_regmap_lookup_by_compatible(
+				"rockchip,rk3288-grf");
+	if (IS_ERR(grf_regmap)) {
+		pr_err("%s: could not find grf regmap\n", __func__);
+		return PTR_ERR(pmu_regmap);
+	}
+
 	sram_np = of_find_compatible_node(NULL, NULL,
 					  "rockchip,rk3288-pmu-sram");
 	if (!sram_np) {
diff --git a/arch/arm/mach-rockchip/pm.h b/arch/arm/mach-rockchip/pm.h
index 03ff31d8282d..f8a747bc1437 100644
--- a/arch/arm/mach-rockchip/pm.h
+++ b/arch/arm/mach-rockchip/pm.h
@@ -48,6 +48,10 @@ static inline void rockchip_suspend_init(void)
 #define RK3288_PMU_WAKEUP_RST_CLR_CNT	0x44
 #define RK3288_PMU_PWRMODE_CON1		0x90
 
+#define RK3288_GRF_SOC_CON0		0x244
+#define GRF_FORCE_JTAG			BIT(12)
+#define GRF_FORCE_JTAG_WRITE		BIT(28)
+
 #define RK3288_SGRF_SOC_CON0		(0x0000)
 #define RK3288_SGRF_FAST_BOOT_ADDR	(0x0120)
 #define SGRF_PCLK_WDT_GATE		BIT(6)
@@ -55,6 +59,10 @@ static inline void rockchip_suspend_init(void)
 #define SGRF_FAST_BOOT_EN		BIT(8)
 #define SGRF_FAST_BOOT_EN_WRITE		BIT(24)
 
+#define RK3288_SGRF_CPU_CON0		(0x40)
+#define SGRF_DAPDEVICEEN		BIT(0)
+#define SGRF_DAPDEVICEEN_WRITE		BIT(16)
+
 #define RK3288_CRU_MODE_CON		0x50
 #define RK3288_CRU_SEL0_CON		0x60
 #define RK3288_CRU_SEL1_CON		0x64
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index d360ec044b66..b6cf3b449428 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -30,11 +30,30 @@
 #include "pm.h"
 
 #define RK3288_GRF_SOC_CON0 0x244
+#define RK3288_TIMER6_7_PHYS 0xff810000
 
 static void __init rockchip_timer_init(void)
 {
 	if (of_machine_is_compatible("rockchip,rk3288")) {
 		struct regmap *grf;
+		void __iomem *reg_base;
+
+		/*
+		 * Most/all uboot versions for rk3288 don't enable timer7
+		 * which is needed for the architected timer to work.
+		 * So make sure it is running during early boot.
+		 */
+		reg_base = ioremap(RK3288_TIMER6_7_PHYS, SZ_16K);
+		if (reg_base) {
+			writel(0, reg_base + 0x30);
+			writel(0xffffffff, reg_base + 0x20);
+			writel(0xffffffff, reg_base + 0x24);
+			writel(1, reg_base + 0x30);
+			dsb();
+			iounmap(reg_base);
+		} else {
+			pr_err("rockchip: could not map timer7 registers\n");
+		}
 
 		/*
 		 * Disable auto jtag/sdmmc switching that causes issues
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 36aaeb12e1a5..bf37e3c532f6 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -754,12 +754,12 @@ static struct platform_device vcc_sdhi1 = {
 };
 
 /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_POWER_OFF_CARD,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+	.flags		= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
 	.cd_gpio	= 167,
 };
 
@@ -796,12 +796,12 @@ static struct platform_device sdhi0_device = {
 };
 
 /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI1_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI1_RX,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi1_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI1_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI1_RX,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_POWER_OFF_CARD,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+	.flags		= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
 	/* Port72 cannot generate IRQs, will be used in polling mode. */
 	.cd_gpio	= 72,
 };
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c
index f27b5a833bf0..25558d1f417f 100644
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -201,12 +201,12 @@ static struct rcar_phy_platform_data usb_phy_platform_data __initdata =
 
 
 /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info __initdata = {
-	.dma_slave_tx	= HPBDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= HPBDMA_SLAVE_SDHI0_RX,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED,
-	.tmio_ocr_mask	= MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT,
+static struct tmio_mmc_data sdhi0_info __initdata = {
+	.chan_priv_tx	= (void *)HPBDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)HPBDMA_SLAVE_SDHI0_RX,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED,
+	.ocr_mask	= MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
+	.flags		= TMIO_MMC_HAS_IDLE_WAIT,
 };
 
 static struct resource sdhi0_resources[] __initdata = {
@@ -683,7 +683,7 @@ static void __init bockw_init(void)
 		platform_device_register_resndata(
 			NULL, "sh_mobile_sdhi", 0,
 			sdhi0_resources, ARRAY_SIZE(sdhi0_resources),
-			&sdhi0_info, sizeof(struct sh_mobile_sdhi_info));
+			&sdhi0_info, sizeof(struct tmio_mmc_data));
 	}
 
 	/* for Audio */
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 7c9b63bdde9f..260d8319fd82 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -442,11 +442,11 @@ static struct platform_device vcc_sdhi2 = {
 };
 
 /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.flags		= TMIO_MMC_HAS_IDLE_WAIT,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_POWER_OFF_CARD,
 };
 
@@ -484,13 +484,13 @@ static struct platform_device sdhi0_device = {
 };
 
 /* Micro SD */
-static struct sh_mobile_sdhi_info sdhi2_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI2_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI2_RX,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT |
+static struct tmio_mmc_data sdhi2_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI2_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI2_RX,
+	.flags		= TMIO_MMC_HAS_IDLE_WAIT |
 			  TMIO_MMC_USE_GPIO_CD |
 			  TMIO_MMC_WRPROTECT_DISABLE,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
 	.cd_gpio	= 13,
 };
 
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index 598f704f76ae..51db288f192a 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -122,11 +122,11 @@ static struct resource sdhi0_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sdhi0_platform_data = {
-	.dma_slave_tx = HPBDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx = HPBDMA_SLAVE_SDHI0_RX,
-	.tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-	.tmio_caps = MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi0_platform_data = {
+	.chan_priv_tx = (void *)HPBDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx = (void *)HPBDMA_SLAVE_SDHI0_RX,
+	.flags        = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
+	.capabilities = MMC_CAP_SD_HIGHSPEED,
 };
 
 static struct platform_device sdhi0_device = {
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index b7644310236b..b4f92b9a13ac 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -827,7 +827,7 @@ config KUSER_HELPERS
 
 config VDSO
 	bool "Enable VDSO for acceleration of some system calls"
-	depends on AEABI && MMU
+	depends on AEABI && MMU && CPU_V7
 	default y if ARM_ARCH_TIMER
 	select GENERIC_TIME_VSYSCALL
 	help
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 09c5fe3d30c2..7e7583ddd607 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1878,7 +1878,7 @@ struct dma_map_ops iommu_coherent_ops = {
  * arm_iommu_attach_device function.
  */
 struct dma_iommu_mapping *
-arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
 {
 	unsigned int bits = size >> PAGE_SHIFT;
 	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
@@ -1886,6 +1886,10 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
 	int extensions = 1;
 	int err = -ENOMEM;
 
+	/* currently only 32-bit DMA address space is supported */
+	if (size > DMA_BIT_MASK(32) + 1)
+		return ERR_PTR(-ERANGE);
+
 	if (!bitmap_size)
 		return ERR_PTR(-EINVAL);
 
@@ -2057,13 +2061,6 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	if (!iommu)
 		return false;
 
-	/*
-	 * currently arm_iommu_create_mapping() takes a max of size_t
-	 * for size param. So check this limit for now.
-	 */
-	if (size > SIZE_MAX)
-		return false;
-
 	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
 	if (IS_ERR(mapping)) {
 		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index aa0519eed698..774ef1323554 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -22,8 +22,6 @@
  *
  * These are the low level assembler for performing cache and TLB
  * functions on the arm1020.
- *
- *  CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index bff4c7f70fd6..ae3c27b71594 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -22,8 +22,6 @@
  *
  * These are the low level assembler for performing cache and TLB
  * functions on the arm1020e.
- *
- *  CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt
  */
 #include <linux/linkage.h>
 #include <linux/init.h>
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ede8c54ab4aa..32a47cc19076 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -441,9 +441,6 @@ ENTRY(cpu_arm925_set_pte_ext)
 	.type	__arm925_setup, #function
 __arm925_setup:
 	mov	r0, #0
-#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE)
-        orr     r0,r0,#1 << 7
-#endif
 
 	/* Transparent on, D-cache clean & flush mode. See  NOTE2 above */
         orr     r0,r0,#1 << 1			@ transparent mode on
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index e494d6d6acbe..92e08bf37aad 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -602,7 +602,6 @@ __\name\()_proc_info:
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
 	initfn	__feroceon_setup, __\name\()_proc_info
-	.long __feroceon_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index e1268f905026..e0e23582c8b4 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -54,6 +54,7 @@
 #define SEEN_DATA		(1 << (BPF_MEMWORDS + 3))
 
 #define FLAG_NEED_X_RESET	(1 << 0)
+#define FLAG_IMM_OVERFLOW	(1 << 1)
 
 struct jit_ctx {
 	const struct bpf_prog *skf;
@@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
 	/* PC in ARM mode == address of the instruction + 8 */
 	imm = offset - (8 + ctx->idx * 4);
 
+	if (imm & ~0xfff) {
+		/*
+		 * literal pool is too far, signal it into flags. we
+		 * can only detect it on the second pass unfortunately.
+		 */
+		ctx->flags |= FLAG_IMM_OVERFLOW;
+		return 0;
+	}
+
 	return imm;
 }
 
@@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx)
 		return;
 	}
 #endif
-	if (rm != ARM_R0)
-		emit(ARM_MOV_R(ARM_R0, rm), ctx);
+
+	/*
+	 * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
+	 * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
+	 * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
+	 * before using it as a source for ARM_R1.
+	 *
+	 * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
+	 * ARM_R5 (r_X) so there is no particular register overlap
+	 * issues.
+	 */
 	if (rn != ARM_R1)
 		emit(ARM_MOV_R(ARM_R1, rn), ctx);
+	if (rm != ARM_R0)
+		emit(ARM_MOV_R(ARM_R0, rm), ctx);
 
 	ctx->seen |= SEEN_CALL;
 	emit_mov_i(ARM_R3, (u32)jit_udiv, ctx);
@@ -855,6 +876,14 @@ b_epilogue:
 		default:
 			return -1;
 		}
+
+		if (ctx->flags & FLAG_IMM_OVERFLOW)
+			/*
+			 * this instruction generated an overflow when
+			 * trying to access the literal pool, so
+			 * delegate this filter to the kernel interpreter.
+			 */
+			return -1;
 	}
 
 	/* compute offsets only during the first pass */
@@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp)
 	ctx.idx = 0;
 
 	build_prologue(&ctx);
-	build_body(&ctx);
+	if (build_body(&ctx) < 0) {
+#if __LINUX_ARM_ARCH__ < 7
+		if (ctx.imm_count)
+			kfree(ctx.imms);
+#endif
+		bpf_jit_binary_free(header);
+		goto out;
+	}
 	build_epilogue(&ctx);
 
 	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore
index f8b69d84238e..6b47f6e0b032 100644
--- a/arch/arm/vdso/.gitignore
+++ b/arch/arm/vdso/.gitignore
@@ -1 +1,3 @@
 vdso.lds
+vdso.so.raw
+vdsomunge
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index bab0a8be7924..8aa791051029 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -10,8 +10,8 @@ ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
 ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
 ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
-obj-y += vdso.o
-extra-y += vdso.lds
+obj-$(CONFIG_VDSO) += vdso.o
+extra-$(CONFIG_VDSO) += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
 CFLAGS_REMOVE_vdso.o = -pg
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 793551d15f1d..498325074a06 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -4,6 +4,7 @@
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
+#include <linux/memblock.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -21,6 +22,20 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
+unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+	struct memblock_region *reg;
+	gfp_t flags = __GFP_NOWARN;
+
+	for_each_memblock(memory, reg) {
+		if (reg->base < (phys_addr_t)0xffffffff) {
+			flags |= __GFP_DMA;
+			break;
+		}
+	}
+	return __get_free_pages(flags, order);
+}
+
 enum dma_cache_op {
        DMA_UNMAP,
        DMA_MAP,
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index da5f20e8cc50..7796af4b1d6f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,5 +1,7 @@
 config ARM64
 	def_bool y
+	select ACPI_GENERIC_GSI if ACPI
+	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
@@ -29,6 +31,7 @@ config ARM64
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
@@ -758,6 +761,8 @@ source "drivers/Kconfig"
 
 source "drivers/firmware/Kconfig"
 
+source "drivers/acpi/Kconfig"
+
 source "fs/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index e74f6e0a208c..c8d3e0e86678 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -102,6 +102,7 @@
 		#address-cells = <2>;
 		#size-cells = <2>;
 		ranges;
+		dma-ranges = <0x0 0x0 0x0 0x0 0x400 0x0>;
 
 		clocks {
 			#address-cells = <2>;
@@ -362,6 +363,15 @@
 				reg-names = "csr-reg";
 				clock-output-names = "pcie4clk";
 			};
+
+			dmaclk: dmaclk@1f27c000 {
+				compatible = "apm,xgene-device-clock";
+				#clock-cells = <1>;
+				clocks = <&socplldiv2 0>;
+				reg = <0x0 0x1f27c000 0x0 0x1000>;
+				reg-names = "csr-reg";
+				clock-output-names = "dmaclk";
+			};
 		};
 
 		pcie0: pcie@1f2b0000 {
@@ -684,5 +694,21 @@
 			interrupts = <0x0 0x41 0x4>;
 			clocks = <&rngpkaclk 0>;
 		};
+
+		dma: dma@1f270000 {
+			compatible = "apm,xgene-storm-dma";
+			device_type = "dma";
+			reg = <0x0 0x1f270000 0x0 0x10000>,
+			      <0x0 0x1f200000 0x0 0x10000>,
+			      <0x0 0x1b008000 0x0 0x2000>,
+			      <0x0 0x1054a000 0x0 0x100>;
+			interrupts = <0x0 0x82 0x4>,
+				     <0x0 0xb8 0x4>,
+				     <0x0 0xb9 0x4>,
+				     <0x0 0xba 0x4>,
+				     <0x0 0xbb 0x4>;
+			dma-coherent;
+			clocks = <&dmaclk 0>;
+		};
 	};
 };
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
index 9499199924ae..6a37c3c6b11d 100644
--- a/arch/arm64/crypto/crc32-arm64.c
+++ b/arch/arm64/crypto/crc32-arm64.c
@@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
 	put_unaligned_le32(~ctx->crc, out);
 	return 0;
 }
 
 static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
 {
-	put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out);
+	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
 	return 0;
 }
 
@@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm)
 {
 	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
 
+	mctx->key = 0;
+	return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
 	mctx->key = ~0;
 	return 0;
 }
@@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = {
 	.setkey			=	chksum_setkey,
 	.init			=	chksum_init,
 	.update			=	chksumc_update,
-	.final			=	chksum_final,
+	.final			=	chksumc_final,
 	.finup			=	chksumc_finup,
 	.digest			=	chksumc_digest,
 	.descsize		=	sizeof(struct chksum_desc_ctx),
@@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = {
 		.cra_alignmask		=	0,
 		.cra_ctxsize		=	sizeof(struct chksum_ctx),
 		.cra_module		=	THIS_MODULE,
-		.cra_init		=	crc32_cra_init,
+		.cra_init		=	crc32c_cra_init,
 	}
 };
 
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 114e7cc5de8c..aefda9868627 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 
 static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
+	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
 	kernel_neon_begin_partial(16);
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 1340e44c048b..7cd587564a41 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 
 static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
+	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+
+	sctx->finalize = 0;
 	kernel_neon_begin_partial(28);
 	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
diff --git a/arch/arm64/include/asm/acenv.h b/arch/arm64/include/asm/acenv.h
new file mode 100644
index 000000000000..b49166fde7ea
--- /dev/null
+++ b/arch/arm64/include/asm/acenv.h
@@ -0,0 +1,18 @@
+/*
+ * ARM64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Linaro Ltd.
+ *   Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *   Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* It is required unconditionally by ACPI core, update it when needed. */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
new file mode 100644
index 000000000000..59c05d8ea4a0
--- /dev/null
+++ b/arch/arm64/include/asm/acpi.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *	Author: Al Stone <al.stone@linaro.org>
+ *	Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *	Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation;
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#include <linux/mm.h>
+#include <linux/irqchip/arm-gic-acpi.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+/* Basic configuration for ACPI */
+#ifdef	CONFIG_ACPI
+/* ACPI table mapping after acpi_gbl_permanent_mmap is set */
+static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
+					    acpi_size size)
+{
+	if (!page_is_ram(phys >> PAGE_SHIFT))
+		return ioremap(phys, size);
+
+	return ioremap_cache(phys, size);
+}
+#define acpi_os_ioremap acpi_os_ioremap
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HWID
+
+#define acpi_strict 1	/* No out-of-spec workarounds on ARM64 */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+/* 1 to indicate PSCI 0.2+ is implemented */
+static inline bool acpi_psci_present(void)
+{
+	return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT;
+}
+
+/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */
+static inline bool acpi_psci_use_hvc(void)
+{
+	return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
+}
+
+static inline void disable_acpi(void)
+{
+	acpi_disabled = 1;
+	acpi_pci_disabled = 1;
+	acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+	acpi_disabled = 0;
+	acpi_pci_disabled = 0;
+	acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpu_logical_map(cpu)
+
+/*
+ * It's used from ACPI core in kdump to boot UP system with SMP kernel,
+ * with this check the ACPI core will not override the CPU index
+ * obtained from GICC with 0 and not print some error message as well.
+ * Since MADT must provide at least one GICC structure for GIC
+ * initialization, CPU will be always available in MADT on ARM64.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+	return true;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+void __init acpi_init_cpus(void);
+
+#else
+static inline bool acpi_psci_present(void) { return false; }
+static inline bool acpi_psci_use_hvc(void) { return false; }
+static inline void acpi_init_cpus(void) { }
+#endif /* CONFIG_ACPI */
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index a5abb0062d6e..71f19c4dc0de 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -65,6 +65,14 @@ do {									\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p) : "r" (v) : "memory");	\
+		break;							\
 	case 4:								\
 		asm volatile ("stlr %w1, %0"				\
 				: "=Q" (*p) : "r" (v) : "memory");	\
@@ -81,6 +89,14 @@ do {									\
 	typeof(*p) ___p1;						\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (___p1) : "Q" (*p) : "memory");		\
+		break;							\
 	case 4:								\
 		asm volatile ("ldar %w0, %1"				\
 			: "=r" (___p1) : "Q" (*p) : "memory");		\
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index da301ee7395c..5a31d6716914 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -66,5 +66,6 @@ struct cpu_operations {
 extern const struct cpu_operations *cpu_ops[NR_CPUS];
 int __init cpu_read_ops(struct device_node *dn, int cpu);
 void __init cpu_read_bootcpu_ops(void);
+const struct cpu_operations *cpu_get_ops(const char *name);
 
 #endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 926495686554..95e6b6dcbe37 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,6 +62,9 @@ void __init early_fixmap_init(void);
 
 #define __early_set_fixmap __set_fixmap
 
+#define __late_set_fixmap __set_fixmap
+#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+
 extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
 
 #include <asm-generic/fixmap.h>
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 94c53674a31d..bbb251b14746 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
+#include <linux/irqchip/arm-gic-acpi.h>
+
 #include <asm-generic/irq.h>
 
 struct pt_regs;
@@ -8,4 +10,15 @@ struct pt_regs;
 extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
+static inline void acpi_irq_init(void)
+{
+	/*
+	 * Hardcode ACPI IRQ chip initialization to GICv2 for now.
+	 * Proper irqchip infrastructure will be implemented along with
+	 * incoming  GICv2m|GICv3|ITS bits.
+	 */
+	acpi_gic_init();
+}
+#define acpi_irq_init acpi_irq_init
+
 #endif
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 872ba939fcb2..b008a72f8bc0 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -27,6 +27,12 @@
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	/* no legacy IRQ on arm64 */
+	return -ENODEV;
+}
+
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
 	return 1;
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index e5312ea0ec1a..2454bc59c916 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,6 +14,7 @@
 #ifndef __ASM_PSCI_H
 #define __ASM_PSCI_H
 
-int psci_init(void);
+int psci_dt_init(void);
+int psci_acpi_init(void);
 
 #endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 780f82c827b6..bf22650b1a78 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -39,9 +39,10 @@ extern void show_ipi_list(struct seq_file *p, int prec);
 extern void handle_IPI(int ipinr, struct pt_regs *regs);
 
 /*
- * Setup the set of possible CPUs (via set_cpu_possible)
+ * Discover the set of possible CPUs and determine their
+ * SMP operations.
  */
-extern void smp_init_cpus(void);
+extern void of_smp_init_cpus(void);
 
 /*
  * Provide a function to raise an IPI cross call on CPUs in callmap.
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index c154c0b7eb60..d26832022127 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -188,8 +188,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ		0
 #define KVM_ARM_IRQ_CPU_FIQ		1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX		127
+#endif
 
 /* One single KVM irqchip, ie. the VGIC */
 #define KVM_NR_IRQCHIPS          1
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index b12e15b80516..426d0763c81b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -35,6 +35,7 @@ arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
+arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
new file mode 100644
index 000000000000..8b839558838e
--- /dev/null
+++ b/arch/arm64/kernel/acpi.c
@@ -0,0 +1,345 @@
+/*
+ *  ARM64 Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *	Author: Al Stone <al.stone@linaro.org>
+ *	Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *	Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *	Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *	Author: Naresh Bhat <naresh.bhat@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+
+int acpi_noirq = 1;		/* skip ACPI IRQ initialization */
+int acpi_disabled = 1;
+EXPORT_SYMBOL(acpi_disabled);
+
+int acpi_pci_disabled = 1;	/* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
+
+/* Processors with enabled flag and sane MPIDR */
+static int enabled_cpus;
+
+/* Boot CPU is valid or not in MADT */
+static bool bootcpu_valid  __initdata;
+
+static bool param_acpi_off __initdata;
+static bool param_acpi_force __initdata;
+
+static int __init parse_acpi(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	/* "acpi=off" disables both ACPI table parsing and interpreter */
+	if (strcmp(arg, "off") == 0)
+		param_acpi_off = true;
+	else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
+		param_acpi_force = true;
+	else
+		return -EINVAL;	/* Core will print when we return error */
+
+	return 0;
+}
+early_param("acpi", parse_acpi);
+
+static int __init dt_scan_depth1_nodes(unsigned long node,
+				       const char *uname, int depth,
+				       void *data)
+{
+	/*
+	 * Return 1 as soon as we encounter a node at depth 1 that is
+	 * not the /chosen node.
+	 */
+	if (depth == 1 && (strcmp(uname, "chosen") != 0))
+		return 1;
+	return 0;
+}
+
+/*
+ * __acpi_map_table() will be called before page_init(), so early_ioremap()
+ * or early_memremap() should be called here to for ACPI table mapping.
+ */
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+{
+	if (!size)
+		return NULL;
+
+	return early_memremap(phys, size);
+}
+
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+	if (!map || !size)
+		return;
+
+	early_memunmap(map, size);
+}
+
+/**
+ * acpi_map_gic_cpu_interface - generates a logical cpu number
+ * and map to MPIDR represented by GICC structure
+ */
+static void __init
+acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
+{
+	int i;
+	u64 mpidr = processor->arm_mpidr & MPIDR_HWID_BITMASK;
+	bool enabled = !!(processor->flags & ACPI_MADT_ENABLED);
+
+	if (mpidr == INVALID_HWID) {
+		pr_info("Skip MADT cpu entry with invalid MPIDR\n");
+		return;
+	}
+
+	total_cpus++;
+	if (!enabled)
+		return;
+
+	if (enabled_cpus >=  NR_CPUS) {
+		pr_warn("NR_CPUS limit of %d reached, Processor %d/0x%llx ignored.\n",
+			NR_CPUS, total_cpus, mpidr);
+		return;
+	}
+
+	/* Check if GICC structure of boot CPU is available in the MADT */
+	if (cpu_logical_map(0) == mpidr) {
+		if (bootcpu_valid) {
+			pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+			       mpidr);
+			return;
+		}
+
+		bootcpu_valid = true;
+	}
+
+	/*
+	 * Duplicate MPIDRs are a recipe for disaster. Scan
+	 * all initialized entries and check for
+	 * duplicates. If any is found just ignore the CPU.
+	 */
+	for (i = 1; i < enabled_cpus; i++) {
+		if (cpu_logical_map(i) == mpidr) {
+			pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+			       mpidr);
+			return;
+		}
+	}
+
+	if (!acpi_psci_present())
+		return;
+
+	cpu_ops[enabled_cpus] = cpu_get_ops("psci");
+	/* CPU 0 was already initialized */
+	if (enabled_cpus) {
+		if (!cpu_ops[enabled_cpus])
+			return;
+
+		if (cpu_ops[enabled_cpus]->cpu_init(NULL, enabled_cpus))
+			return;
+
+		/* map the logical cpu id to cpu MPIDR */
+		cpu_logical_map(enabled_cpus) = mpidr;
+	}
+
+	enabled_cpus++;
+}
+
+static int __init
+acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+				const unsigned long end)
+{
+	struct acpi_madt_generic_interrupt *processor;
+
+	processor = (struct acpi_madt_generic_interrupt *)header;
+
+	if (BAD_MADT_ENTRY(processor, end))
+		return -EINVAL;
+
+	acpi_table_print_madt_entry(header);
+	acpi_map_gic_cpu_interface(processor);
+	return 0;
+}
+
+/* Parse GIC cpu interface entries in MADT for SMP init */
+void __init acpi_init_cpus(void)
+{
+	int count, i;
+
+	/*
+	 * do a partial walk of MADT to determine how many CPUs
+	 * we have including disabled CPUs, and get information
+	 * we need for SMP init
+	 */
+	count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+			acpi_parse_gic_cpu_interface, 0);
+
+	if (!count) {
+		pr_err("No GIC CPU interface entries present\n");
+		return;
+	} else if (count < 0) {
+		pr_err("Error parsing GIC CPU interface entry\n");
+		return;
+	}
+
+	if (!bootcpu_valid) {
+		pr_err("MADT missing boot CPU MPIDR, not enabling secondaries\n");
+		return;
+	}
+
+	for (i = 0; i < enabled_cpus; i++)
+		set_cpu_possible(i, true);
+
+	/* Make boot-up look pretty */
+	pr_info("%d CPUs enabled, %d CPUs total\n", enabled_cpus, total_cpus);
+}
+
+/*
+ * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity
+ *			      checks on it
+ *
+ * Return 0 on success,  <0 on failure
+ */
+static int __init acpi_fadt_sanity_check(void)
+{
+	struct acpi_table_header *table;
+	struct acpi_table_fadt *fadt;
+	acpi_status status;
+	acpi_size tbl_size;
+	int ret = 0;
+
+	/*
+	 * FADT is required on arm64; retrieve it to check its presence
+	 * and carry out revision and ACPI HW reduced compliancy tests
+	 */
+	status = acpi_get_table_with_size(ACPI_SIG_FADT, 0, &table, &tbl_size);
+	if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+
+		pr_err("Failed to get FADT table, %s\n", msg);
+		return -ENODEV;
+	}
+
+	fadt = (struct acpi_table_fadt *)table;
+
+	/*
+	 * Revision in table header is the FADT Major revision, and there
+	 * is a minor revision of FADT which was introduced by ACPI 5.1,
+	 * we only deal with ACPI 5.1 or newer revision to get GIC and SMP
+	 * boot protocol configuration data.
+	 */
+	if (table->revision < 5 ||
+	   (table->revision == 5 && fadt->minor_revision < 1)) {
+		pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+		       table->revision, fadt->minor_revision);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
+		pr_err("FADT not ACPI hardware reduced compliant\n");
+		ret = -EINVAL;
+	}
+
+out:
+	/*
+	 * acpi_get_table_with_size() creates FADT table mapping that
+	 * should be released after parsing and before resuming boot
+	 */
+	early_acpi_os_unmap_memory(table, tbl_size);
+	return ret;
+}
+
+/*
+ * acpi_boot_table_init() called from setup_arch(), always.
+ *	1. find RSDP and get its address, and then find XSDT
+ *	2. extract all tables and checksums them all
+ *	3. check ACPI FADT revision
+ *	4. check ACPI FADT HW reduced flag
+ *
+ * We can parse ACPI boot-time tables such as MADT after
+ * this function is called.
+ *
+ * On return ACPI is enabled if either:
+ *
+ * - ACPI tables are initialized and sanity checks passed
+ * - acpi=force was passed in the command line and ACPI was not disabled
+ *   explicitly through acpi=off command line parameter
+ *
+ * ACPI is disabled on function return otherwise
+ */
+void __init acpi_boot_table_init(void)
+{
+	/*
+	 * Enable ACPI instead of device tree unless
+	 * - ACPI has been disabled explicitly (acpi=off), or
+	 * - the device tree is not empty (it has more than just a /chosen node)
+	 *   and ACPI has not been force enabled (acpi=force)
+	 */
+	if (param_acpi_off ||
+	    (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+		return;
+
+	/*
+	 * ACPI is disabled at this point. Enable it in order to parse
+	 * the ACPI tables and carry out sanity checks
+	 */
+	enable_acpi();
+
+	/*
+	 * If ACPI tables are initialized and FADT sanity checks passed,
+	 * leave ACPI enabled and carry on booting; otherwise disable ACPI
+	 * on initialization error.
+	 * If acpi=force was passed on the command line it forces ACPI
+	 * to be enabled even if its initialization failed.
+	 */
+	if (acpi_table_init() || acpi_fadt_sanity_check()) {
+		pr_err("Failed to init ACPI tables\n");
+		if (!param_acpi_force)
+			disable_acpi();
+	}
+}
+
+void __init acpi_gic_init(void)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	acpi_size tbl_size;
+	int err;
+
+	if (acpi_disabled)
+		return;
+
+	status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size);
+	if (ACPI_FAILURE(status)) {
+		const char *msg = acpi_format_exception(status);
+
+		pr_err("Failed to get MADT table, %s\n", msg);
+		return;
+	}
+
+	err = gic_v2_acpi_init(table);
+	if (err)
+		pr_err("Failed to initialize GIC IRQ controller");
+
+	early_acpi_os_unmap_memory((char *)table, tbl_size);
+}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 21033bba9390..28f8365edc4c 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,7 +24,6 @@
 #include <asm/cacheflush.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
-#include <asm/insn.h>
 #include <linux/stop_machine.h>
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -34,48 +33,6 @@ struct alt_region {
 	struct alt_instr *end;
 };
 
-/*
- * Decode the imm field of a b/bl instruction, and return the byte
- * offset as a signed value (so it can be used when computing a new
- * branch target).
- */
-static s32 get_branch_offset(u32 insn)
-{
-	s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
-
-	/* sign-extend the immediate before turning it into a byte offset */
-	return (imm << 6) >> 4;
-}
-
-static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
-{
-	u32 insn;
-
-	aarch64_insn_read(altinsnptr, &insn);
-
-	/* Stop the world on instructions we don't support... */
-	BUG_ON(aarch64_insn_is_cbz(insn));
-	BUG_ON(aarch64_insn_is_cbnz(insn));
-	BUG_ON(aarch64_insn_is_bcond(insn));
-	/* ... and there is probably more. */
-
-	if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
-		enum aarch64_insn_branch_type type;
-		unsigned long target;
-
-		if (aarch64_insn_is_b(insn))
-			type = AARCH64_INSN_BRANCH_NOLINK;
-		else
-			type = AARCH64_INSN_BRANCH_LINK;
-
-		target = (unsigned long)altinsnptr + get_branch_offset(insn);
-		insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
-						   target, type);
-	}
-
-	return insn;
-}
-
 static int __apply_alternatives(void *alt_region)
 {
 	struct alt_instr *alt;
@@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region)
 	u8 *origptr, *replptr;
 
 	for (alt = region->begin; alt < region->end; alt++) {
-		u32 insn;
-		int i;
-
 		if (!cpus_have_cap(alt->cpufeature))
 			continue;
 
@@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region)
 
 		origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
 		replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
-
-		for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
-			insn = get_alt_insn(origptr + i, replptr + i);
-			aarch64_insn_write(origptr + i, insn);
-		}
-
+		memcpy(origptr, replptr, alt->alt_len);
 		flush_icache_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + alt->alt_len));
 	}
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index cce952440c64..fb8ff9ba467a 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -35,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = {
 	NULL,
 };
 
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
 	const struct cpu_operations **ops = supported_cpu_ops;
 
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index 6f93c24ca801..4095379dc069 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -46,3 +47,27 @@ int pcibios_add_device(struct pci_dev *dev)
 
 	return 0;
 }
+
+/*
+ * raw_pci_read/write - Platform-specific PCI config space access.
+ */
+int raw_pci_read(unsigned int domain, unsigned int bus,
+		  unsigned int devfn, int reg, int len, u32 *val)
+{
+	return -ENXIO;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus,
+		unsigned int devfn, int reg, int len, u32 val)
+{
+	return -ENXIO;
+}
+
+#ifdef CONFIG_ACPI
+/* Root bridge scanning */
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+	/* TODO: Should be revisited when implementing PCI on ACPI */
+	return NULL;
+}
+#endif
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 195991dadc37..cce18c85d2e8 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1310,11 +1310,16 @@ static const struct of_device_id armpmu_of_device_ids[] = {
 
 static int armpmu_device_probe(struct platform_device *pdev)
 {
-	int i, *irqs;
+	int i, irq, *irqs;
 
 	if (!cpu_pmu)
 		return -ENODEV;
 
+	/* Don't bother with PPIs; they're already affine */
+	irq = platform_get_irq(pdev, 0);
+	if (irq >= 0 && irq_is_percpu(irq))
+		return 0;
+
 	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
 	if (!irqs)
 		return -ENOMEM;
@@ -1327,7 +1332,7 @@ static int armpmu_device_probe(struct platform_device *pdev)
 				      i);
 		if (!dn) {
 			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(dn), i);
+				of_node_full_name(pdev->dev.of_node), i);
 			break;
 		}
 
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 9b8a70ae64a1..ea18cb53921e 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -15,6 +15,7 @@
 
 #define pr_fmt(fmt) "psci: " fmt
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/smp.h>
@@ -24,6 +25,7 @@
 #include <linux/slab.h>
 #include <uapi/linux/psci.h>
 
+#include <asm/acpi.h>
 #include <asm/compiler.h>
 #include <asm/cpu_ops.h>
 #include <asm/errno.h>
@@ -273,39 +275,8 @@ static void psci_sys_poweroff(void)
 	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
 }
 
-/*
- * PSCI Function IDs for v0.2+ are well defined so use
- * standard values.
- */
-static int __init psci_0_2_init(struct device_node *np)
+static void __init psci_0_2_set_functions(void)
 {
-	int err, ver;
-
-	err = get_set_conduit_method(np);
-
-	if (err)
-		goto out_put_node;
-
-	ver = psci_get_version();
-
-	if (ver == PSCI_RET_NOT_SUPPORTED) {
-		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
-		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
-		err = -EOPNOTSUPP;
-		goto out_put_node;
-	} else {
-		pr_info("PSCIv%d.%d detected in firmware.\n",
-				PSCI_VERSION_MAJOR(ver),
-				PSCI_VERSION_MINOR(ver));
-
-		if (PSCI_VERSION_MAJOR(ver) == 0 &&
-				PSCI_VERSION_MINOR(ver) < 2) {
-			err = -EINVAL;
-			pr_err("Conflicting PSCI version detected.\n");
-			goto out_put_node;
-		}
-	}
-
 	pr_info("Using standard PSCI v0.2 function IDs\n");
 	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
 	psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -329,6 +300,60 @@ static int __init psci_0_2_init(struct device_node *np)
 	arm_pm_restart = psci_sys_reset;
 
 	pm_power_off = psci_sys_poweroff;
+}
+
+/*
+ * Probe function for PSCI firmware versions >= 0.2
+ */
+static int __init psci_probe(void)
+{
+	int ver = psci_get_version();
+
+	if (ver == PSCI_RET_NOT_SUPPORTED) {
+		/*
+		 * PSCI versions >=0.2 mandates implementation of
+		 * PSCI_VERSION.
+		 */
+		pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+		return -EOPNOTSUPP;
+	} else {
+		pr_info("PSCIv%d.%d detected in firmware.\n",
+				PSCI_VERSION_MAJOR(ver),
+				PSCI_VERSION_MINOR(ver));
+
+		if (PSCI_VERSION_MAJOR(ver) == 0 &&
+				PSCI_VERSION_MINOR(ver) < 2) {
+			pr_err("Conflicting PSCI version detected.\n");
+			return -EINVAL;
+		}
+	}
+
+	psci_0_2_set_functions();
+
+	return 0;
+}
+
+/*
+ * PSCI init function for PSCI versions >=0.2
+ *
+ * Probe based on PSCI PSCI_VERSION function
+ */
+static int __init psci_0_2_init(struct device_node *np)
+{
+	int err;
+
+	err = get_set_conduit_method(np);
+
+	if (err)
+		goto out_put_node;
+	/*
+	 * Starting with v0.2, the PSCI specification introduced a call
+	 * (PSCI_VERSION) that allows probing the firmware version, so
+	 * that PSCI function IDs and version specific initialization
+	 * can be carried out according to the specific version reported
+	 * by firmware
+	 */
+	err = psci_probe();
 
 out_put_node:
 	of_node_put(np);
@@ -381,7 +406,7 @@ static const struct of_device_id psci_of_match[] __initconst = {
 	{},
 };
 
-int __init psci_init(void)
+int __init psci_dt_init(void)
 {
 	struct device_node *np;
 	const struct of_device_id *matched_np;
@@ -396,6 +421,27 @@ int __init psci_init(void)
 	return init_fn(np);
 }
 
+/*
+ * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
+ * explicitly clarified in SBBR
+ */
+int __init psci_acpi_init(void)
+{
+	if (!acpi_psci_present()) {
+		pr_info("is not implemented in ACPI.\n");
+		return -EOPNOTSUPP;
+	}
+
+	pr_info("probing for conduit method from ACPI.\n");
+
+	if (acpi_psci_use_hvc())
+		invoke_psci_fn = __invoke_psci_fn_hvc;
+	else
+		invoke_psci_fn = __invoke_psci_fn_smc;
+
+	return psci_probe();
+}
+
 #ifdef CONFIG_SMP
 
 static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 51ef97274b52..74753132c3ac 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/stddef.h>
@@ -46,6 +47,7 @@
 #include <linux/efi.h>
 #include <linux/personality.h>
 
+#include <asm/acpi.h>
 #include <asm/fixmap.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
@@ -395,18 +397,27 @@ void __init setup_arch(char **cmdline_p)
 	efi_init();
 	arm64_memblock_init();
 
+	/* Parse the ACPI tables for possible boot-time configuration */
+	acpi_boot_table_init();
+
 	paging_init();
 	request_standard_resources();
 
 	early_ioremap_reset();
 
-	unflatten_device_tree();
-
-	psci_init();
+	if (acpi_disabled) {
+		unflatten_device_tree();
+		psci_dt_init();
+		cpu_read_bootcpu_ops();
+#ifdef CONFIG_SMP
+		of_smp_init_cpus();
+#endif
+	} else {
+		psci_acpi_init();
+		acpi_init_cpus();
+	}
 
-	cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
-	smp_init_cpus();
 	smp_build_mpidr_hash();
 #endif
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 714411f62391..2cb008177252 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -323,7 +323,7 @@ void __init smp_prepare_boot_cpu(void)
  * cpu logical map array containing MPIDR values related to logical
  * cpus. Assumes that cpu_logical_map(0) has already been initialized.
  */
-void __init smp_init_cpus(void)
+void __init of_smp_init_cpus(void)
 {
 	struct device_node *dn = NULL;
 	unsigned int i, cpu = 1;
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 1a7125c3099b..42f9195cf2f8 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/clocksource.h>
 #include <linux/clk-provider.h>
+#include <linux/acpi.h>
 
 #include <clocksource/arm_arch_timer.h>
 
@@ -72,6 +73,12 @@ void __init time_init(void)
 
 	tick_setup_hrtimer_broadcast();
 
+	/*
+	 * Since ACPI or FDT will only one be available in the system,
+	 * we can use acpi_generic_timer_init() here safely
+	 */
+	acpi_generic_timer_init();
+
 	arch_timer_rate = arch_timer_get_rate();
 	if (!arch_timer_rate)
 		panic("Unable to initialise architected timer.\n");
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index ef7d112f5ce0..b0bd4e5fd5cf 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -67,8 +67,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
 
 		*ret_page = phys_to_page(phys);
 		ptr = (void *)val;
-		if (flags & __GFP_ZERO)
-			memset(ptr, 0, size);
+		memset(ptr, 0, size);
 	}
 
 	return ptr;
@@ -105,7 +104,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 		struct page *page;
 		void *addr;
 
-		size = PAGE_ALIGN(size);
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
 							get_order(size));
 		if (!page)
@@ -113,8 +111,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 
 		*dma_handle = phys_to_dma(dev, page_to_phys(page));
 		addr = page_address(page);
-		if (flags & __GFP_ZERO)
-			memset(addr, 0, size);
+		memset(addr, 0, size);
 		return addr;
 	} else {
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
@@ -195,6 +192,8 @@ static void __dma_free(struct device *dev, size_t size,
 {
 	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
 
+	size = PAGE_ALIGN(size);
+
 	if (!is_device_dma_coherent(dev)) {
 		if (__free_from_pool(vaddr, size))
 			return;
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 74c256744b25..f3d6221cd5bd 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -328,10 +328,12 @@ static int ptdump_init(void)
 			for (j = 0; j < pg_level[i].num; j++)
 				pg_level[i].mask |= pg_level[i].bits[j].mask;
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
 	address_markers[VMEMMAP_START_NR].start_address =
 				(unsigned long)virt_to_page(PAGE_OFFSET);
 	address_markers[VMEMMAP_END_NR].start_address =
 				(unsigned long)virt_to_page(high_memory);
+#endif
 
 	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
 				 &ptdump_fops);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index edba042b2325..dc6a4842683a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -487,7 +487,7 @@ emit_cond_jmp:
 			return -EINVAL;
 		}
 
-		imm64 = (u64)insn1.imm << 32 | imm;
+		imm64 = (u64)insn1.imm << 32 | (u32)imm;
 		emit_a64_mov_i64(dst, imm64, ctx);
 
 		return 1;
diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig
index 383007877b2b..99c00d835f47 100644
--- a/arch/blackfin/configs/BF518F-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
diff --git a/arch/blackfin/configs/BF527-TLL6527M_defconfig b/arch/blackfin/configs/BF527-TLL6527M_defconfig
index cd0636bb24a0..cdeb51856f26 100644
--- a/arch/blackfin/configs/BF527-TLL6527M_defconfig
+++ b/arch/blackfin/configs/BF527-TLL6527M_defconfig
@@ -67,7 +67,6 @@ CONFIG_BFIN_SIR0=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig
index 16273a922056..ed7d2c096739 100644
--- a/arch/blackfin/configs/BF533-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig
@@ -50,7 +50,6 @@ CONFIG_IRTTY_SIR=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig
index 0df2f921f7e5..0c241f4d28d7 100644
--- a/arch/blackfin/configs/BF533-STAMP_defconfig
+++ b/arch/blackfin/configs/BF533-STAMP_defconfig
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig
index 91d3eda42742..e5360b30e39a 100644
--- a/arch/blackfin/configs/BF537-STAMP_defconfig
+++ b/arch/blackfin/configs/BF537-STAMP_defconfig
@@ -55,13 +55,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=m
 CONFIG_MTD_PHYSMAP=m
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
 CONFIG_NET_BFIN=y
diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig
index be03be6ba543..60f6fb86125c 100644
--- a/arch/blackfin/configs/BF538-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig
@@ -60,7 +60,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
diff --git a/arch/blackfin/configs/BF561-ACVILON_defconfig b/arch/blackfin/configs/BF561-ACVILON_defconfig
index 802f9c421621..78f6bc79f910 100644
--- a/arch/blackfin/configs/BF561-ACVILON_defconfig
+++ b/arch/blackfin/configs/BF561-ACVILON_defconfig
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_PLATRAM=y
 CONFIG_MTD_PHRAM=y
diff --git a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
index e2a2fa5935ce..fac8bb578249 100644
--- a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
@@ -52,7 +52,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig
index 680730eeaf23..2a2e4d0cebc1 100644
--- a/arch/blackfin/configs/BF561-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig
index fcec5ce71392..ba4267f658af 100644
--- a/arch/blackfin/configs/BF609-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig
@@ -105,6 +105,7 @@ CONFIG_SPI=y
 CONFIG_SPI_ADI_V3=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MCP23S08=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig
index 05108b85ab12..1902bb05d086 100644
--- a/arch/blackfin/configs/CM-BF527_defconfig
+++ b/arch/blackfin/configs/CM-BF527_defconfig
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig
index 5e0db82b679e..9a5716d57ebc 100644
--- a/arch/blackfin/configs/CM-BF533_defconfig
+++ b/arch/blackfin/configs/CM-BF533_defconfig
@@ -37,7 +37,6 @@ CONFIG_UNIX=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig
index 2e47df77490f..684592884349 100644
--- a/arch/blackfin/configs/CM-BF537E_defconfig
+++ b/arch/blackfin/configs/CM-BF537E_defconfig
@@ -52,7 +52,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig
index 6da629ffc2f1..d9915e984787 100644
--- a/arch/blackfin/configs/CM-BF537U_defconfig
+++ b/arch/blackfin/configs/CM-BF537U_defconfig
@@ -48,7 +48,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig
index 9ff79df6825c..92d8130cdb51 100644
--- a/arch/blackfin/configs/CM-BF548_defconfig
+++ b/arch/blackfin/configs/CM-BF548_defconfig
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig
index d6dd98e67146..fa8d91132a57 100644
--- a/arch/blackfin/configs/CM-BF561_defconfig
+++ b/arch/blackfin/configs/CM-BF561_defconfig
@@ -52,7 +52,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/DNP5370_defconfig b/arch/blackfin/configs/DNP5370_defconfig
index 2b58cb221283..88600593c731 100644
--- a/arch/blackfin/configs/DNP5370_defconfig
+++ b/arch/blackfin/configs/DNP5370_defconfig
@@ -36,7 +36,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_DEBUG=y
 CONFIG_MTD_DEBUG_VERBOSE=1
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_NFTL=y
 CONFIG_NFTL_RW=y
diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig
index 5adf0da58499..9e3ae4b36d20 100644
--- a/arch/blackfin/configs/IP0X_defconfig
+++ b/arch/blackfin/configs/IP0X_defconfig
@@ -43,7 +43,6 @@ CONFIG_IP_NF_TARGET_REJECT=y
 CONFIG_IP_NF_MANGLE=y
 # CONFIG_WIRELESS is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig
index a6a7298962ed..c7926812971c 100644
--- a/arch/blackfin/configs/PNAV-10_defconfig
+++ b/arch/blackfin/configs/PNAV-10_defconfig
@@ -46,7 +46,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig
index bc216646fe18..23fdc57d657a 100644
--- a/arch/blackfin/configs/SRV1_defconfig
+++ b/arch/blackfin/configs/SRV1_defconfig
@@ -38,7 +38,6 @@ CONFIG_IRTTY_SIR=m
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
diff --git a/arch/blackfin/configs/TCM-BF518_defconfig b/arch/blackfin/configs/TCM-BF518_defconfig
index ea88158ab432..e28959479fe0 100644
--- a/arch/blackfin/configs/TCM-BF518_defconfig
+++ b/arch/blackfin/configs/TCM-BF518_defconfig
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig
index c1f45f15295c..39e85cce95d7 100644
--- a/arch/blackfin/configs/TCM-BF537_defconfig
+++ b/arch/blackfin/configs/TCM-BF537_defconfig
@@ -44,7 +44,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h
index dccae26805b0..4e8ad0523118 100644
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -11,27 +11,12 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 
-#define DECLARE_BFIN_RAW_READX(size, type, asm, asm_sign) \
-static inline type __raw_read##size(const volatile void __iomem *addr) \
-{ \
-	unsigned int val; \
-	int tmp; \
-	__asm__ __volatile__ ( \
-		"cli %1;" \
-		"NOP; NOP; SSYNC;" \
-		"%0 = "#asm" [%2] "#asm_sign";" \
-		"sti %1;" \
-		: "=d"(val), "=d"(tmp) \
-		: "a"(addr) \
-	); \
-	return (type) val; \
-}
-DECLARE_BFIN_RAW_READX(b, u8, b, (z))
-#define __raw_readb __raw_readb
-DECLARE_BFIN_RAW_READX(w, u16, w, (z))
-#define __raw_readw __raw_readw
-DECLARE_BFIN_RAW_READX(l, u32, , )
-#define __raw_readl __raw_readl
+#define __raw_readb bfin_read8
+#define __raw_readw bfin_read16
+#define __raw_readl bfin_read32
+#define __raw_writeb(val, addr) bfin_write8(addr, val)
+#define __raw_writew(val, addr) bfin_write16(addr, val)
+#define __raw_writel(val, addr) bfin_write32(addr, val)
 
 extern void outsb(unsigned long port, const void *addr, unsigned long count);
 extern void outsw(unsigned long port, const void *addr, unsigned long count);
@@ -50,14 +35,6 @@ extern void insl_16(unsigned long port, void *addr, unsigned long count);
 #define insw insw
 #define insl insl
 
-extern void dma_outsb(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsw(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsl(unsigned long port, const void *addr, unsigned short count);
-
-extern void dma_insb(unsigned long port, void *addr, unsigned short count);
-extern void dma_insw(unsigned long port, void *addr, unsigned short count);
-extern void dma_insl(unsigned long port, void *addr, unsigned short count);
-
 /**
  * I/O write barrier
  *
diff --git a/arch/blackfin/include/uapi/asm/unistd.h b/arch/blackfin/include/uapi/asm/unistd.h
index a4511649a864..0cb9078ef482 100644
--- a/arch/blackfin/include/uapi/asm/unistd.h
+++ b/arch/blackfin/include/uapi/asm/unistd.h
@@ -401,8 +401,18 @@
 #define __NR_sendmmsg		380
 #define __NR_process_vm_readv	381
 #define __NR_process_vm_writev	382
+#define __NR_kcmp		383
+#define __NR_finit_module	384
+#define __NR_sched_setattr	385
+#define __NR_sched_getattr	386
+#define __NR_renameat2		387
+#define __NR_seccomp		388
+#define __NR_getrandom		389
+#define __NR_memfd_create	390
+#define __NR_bpf		391
+#define __NR_execveat		392
 
-#define __NR_syscall		383
+#define __NR_syscall		393  /* For internal using, not implemented */
 #define NR_syscalls		__NR_syscall
 
 /* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c
index 947ad0832338..86b1cd3a0309 100644
--- a/arch/blackfin/kernel/debug-mmrs.c
+++ b/arch/blackfin/kernel/debug-mmrs.c
@@ -1620,7 +1620,6 @@ static int __init bfin_debug_mmrs_init(void)
 	D16(USB_APHY_CNTRL);
 	D16(USB_APHY_CALIB);
 	D16(USB_APHY_CNTRL2);
-	D16(USB_PHY_TEST);
 	D16(USB_PLLOSC_CTRL);
 	D16(USB_SRP_CLKDIV);
 	D16(USB_EP_NI0_TXMAXP);
diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index fa53faeeb0e9..cf773f0f1f30 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -330,9 +330,6 @@ static void bfin_disable_hw_debug(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-extern void generic_exec_single(int cpu, struct call_single_data *data, int wait);
-static struct call_single_data kgdb_smp_ipi_data[NR_CPUS];
-
 void kgdb_passive_cpu_callback(void *info)
 {
 	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
@@ -343,15 +340,14 @@ void kgdb_roundup_cpus(unsigned long flags)
 	unsigned int cpu;
 
 	for (cpu = cpumask_first(cpu_online_mask); cpu < nr_cpu_ids;
-		cpu = cpumask_next(cpu, cpu_online_mask)) {
-		kgdb_smp_ipi_data[cpu].func = kgdb_passive_cpu_callback;
-		generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
-	}
+		cpu = cpumask_next(cpu, cpu_online_mask))
+		smp_call_function_single(cpu, kgdb_passive_cpu_callback,
+					 NULL, 0);
 }
 
 void kgdb_roundup_cpu(int cpu, unsigned long flags)
 {
-	generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
+	smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0);
 }
 #endif
 
@@ -359,19 +355,6 @@ void kgdb_roundup_cpu(int cpu, unsigned long flags)
 static unsigned long kgdb_arch_imask;
 #endif
 
-void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
-	if (kgdb_single_step)
-		preempt_enable();
-
-#ifdef CONFIG_IPIPE
-	if (kgdb_arch_imask) {
-		cpu_pda[raw_smp_processor_id()].ex_imask = kgdb_arch_imask;
-		kgdb_arch_imask = 0;
-	}
-#endif
-}
-
 int kgdb_arch_handle_exception(int vector, int signo,
 			       int err_code, char *remcom_in_buffer,
 			       char *remcom_out_buffer,
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index 4f424ae3b36d..ad82468bd94d 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -1464,5 +1464,5 @@ void __init cmdline_init(const char *r0)
 {
 	early_shadow_stamp();
 	if (r0)
-		strncpy(command_line, r0, COMMAND_LINE_SIZE);
+		strlcpy(command_line, r0, COMMAND_LINE_SIZE);
 }
diff --git a/arch/blackfin/mach-bf527/include/mach/cdefBF525.h b/arch/blackfin/mach-bf527/include/mach/cdefBF525.h
index d90a85b6b6b9..bd045318a250 100644
--- a/arch/blackfin/mach-bf527/include/mach/cdefBF525.h
+++ b/arch/blackfin/mach-bf527/include/mach/cdefBF525.h
@@ -122,11 +122,6 @@
 #define bfin_read_USB_APHY_CNTRL2()		bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)		bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()		bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)		bfin_write16(USB_PHY_TEST, val)
-
 #define bfin_read_USB_PLLOSC_CTRL()		bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)		bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()		bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf527/include/mach/defBF525.h b/arch/blackfin/mach-bf527/include/mach/defBF525.h
index 71578d964d00..591e00ff620a 100644
--- a/arch/blackfin/mach-bf527/include/mach/defBF525.h
+++ b/arch/blackfin/mach-bf527/include/mach/defBF525.h
@@ -77,10 +77,6 @@
 
 #define                  USB_APHY_CNTRL2  0xffc039e8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc039ec   /* Used for reducing simulation time and simplifies FIFO testability */
-
 #define                  USB_PLLOSC_CTRL  0xffc039f0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc039f4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
diff --git a/arch/blackfin/mach-bf548/include/mach/cdefBF542.h b/arch/blackfin/mach-bf548/include/mach/cdefBF542.h
index d09c19cd1b7b..916347901d5a 100644
--- a/arch/blackfin/mach-bf548/include/mach/cdefBF542.h
+++ b/arch/blackfin/mach-bf548/include/mach/cdefBF542.h
@@ -241,10 +241,6 @@
 #define bfin_read_USB_APHY_CNTRL2()		bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)		bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()		bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)		bfin_write16(USB_PHY_TEST, val)
 #define bfin_read_USB_PLLOSC_CTRL()		bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)		bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()		bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf548/include/mach/cdefBF547.h b/arch/blackfin/mach-bf548/include/mach/cdefBF547.h
index bcb9726dea54..be83f645bba8 100644
--- a/arch/blackfin/mach-bf548/include/mach/cdefBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/cdefBF547.h
@@ -408,10 +408,6 @@
 #define bfin_read_USB_APHY_CNTRL2()		bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)		bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()		bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)		bfin_write16(USB_PHY_TEST, val)
 #define bfin_read_USB_PLLOSC_CTRL()		bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)		bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()		bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF542.h b/arch/blackfin/mach-bf548/include/mach/defBF542.h
index 51161575a163..ae4b889e3606 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF542.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF542.h
@@ -140,9 +140,6 @@
 #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
 #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
 #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h
index d55dcc0f5324..7cc7928a3c73 100644
--- a/arch/blackfin/mach-bf548/include/mach/defBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h
@@ -254,9 +254,6 @@
 #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
 #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
 #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c
index 7f9fc272ec30..2c61fc0c98f9 100644
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -780,8 +780,8 @@ static struct adi_spi3_chip spidev_chip_info = {
 };
 #endif
 
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-static struct platform_device bfin_i2s_pcm = {
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+static struct platform_device bfin_pcm = {
 	.name = "bfin-i2s-pcm-audio",
 	.id = -1,
 };
@@ -1034,7 +1034,6 @@ static struct adv7842_platform_data adv7842_data = {
 	.i2c_infoframe = 0x48,
 	.i2c_cec = 0x49,
 	.i2c_avlink = 0x4a,
-	.i2c_ex = 0x26,
 };
 
 static struct bfin_capture_config bfin_capture_data = {
@@ -1104,7 +1103,6 @@ static struct disp_route adv7511_routes[] = {
 
 static struct adv7511_platform_data adv7511_data = {
 	.edid_addr = 0x7e,
-	.i2c_ex = 0x25,
 };
 
 static struct bfin_display_config bfin_display_data = {
@@ -1209,6 +1207,35 @@ static struct platform_device bfin_display_device = {
 };
 #endif
 
+#if defined(CONFIG_FB_BF609_NL8048) \
+	|| defined(CONFIG_FB_BF609_NL8048_MODULE)
+static struct resource nl8048_resources[] = {
+	{
+		.start = EPPI2_STAT,
+		.end = EPPI2_STAT,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = CH_EPPI2_CH0,
+		.end = CH_EPPI2_CH0,
+		.flags = IORESOURCE_DMA,
+	},
+	{
+		.start = IRQ_EPPI2_STAT,
+		.end = IRQ_EPPI2_STAT,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+static struct platform_device bfin_fb_device = {
+	.name = "bf609_nl8048",
+	.num_resources = ARRAY_SIZE(nl8048_resources),
+	.resource = nl8048_resources,
+	.dev = {
+		.platform_data = (void *)GPIO_PC15,
+	},
+};
+#endif
+
 #if defined(CONFIG_BFIN_CRC)
 #define BFIN_CRC_NAME "bfin-crc"
 
@@ -1862,6 +1889,29 @@ static struct platform_device i2c_bfin_twi1_device = {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#include <linux/spi/mcp23s08.h>
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch0 = {
+	.base = 120,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch1 = {
+	.base = 130,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch2 = {
+	.base = 140,
+};
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+static const struct mcp23s08_platform_data bfin_adv7842_soft_switch = {
+	.base = 150,
+};
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+static const struct mcp23s08_platform_data bfin_adv7511_soft_switch = {
+	.base = 160,
+};
+# endif
+#endif
+
 static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
 #if IS_ENABLED(CONFIG_INPUT_ADXL34X_I2C)
 	{
@@ -1881,6 +1931,32 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
 		I2C_BOARD_INFO("ssm2602", 0x1b),
 	},
 #endif
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+	{
+		I2C_BOARD_INFO("mcp23017", 0x21),
+		.platform_data = (void *)&bfin_mcp23s08_soft_switch0
+	},
+	{
+		I2C_BOARD_INFO("mcp23017", 0x22),
+		.platform_data = (void *)&bfin_mcp23s08_soft_switch1
+	},
+	{
+		I2C_BOARD_INFO("mcp23017", 0x23),
+		.platform_data = (void *)&bfin_mcp23s08_soft_switch2
+	},
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+	{
+		I2C_BOARD_INFO("mcp23017", 0x26),
+		.platform_data = (void *)&bfin_adv7842_soft_switch
+	},
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+	{
+		I2C_BOARD_INFO("mcp23017", 0x25),
+		.platform_data = (void *)&bfin_adv7511_soft_switch
+	},
+# endif
+#endif
 };
 
 static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
@@ -2023,8 +2099,8 @@ static struct platform_device *ezkit_devices[] __initdata = {
 #if IS_ENABLED(CONFIG_MTD_PHYSMAP)
 	&ezkit_flash_device,
 #endif
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-	&bfin_i2s_pcm,
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+	&bfin_pcm,
 #endif
 #if IS_ENABLED(CONFIG_SND_BF6XX_SOC_I2S)
 	&bfin_i2s,
@@ -2060,7 +2136,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = {
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary",  "pinctrl-adi2.0", NULL, "rotary"),
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0",  "pinctrl-adi2.0", NULL, "can0"),
 	PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0",  "pinctrl-adi2.0", NULL, "smc0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
+	PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
 	PIN_MAP_MUX_GROUP("bfin_display.0", "8bit",  "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"),
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
 	PIN_MAP_MUX_GROUP("bfin_display.0", "16bit",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
diff --git a/arch/blackfin/mach-bf609/clock.c b/arch/blackfin/mach-bf609/clock.c
index 244fa4ab4c56..378305844b2c 100644
--- a/arch/blackfin/mach-bf609/clock.c
+++ b/arch/blackfin/mach-bf609/clock.c
@@ -363,6 +363,12 @@ static struct clk ethclk = {
 	.ops	    = &dummy_clk_ops,
 };
 
+static struct clk ethpclk = {
+	.name       = "pclk",
+	.parent     = &sclk0,
+	.ops	    = &dummy_clk_ops,
+};
+
 static struct clk spiclk = {
 	.name       = "spi",
 	.parent     = &sclk1,
@@ -381,6 +387,7 @@ static struct clk_lookup bf609_clks[] = {
 	CLK(dclk, NULL, "DCLK"),
 	CLK(oclk, NULL, "OCLK"),
 	CLK(ethclk, NULL, "stmmaceth"),
+	CLK(ethpclk, NULL, "pclk"),
 	CLK(spiclk, NULL, "spi"),
 };
 
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 86b5a095c5a1..8d9431e22e8c 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1694,6 +1694,16 @@ ENTRY(_sys_call_table)
 	.long _sys_sendmmsg		/* 380 */
 	.long _sys_process_vm_readv
 	.long _sys_process_vm_writev
+	.long _sys_kcmp
+	.long _sys_finit_module
+	.long _sys_sched_setattr	/* 385 */
+	.long _sys_sched_getattr
+	.long _sys_renameat2
+	.long _sys_seccomp
+	.long _sys_getrandom
+	.long _sys_memfd_create		/* 390 */
+	.long _sys_bpf
+	.long _sys_execveat
 
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c
index 1387a94bcfd5..a66d979ec651 100644
--- a/arch/blackfin/mach-common/pm.c
+++ b/arch/blackfin/mach-common/pm.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/delay.h>
 
 #include <asm/cplb.h>
 #include <asm/gpio.h>
@@ -180,6 +181,7 @@ int bfin_pm_suspend_mem_enter(void)
 
 #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK)
 	flushinv_all_dcache();
+	udelay(1);
 #endif
 	_disable_dcplb();
 	_disable_icplb();
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 4a03911053ab..0314e325a669 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -46,12 +46,18 @@ config CRIS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
-	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
+	select ARCH_REQUIRE_GPIOLIB
+	select IRQ_DOMAIN if ETRAX_ARCH_V32
+	select OF if ETRAX_ARCH_V32
+	select OF_EARLY_FLATTREE if ETRAX_ARCH_V32
+	select CLKSRC_MMIO if ETRAX_ARCH_V32
+	select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32
+	select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32
 
 config HZ
 	int
@@ -61,6 +67,10 @@ config NR_CPUS
 	int
 	default "1"
 
+config BUILTIN_DTB
+	string "DTB to build into the kernel image"
+	depends on OF
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/cris/Makefile b/arch/cris/Makefile
index 39dc7d00083e..4a5404b3d0e4 100644
--- a/arch/cris/Makefile
+++ b/arch/cris/Makefile
@@ -40,6 +40,10 @@ else
 MACH :=
 endif
 
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+core-$(CONFIG_OF) += arch/cris/boot/dts/
+endif
+
 LD = $(CROSS_COMPILE)ld -mcrislinux
 
 OBJCOPYFLAGS := -O binary -R .note -R .comment -S
diff --git a/arch/cris/arch-v32/kernel/Makefile b/arch/cris/arch-v32/kernel/Makefile
index 40358355d0cb..d9fc617ea253 100644
--- a/arch/cris/arch-v32/kernel/Makefile
+++ b/arch/cris/arch-v32/kernel/Makefile
@@ -9,7 +9,6 @@ obj-y   := entry.o traps.o irq.o debugport.o \
 	   process.o ptrace.o setup.o signal.o traps.o time.o \
 	   cache.o cacheflush.o
 
-obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_ETRAX_KGDB) += kgdb.o kgdb_asm.o
 obj-$(CONFIG_ETRAX_FAST_TIMER) += fasttimer.o
 obj-$(CONFIG_MODULES)    += crisksyms.o
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index 2f19ac6217aa..026a0b21b8f0 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -99,6 +99,8 @@ ret_from_kernel_thread:
 
 	.type	ret_from_intr,@function
 ret_from_intr:
+	moveq	0, $r9			; not a syscall
+
 	;; Check for resched if preemptive kernel, or if we're going back to
 	;; user-mode. This test matches the user_regs(regs) macro. Don't simply
 	;; test CCS since that doesn't necessarily reflect what mode we'll
@@ -145,7 +147,7 @@ system_call:
 	;; Stack-frame similar to the irq heads, which is reversed in
 	;; ret_from_sys_call.
 
-	sub.d	92, $sp		; Skip EXS and EDA.
+	sub.d	92, $sp		; Skip EDA.
 	movem	$r13, [$sp]
 	move.d	$sp, $r8
 	addq	14*4, $r8
@@ -156,8 +158,9 @@ system_call:
 	move	$ccs, $r4
 	move	$srp, $r5
 	move	$erp, $r6
+	move.d	$r9, $r7	; Store syscall number in EXS
 	subq	4, $sp
-	movem	$r6, [$r8]
+	movem	$r7, [$r8]
 	ei			; Enable interrupts while processing syscalls.
 	move.d	$r10, [$sp]
 
@@ -278,43 +281,14 @@ _syscall_exit_work:
 	.type	_work_pending,@function
 _work_pending:
 	addoq	+TI_flags, $r0, $acr
-	move.d	[$acr], $r10
-	btstq	TIF_NEED_RESCHED, $r10	; Need resched?
-	bpl	_work_notifysig		; No, must be signal/notify.
-	nop
-	.size	_work_pending, . - _work_pending
-
-	.type	_work_resched,@function
-_work_resched:
-	move.d	$r9, $r1		; Preserve R9.
-	jsr	schedule
-	nop
-	move.d	$r1, $r9
-	di
-
-	addoq	+TI_flags, $r0, $acr
-	move.d	[$acr], $r1
-	and.d	_TIF_WORK_MASK, $r1	; Ignore sycall trace counter.
-	beq	_Rexit
-	nop
-	btstq	TIF_NEED_RESCHED, $r1
-	bmi	_work_resched		; current->work.need_resched.
-	nop
-	.size	_work_resched, . - _work_resched
-
-	.type	_work_notifysig,@function
-_work_notifysig:
-	;; Deal with pending signals and notify-resume requests.
-
-	addoq	+TI_flags, $r0, $acr
 	move.d	[$acr], $r12		; The thread_info_flags parameter.
 	move.d	$sp, $r11		; The regs param.
-	jsr	do_notify_resume
-	move.d	$r9, $r10		; do_notify_resume syscall/irq param.
+	jsr	do_work_pending
+	move.d	$r9, $r10		; The syscall/irq param.
 
 	ba _Rexit
 	nop
-	.size	_work_notifysig, . - _work_notifysig
+	.size	_work_pending, . - _work_pending
 
 	;; We get here as a sidetrack when we've entered a syscall with the
 	;; trace-bit set. We need to call do_syscall_trace and then continue
diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S
index 51e34165ece7..74a66e0e3777 100644
--- a/arch/cris/arch-v32/kernel/head.S
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -52,11 +52,6 @@ tstart:
 
 	GIO_INIT
 
-#ifdef CONFIG_SMP
-secondary_cpu_entry: /* Entry point for secondary CPUs */
-	di
-#endif
-
 	;; Setup and enable the MMU. Use same configuration for both the data
 	;; and the instruction MMU.
 	;;
@@ -164,33 +159,6 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */
 	nop
 	nop
 
-#ifdef CONFIG_SMP
-	;; Read CPU ID
-	move    0, $srs
-	nop
-	nop
-	nop
-	move    $s12, $r0
-	cmpq    0, $r0
-	beq	master_cpu
-	nop
-slave_cpu:
-	; Time to boot-up. Get stack location provided by master CPU.
-	move.d  smp_init_current_idle_thread, $r1
-	move.d  [$r1], $sp
-	add.d	8192, $sp
-	move.d	ebp_start, $r0	; Defined in linker-script.
-	move	$r0, $ebp
-	jsr	smp_callin
-	nop
-master_cpu:
-	/* Set up entry point for secondary CPUs. The boot ROM has set up
-	 * EBP at start of internal memory. The CPU will get there
-	 * later when we issue an IPI to them... */
-	move.d MEM_INTMEM_START + IPI_INTR_VECT * 4, $r0
-	move.d secondary_cpu_entry, $r1
-	move.d $r1, [$r0]
-#endif
 	; Check if starting from DRAM (network->RAM boot or unpacked
 	; compressed kernel), or directly from flash.
 	lapcq	., $r0
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 25437ae28128..6a881e0e92b4 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -10,6 +10,8 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/profile.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/threads.h>
@@ -56,9 +58,6 @@ struct cris_irq_allocation irq_allocations[NR_REAL_IRQS] =
 static unsigned long irq_regs[NR_CPUS] =
 {
   regi_irq,
-#ifdef CONFIG_SMP
-  regi_irq2,
-#endif
 };
 
 #if NR_REAL_IRQS > 32
@@ -431,6 +430,19 @@ crisv32_do_multiple(struct pt_regs* regs)
 	irq_exit();
 }
 
+static int crisv32_irq_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hw_irq_num)
+{
+	irq_set_chip_and_handler(virq, &crisv32_irq_type, handle_simple_irq);
+
+	return 0;
+}
+
+static struct irq_domain_ops crisv32_irq_ops = {
+	.map	= crisv32_irq_map,
+	.xlate	= irq_domain_xlate_onecell,
+};
+
 /*
  * This is called by start_kernel. It fixes the IRQ masks and setup the
  * interrupt vector table to point to bad_interrupt pointers.
@@ -441,6 +453,8 @@ init_IRQ(void)
 	int i;
 	int j;
 	reg_intr_vect_rw_mask vect_mask = {0};
+	struct device_node *np;
+	struct irq_domain *domain;
 
 	/* Clear all interrupts masks. */
 	for (i = 0; i < NBR_REGS; i++)
@@ -449,10 +463,15 @@ init_IRQ(void)
 	for (i = 0; i < 256; i++)
 		etrax_irv->v[i] = weird_irq;
 
-	/* Point all IRQ's to bad handlers. */
+	np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc");
+	domain = irq_domain_add_legacy(np, NR_IRQS - FIRST_IRQ,
+				       FIRST_IRQ, FIRST_IRQ,
+				       &crisv32_irq_ops, NULL);
+	BUG_ON(!domain);
+	irq_set_default_host(domain);
+	of_node_put(np);
+
 	for (i = FIRST_IRQ, j = 0; j < NR_IRQS; i++, j++) {
-		irq_set_chip_and_handler(j, &crisv32_irq_type,
-					 handle_simple_irq);
 		set_exception_vector(i, interrupt[j]);
 	}
 
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c
index 81715c683baf..cd1865d68b2e 100644
--- a/arch/cris/arch-v32/kernel/setup.c
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -63,11 +63,6 @@ int show_cpuinfo(struct seq_file *m, void *v)
 
 	info = &cpinfo[ARRAY_SIZE(cpinfo) - 1];
 
-#ifdef CONFIG_SMP
-	if (!cpu_online(cpu))
-		return 0;
-#endif
-
 	revision = rdvr();
 
 	for (i = 0; i < ARRAY_SIZE(cpinfo); i++) {
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 0c9ce9eac614..3a36ae6b79d5 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -72,6 +72,9 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 	/* Make that the user-mode flag is set. */
 	regs->ccs |= (1 << (U_CCS_BITNR + CCS_SHIFT));
 
+	/* Don't perform syscall restarting */
+	regs->exs = -1;
+
 	/* Restore the old USP. */
 	err |= __get_user(old_usp, &sc->usp);
 	wrusp(old_usp);
@@ -425,6 +428,8 @@ do_signal(int canrestart, struct pt_regs *regs)
 {
 	struct ksignal ksig;
 
+	canrestart = canrestart && ((int)regs->exs >= 0);
+
 	/*
 	 * The common case should go fast, which is why this point is
 	 * reached from kernel-mode. If that's the case, just return
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
deleted file mode 100644
index 0698582467ca..000000000000
--- a/arch/cris/arch-v32/kernel/smp.c
+++ /dev/null
@@ -1,358 +0,0 @@
-#include <linux/types.h>
-#include <asm/delay.h>
-#include <irq.h>
-#include <hwregs/intr_vect.h>
-#include <hwregs/intr_vect_defs.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <hwregs/asm/mmu_defs_asm.h>
-#include <hwregs/supp_reg.h>
-#include <linux/atomic.h>
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#define IPI_SCHEDULE 1
-#define IPI_CALL 2
-#define IPI_FLUSH_TLB 4
-#define IPI_BOOT 8
-
-#define FLUSH_ALL (void*)0xffffffff
-
-/* Vector of locks used for various atomic operations */
-spinlock_t cris_atomic_locks[] = {
-	[0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
-};
-
-/* CPU masks */
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(phys_cpu_present_map);
-
-/* Variables used during SMP boot */
-volatile int cpu_now_booting = 0;
-volatile struct thread_info *smp_init_current_idle_thread;
-
-/* Variables used during IPI */
-static DEFINE_SPINLOCK(call_lock);
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	int wait;
-};
-
-static struct call_data_struct * call_data;
-
-static struct mm_struct* flush_mm;
-static struct vm_area_struct* flush_vma;
-static unsigned long flush_addr;
-
-/* Mode registers */
-static unsigned long irq_regs[NR_CPUS] = {
-  regi_irq,
-  regi_irq2
-};
-
-static irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id);
-static int send_ipi(int vector, int wait, cpumask_t cpu_mask);
-static struct irqaction irq_ipi  = {
-	.handler = crisv32_ipi_interrupt,
-	.flags = 0,
-	.name = "ipi",
-};
-
-extern void cris_mmu_init(void);
-extern void cris_timer_init(void);
-
-/* SMP initialization */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-	int i;
-
-	/* From now on we can expect IPIs so set them up */
-	setup_irq(IPI_INTR_VECT, &irq_ipi);
-
-	/* Mark all possible CPUs as present */
-	for (i = 0; i < max_cpus; i++)
-		cpumask_set_cpu(i, &phys_cpu_present_map);
-}
-
-void smp_prepare_boot_cpu(void)
-{
-	/* PGD pointer has moved after per_cpu initialization so
-	 * update the MMU.
-	 */
-  	pgd_t **pgd;
-	pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-
-	SUPP_BANK_SEL(1);
-	SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-	SUPP_BANK_SEL(2);
-	SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-
-	set_cpu_online(0, true);
-	cpumask_set_cpu(0, &phys_cpu_present_map);
-	set_cpu_possible(0, true);
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
-/* Bring one cpu online.*/
-static int __init
-smp_boot_one_cpu(int cpuid, struct task_struct idle)
-{
-	unsigned timeout;
-	cpumask_t cpu_mask;
-
-	cpumask_clear(&cpu_mask);
-	task_thread_info(idle)->cpu = cpuid;
-
-	/* Information to the CPU that is about to boot */
-	smp_init_current_idle_thread = task_thread_info(idle);
-	cpu_now_booting = cpuid;
-
-	/* Kick it */
-	set_cpu_online(cpuid, true);
-	cpumask_set_cpu(cpuid, &cpu_mask);
-	send_ipi(IPI_BOOT, 0, cpu_mask);
-	set_cpu_online(cpuid, false);
-
-	/* Wait for CPU to come online */
-	for (timeout = 0; timeout < 10000; timeout++) {
-		if(cpu_online(cpuid)) {
-			cpu_now_booting = 0;
-			smp_init_current_idle_thread = NULL;
-			return 0; /* CPU online */
-		}
-		udelay(100);
-		barrier();
-	}
-
-	printk(KERN_CRIT "SMP: CPU:%d is stuck.\n", cpuid);
-	return -1;
-}
-
-/* Secondary CPUs starts using C here. Here we need to setup CPU
- * specific stuff such as the local timer and the MMU. */
-void __init smp_callin(void)
-{
-	int cpu = cpu_now_booting;
-	reg_intr_vect_rw_mask vect_mask = {0};
-
-	/* Initialise the idle task for this CPU */
-	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
-
-	/* Set up MMU */
-	cris_mmu_init();
-	__flush_tlb_all();
-
-	/* Setup local timer. */
-	cris_timer_init();
-
-	/* Enable IRQ and idle */
-	REG_WR(intr_vect, irq_regs[cpu], rw_mask, vect_mask);
-	crisv32_unmask_irq(IPI_INTR_VECT);
-	crisv32_unmask_irq(TIMER0_INTR_VECT);
-	preempt_disable();
-	notify_cpu_starting(cpu);
-	local_irq_enable();
-
-	set_cpu_online(cpu, true);
-	cpu_startup_entry(CPUHP_ONLINE);
-}
-
-/* Stop execution on this CPU.*/
-void stop_this_cpu(void* dummy)
-{
-	local_irq_disable();
-	asm volatile("halt");
-}
-
-/* Other calls */
-void smp_send_stop(void)
-{
-	smp_call_function(stop_this_cpu, NULL, 0);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-	return -EINVAL;
-}
-
-
-/* cache_decay_ticks is used by the scheduler to decide if a process
- * is "hot" on one CPU. A higher value means a higher penalty to move
- * a process to another CPU. Our cache is rather small so we report
- * 1 tick.
- */
-unsigned long cache_decay_ticks = 1;
-
-int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
-	smp_boot_one_cpu(cpu, tidle);
-	return cpu_online(cpu) ? 0 : -ENOSYS;
-}
-
-void smp_send_reschedule(int cpu)
-{
-	cpumask_t cpu_mask;
-	cpumask_clear(&cpu_mask);
-	cpumask_set_cpu(cpu, &cpu_mask);
-	send_ipi(IPI_SCHEDULE, 0, cpu_mask);
-}
-
-/* TLB flushing
- *
- * Flush needs to be done on the local CPU and on any other CPU that
- * may have the same mapping. The mm->cpu_vm_mask is used to keep track
- * of which CPUs that a specific process has been executed on.
- */
-void flush_tlb_common(struct mm_struct* mm, struct vm_area_struct* vma, unsigned long addr)
-{
-	unsigned long flags;
-	cpumask_t cpu_mask;
-
-	spin_lock_irqsave(&tlbstate_lock, flags);
-	cpu_mask = (mm == FLUSH_ALL ? cpu_all_mask : *mm_cpumask(mm));
-	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-	flush_mm = mm;
-	flush_vma = vma;
-	flush_addr = addr;
-	send_ipi(IPI_FLUSH_TLB, 1, cpu_mask);
-	spin_unlock_irqrestore(&tlbstate_lock, flags);
-}
-
-void flush_tlb_all(void)
-{
-	__flush_tlb_all();
-	flush_tlb_common(FLUSH_ALL, FLUSH_ALL, 0);
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	__flush_tlb_mm(mm);
-	flush_tlb_common(mm, FLUSH_ALL, 0);
-	/* No more mappings in other CPUs */
-	cpumask_clear(mm_cpumask(mm));
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-}
-
-void flush_tlb_page(struct vm_area_struct *vma,
-			   unsigned long addr)
-{
-	__flush_tlb_page(vma, addr);
-	flush_tlb_common(vma->vm_mm, vma, addr);
-}
-
-/* Inter processor interrupts
- *
- * The IPIs are used for:
- *   * Force a schedule on a CPU
- *   * FLush TLB on other CPUs
- *   * Call a function on other CPUs
- */
-
-int send_ipi(int vector, int wait, cpumask_t cpu_mask)
-{
-	int i = 0;
-	reg_intr_vect_rw_ipi ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-	int ret = 0;
-
-	/* Calculate CPUs to send to. */
-	cpumask_and(&cpu_mask, &cpu_mask, cpu_online_mask);
-
-	/* Send the IPI. */
-	for_each_cpu(i, &cpu_mask)
-	{
-		ipi.vector |= vector;
-		REG_WR(intr_vect, irq_regs[i], rw_ipi, ipi);
-	}
-
-	/* Wait for IPI to finish on other CPUS */
-	if (wait) {
-		for_each_cpu(i, &cpu_mask) {
-                        int j;
-                        for (j = 0 ; j < 1000; j++) {
-				ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-				if (!ipi.vector)
-					break;
-				udelay(100);
-			}
-
-			/* Timeout? */
-			if (ipi.vector) {
-				printk("SMP call timeout from %d to %d\n", smp_processor_id(), i);
-				ret = -ETIMEDOUT;
-				dump_stack();
-			}
-		}
-	}
-	return ret;
-}
-
-/*
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-	cpumask_t cpu_mask;
-	struct call_data_struct data;
-	int ret;
-
-	cpumask_setall(&cpu_mask);
-	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	data.wait = wait;
-
-	spin_lock(&call_lock);
-	call_data = &data;
-	ret = send_ipi(IPI_CALL, wait, cpu_mask);
-	spin_unlock(&call_lock);
-
-	return ret;
-}
-
-irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
-{
-	void (*func) (void *info) = call_data->func;
-	void *info = call_data->info;
-	reg_intr_vect_rw_ipi ipi;
-
-	ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
-
-	if (ipi.vector & IPI_SCHEDULE) {
-		scheduler_ipi();
-	}
-	if (ipi.vector & IPI_CALL) {
-		func(info);
-	}
-	if (ipi.vector & IPI_FLUSH_TLB) {
-		if (flush_mm == FLUSH_ALL)
-			__flush_tlb_all();
-		else if (flush_vma == FLUSH_ALL)
-			__flush_tlb_mm(flush_mm);
-		else
-			__flush_tlb_page(flush_vma, flush_addr);
-	}
-
-	ipi.vector = 0;
-	REG_WR(intr_vect, irq_regs[smp_processor_id()], rw_ipi, ipi);
-
-	return IRQ_HANDLED;
-}
-
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index c17b01abdc3b..4fce9f1f7cc0 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -8,12 +8,14 @@
 #include <linux/timex.h>
 #include <linux/time.h>
 #include <linux/clocksource.h>
+#include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/sched_clock.h>
 #include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
@@ -36,33 +38,11 @@
 /* Number of 763 counts before watchdog bites */
 #define ETRAX_WD_CNT		((2*ETRAX_WD_HZ)/HZ + 1)
 
-/* Register the continuos readonly timer available in FS and ARTPEC-3.  */
-static cycle_t read_cont_rotime(struct clocksource *cs)
-{
-	return (u32)REG_RD(timer, regi_timer0, r_time);
-}
-
-static struct clocksource cont_rotime = {
-	.name   = "crisv32_rotime",
-	.rating = 300,
-	.read   = read_cont_rotime,
-	.mask   = CLOCKSOURCE_MASK(32),
-	.flags  = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init etrax_init_cont_rotime(void)
-{
-	clocksource_register_khz(&cont_rotime, 100000);
-	return 0;
-}
-arch_initcall(etrax_init_cont_rotime);
+#define CRISV32_TIMER_FREQ	(100000000lu)
 
 unsigned long timer_regs[NR_CPUS] =
 {
 	regi_timer0,
-#ifdef CONFIG_SMP
-	regi_timer2
-#endif
 };
 
 extern int set_rtc_mmss(unsigned long nowtime);
@@ -189,81 +169,104 @@ void handle_watchdog_bite(struct pt_regs *regs)
 #endif
 }
 
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick.
- */
-extern void cris_do_profile(struct pt_regs *regs);
+extern void cris_profile_sample(struct pt_regs *regs);
+static void __iomem *timer_base;
 
-static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
+static void crisv32_clkevt_mode(enum clock_event_mode mode,
+				struct clock_event_device *dev)
 {
-	struct pt_regs *regs = get_irq_regs();
-	int cpu = smp_processor_id();
-	reg_timer_r_masked_intr masked_intr;
-	reg_timer_rw_ack_intr ack_intr = { 0 };
-
-	/* Check if the timer interrupt is for us (a tmr0 int) */
-	masked_intr = REG_RD(timer, timer_regs[cpu], r_masked_intr);
-	if (!masked_intr.tmr0)
-		return IRQ_NONE;
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
 
-	/* Acknowledge the timer irq. */
-	ack_intr.tmr0 = 1;
-	REG_WR(timer, timer_regs[cpu], rw_ack_intr, ack_intr);
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+}
 
-	/* Reset watchdog otherwise it resets us! */
-	reset_watchdog();
+static int crisv32_clkevt_next_event(unsigned long evt,
+				     struct clock_event_device *dev)
+{
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_ld,
+		.freq = regk_timer_f100,
+	};
+
+	REG_WR(timer, timer_base, rw_tmr0_div, evt);
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+	ctrl.op = regk_timer_run;
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+	return 0;
+}
+
+static irqreturn_t crisv32_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
+	reg_timer_rw_ack_intr ack = { .tmr0 = 1 };
+	reg_timer_r_masked_intr intr;
+
+	intr = REG_RD(timer, timer_base, r_masked_intr);
+	if (!intr.tmr0)
+		return IRQ_NONE;
 
-	/* Update statistics. */
-	update_process_times(user_mode(regs));
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+	REG_WR(timer, timer_base, rw_ack_intr, ack);
 
-	cris_do_profile(regs); /* Save profiling information */
+	reset_watchdog();
+#ifdef CONFIG_SYSTEM_PROFILER
+	cris_profile_sample(get_irq_regs());
+#endif
 
-	/* The master CPU is responsible for the time keeping. */
-	if (cpu != 0)
-		return IRQ_HANDLED;
+	evt->event_handler(evt);
 
-	/* Call the real timer interrupt handler */
-	xtime_update(1);
 	return IRQ_HANDLED;
 }
 
+static struct clock_event_device crisv32_clockevent = {
+	.name = "crisv32-timer",
+	.rating = 300,
+	.features = CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode = crisv32_clkevt_mode,
+	.set_next_event = crisv32_clkevt_next_event,
+};
+
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
 static struct irqaction irq_timer = {
-	.handler = timer_interrupt,
-	.flags = IRQF_SHARED,
-	.name = "timer"
+	.handler = crisv32_timer_interrupt,
+	.flags = IRQF_TIMER | IRQF_SHARED,
+	.name = "crisv32-timer",
+	.dev_id = &crisv32_clockevent,
 };
 
-void __init cris_timer_init(void)
+static u64 notrace crisv32_timer_sched_clock(void)
 {
-	int cpu = smp_processor_id();
-	reg_timer_rw_tmr0_ctrl tmr0_ctrl = { 0 };
-	reg_timer_rw_tmr0_div tmr0_div = TIMER0_DIV;
-	reg_timer_rw_intr_mask timer_intr_mask;
+	return REG_RD(timer, timer_base, r_time);
+}
 
-	/* Setup the etrax timers.
-	 * Base frequency is 100MHz, divider 1000000 -> 100 HZ
-	 * We use timer0, so timer1 is free.
-	 * The trig timer is used by the fasttimer API if enabled.
-	 */
+static void __init crisv32_timer_init(void)
+{
+	reg_timer_rw_intr_mask timer_intr_mask;
+	reg_timer_rw_tmr0_ctrl ctrl = {
+		.op = regk_timer_hold,
+		.freq = regk_timer_f100,
+	};
 
-	tmr0_ctrl.op = regk_timer_ld;
-	tmr0_ctrl.freq = regk_timer_f100;
-	REG_WR(timer, timer_regs[cpu], rw_tmr0_div, tmr0_div);
-	REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Load */
-	tmr0_ctrl.op = regk_timer_run;
-	REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Start */
+	REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
 
-	/* Enable the timer irq. */
-	timer_intr_mask = REG_RD(timer, timer_regs[cpu], rw_intr_mask);
+	timer_intr_mask = REG_RD(timer, timer_base, rw_intr_mask);
 	timer_intr_mask.tmr0 = 1;
-	REG_WR(timer, timer_regs[cpu], rw_intr_mask, timer_intr_mask);
+	REG_WR(timer, timer_base, rw_intr_mask, timer_intr_mask);
 }
 
 void __init time_init(void)
 {
-	reg_intr_vect_rw_mask intr_mask;
+	int irq;
+	int ret;
 
 	/* Probe for the RTC and read it if it exists.
 	 * Before the RTC can be probed the loops_per_usec variable needs
@@ -273,17 +276,28 @@ void __init time_init(void)
 	 */
 	loops_per_usec = 50;
 
-	/* Start CPU local timer. */
-	cris_timer_init();
+	irq = TIMER0_INTR_VECT;
+	timer_base = (void __iomem *) regi_timer0;
+
+	crisv32_timer_init();
+
+	sched_clock_register(crisv32_timer_sched_clock, 32,
+			     CRISV32_TIMER_FREQ);
+
+	clocksource_mmio_init(timer_base + REG_RD_ADDR_timer_r_time,
+			      "crisv32-timer", CRISV32_TIMER_FREQ,
+			      300, 32, clocksource_mmio_readl_up);
+
+	crisv32_clockevent.cpumask = cpu_possible_mask;
+	crisv32_clockevent.irq = irq;
 
-	/* Enable the timer irq in global config. */
-	intr_mask = REG_RD_VECT(intr_vect, regi_irq, rw_mask, 1);
-	intr_mask.timer0 = 1;
-	REG_WR_VECT(intr_vect, regi_irq, rw_mask, 1, intr_mask);
+	ret = setup_irq(irq, &irq_timer);
+	if (ret)
+		pr_warn("failed to setup irq %d\n", irq);
 
-	/* Now actually register the timer irq handler that calls
-	 * timer_interrupt(). */
-	setup_irq(TIMER0_INTR_VECT, &irq_timer);
+	clockevents_config_and_register(&crisv32_clockevent,
+					CRISV32_TIMER_FREQ,
+					2, 0xffffffff);
 
 	/* Enable watchdog if we should use one. */
 
diff --git a/arch/cris/arch-v32/lib/Makefile b/arch/cris/arch-v32/lib/Makefile
index dd296b9db034..e91cf02f625d 100644
--- a/arch/cris/arch-v32/lib/Makefile
+++ b/arch/cris/arch-v32/lib/Makefile
@@ -3,5 +3,5 @@
 #
 
 lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o \
-	csumcpfruser.o spinlock.o delay.o strcmp.o
+	csumcpfruser.o delay.o strcmp.o
 
diff --git a/arch/cris/arch-v32/lib/spinlock.S b/arch/cris/arch-v32/lib/spinlock.S
deleted file mode 100644
index fe610b9d775f..000000000000
--- a/arch/cris/arch-v32/lib/spinlock.S
+++ /dev/null
@@ -1,40 +0,0 @@
-;; Core of the spinlock implementation
-;;
-;; Copyright (C) 2004 Axis Communications AB.
-;;
-;; Author: Mikael Starvik
-
-
-	.global cris_spin_lock
-	.type   cris_spin_lock,@function
-	.global cris_spin_trylock
-	.type   cris_spin_trylock,@function
-
-	.text
-
-cris_spin_lock:
-	clearf	p
-1:	test.b	[$r10]
-	beq	1b
-	clearf	p
-	ax
-	clear.b [$r10]
-	bcs     1b
-	clearf	p
-	ret
-	nop
-
-	.size   cris_spin_lock, . - cris_spin_lock
-
-cris_spin_trylock:
-	clearf	p
-1:	move.b	[$r10], $r11
-	ax
-	clear.b [$r10]
-        bcs	1b
-        clearf	p
-	ret
-	movu.b	$r11,$r10
-
-	.size   cris_spin_trylock, . - cris_spin_trylock
-
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c
index 3deca5253d91..f5438ca8122d 100644
--- a/arch/cris/arch-v32/mm/init.c
+++ b/arch/cris/arch-v32/mm/init.c
@@ -40,17 +40,6 @@ void __init cris_mmu_init(void)
 	 */
 	per_cpu(current_pgd, smp_processor_id()) = init_mm.pgd;
 
-#ifdef CONFIG_SMP
-	{
-		pgd_t **pgd;
-		pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-		SUPP_BANK_SEL(1);
-		SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-		SUPP_BANK_SEL(2);
-		SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-	}
-#endif
-
 	/* Initialise the TLB. Function found in tlb.c. */
 	tlb_init();
 
diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S
index 72727c1d8e60..c0981044eccb 100644
--- a/arch/cris/arch-v32/mm/mmu.S
+++ b/arch/cris/arch-v32/mm/mmu.S
@@ -115,11 +115,7 @@
 	move.d	$r0, [$r1]	; last_refill_cause = rw_mm_cause
 
 3:	; Probably not in a loop, continue normal processing
-#ifdef CONFIG_SMP
-	move    $s7, $acr	; PGD
-#else
 	move.d  current_pgd, $acr ; PGD
-#endif
 	; Look up PMD in PGD
 	lsrq	24, $r0	; Get PMD index into PGD (bit 24-31)
 	move.d  [$acr], $acr	; PGD for the current process
diff --git a/arch/cris/boot/dts/Makefile b/arch/cris/boot/dts/Makefile
new file mode 100644
index 000000000000..faf69fb9919f
--- /dev/null
+++ b/arch/cris/boot/dts/Makefile
@@ -0,0 +1,6 @@
+BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+endif
+
+clean-files := *.dtb.S
diff --git a/arch/cris/boot/dts/dev88.dts b/arch/cris/boot/dts/dev88.dts
new file mode 100644
index 000000000000..4fa5a3f9d0ec
--- /dev/null
+++ b/arch/cris/boot/dts/dev88.dts
@@ -0,0 +1,18 @@
+/dts-v1/;
+
+/include/ "etraxfs.dtsi"
+
+/ {
+	model = "Axis 88 Developer Board";
+	compatible = "axis,dev88";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	soc {
+		uart0: serial@b00260000 {
+			status = "okay";
+		};
+	};
+};
diff --git a/arch/cris/boot/dts/etraxfs.dtsi b/arch/cris/boot/dts/etraxfs.dtsi
new file mode 100644
index 000000000000..909bcedc3565
--- /dev/null
+++ b/arch/cris/boot/dts/etraxfs.dtsi
@@ -0,0 +1,38 @@
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&intc>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			model = "axis,crisv32";
+			reg = <0>;
+		};
+	};
+
+	soc {
+		compatible = "simple-bus";
+		model = "etraxfs";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		intc: interrupt-controller {
+			compatible = "axis,crisv32-intc";
+			reg = <0xb001c000 0x1000>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		serial@b00260000 {
+			compatible = "axis,etraxfs-uart";
+			reg = <0xb0026000 0x1000>;
+			interrupts = <68>;
+			status = "disabled";
+		};
+	};
+};
diff --git a/arch/cris/include/arch-v10/arch/atomic.h b/arch/cris/include/arch-v10/arch/atomic.h
deleted file mode 100644
index 6ef5e7d09024..000000000000
--- a/arch/cris/include/arch-v10/arch/atomic.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-
-#endif
diff --git a/arch/cris/include/arch-v10/arch/system.h b/arch/cris/include/arch-v10/arch/system.h
index 935fde34aa15..9b5580f58b96 100644
--- a/arch/cris/include/arch-v10/arch/system.h
+++ b/arch/cris/include/arch-v10/arch/system.h
@@ -36,12 +36,4 @@ static inline unsigned long _get_base(char * addr)
   return 0;
 }
 
-#define nop() __asm__ __volatile__ ("nop");
-
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-#define tas(ptr) (xchg((ptr),1))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
 #endif
diff --git a/arch/cris/include/arch-v32/arch/atomic.h b/arch/cris/include/arch-v32/arch/atomic.h
deleted file mode 100644
index 852ceff8013f..000000000000
--- a/arch/cris/include/arch-v32/arch/atomic.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#include <linux/spinlock_types.h>
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void* l);
-
-#ifndef CONFIG_SMP
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-#else
-
-extern spinlock_t cris_atomic_locks[];
-#define LOCK_COUNT 128
-#define HASH_ADDR(a) (((int)a) & 127)
-
-#define cris_atomic_save(addr, flags) \
-  local_irq_save(flags); \
-  cris_spin_lock((void *)&cris_atomic_locks[HASH_ADDR(addr)].raw_lock.slock);
-
-#define cris_atomic_restore(addr, flags) \
-  { \
-    spinlock_t *lock = (void*)&cris_atomic_locks[HASH_ADDR(addr)]; \
-    __asm__ volatile ("move.d %1,%0" \
-			: "=m" (lock->raw_lock.slock) \
-			: "r" (1) \
-			: "memory"); \
-    local_irq_restore(flags); \
-  }
-
-#endif
-
-#endif
-
diff --git a/arch/cris/include/arch-v32/arch/processor.h b/arch/cris/include/arch-v32/arch/processor.h
index a024b7d32fed..568759271ab5 100644
--- a/arch/cris/include/arch-v32/arch/processor.h
+++ b/arch/cris/include/arch-v32/arch/processor.h
@@ -25,8 +25,7 @@ struct thread_struct {
  */
 #define TASK_SIZE	(0xB0000000UL)
 
-/* CCS I=1, enable interrupts. */
-#define INIT_THREAD { 0, 0, (1 << I_CCS_BITNR) }
+#define INIT_THREAD { }
 
 #define KSTK_EIP(tsk)		\
 ({				\
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
deleted file mode 100644
index f13275522f4d..000000000000
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ /dev/null
@@ -1,131 +0,0 @@
-#ifndef __ASM_ARCH_SPINLOCK_H
-#define __ASM_ARCH_SPINLOCK_H
-
-#include <linux/spinlock_types.h>
-
-#define RW_LOCK_BIAS 0x01000000
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void *l);
-
-static inline int arch_spin_is_locked(arch_spinlock_t *x)
-{
-	return *(volatile signed char *)(&(x)->slock) <= 0;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	__asm__ volatile ("move.d %1,%0" \
-			  : "=m" (lock->slock) \
-			  : "r" (1) \
-			  : "memory");
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		cpu_relax();
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	return cris_spin_trylock((void *)&lock->slock);
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	cris_spin_lock((void *)&lock->slock);
-}
-
-static inline void
-arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- */
-
-static inline int arch_read_can_lock(arch_rwlock_t *x)
-{
-	return (int)(x)->lock > 0;
-}
-
-static inline int arch_write_can_lock(arch_rwlock_t *x)
-{
-	return (x)->lock == RW_LOCK_BIAS;
-}
-
-static  inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	arch_spin_lock(&rw->slock);
-	while (rw->lock == 0);
-	rw->lock--;
-	arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	arch_spin_lock(&rw->slock);
-	while (rw->lock != RW_LOCK_BIAS);
-	rw->lock = 0;
-	arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	arch_spin_lock(&rw->slock);
-	rw->lock++;
-	arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	arch_spin_lock(&rw->slock);
-	while (rw->lock != RW_LOCK_BIAS);
-	rw->lock = RW_LOCK_BIAS;
-	arch_spin_unlock(&rw->slock);
-}
-
-static  inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	int ret = 0;
-	arch_spin_lock(&rw->slock);
-	if (rw->lock != 0) {
-		rw->lock--;
-		ret = 1;
-	}
-	arch_spin_unlock(&rw->slock);
-	return ret;
-}
-
-static  inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	int ret = 0;
-	arch_spin_lock(&rw->slock);
-	if (rw->lock == RW_LOCK_BIAS) {
-		rw->lock = 0;
-		ret = 1;
-	}
-	arch_spin_unlock(&rw->slock);
-	return ret;
-}
-
-#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
-#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
-
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
-#endif /* __ASM_ARCH_SPINLOCK_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 889f2de050a3..057e51859b0a 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,16 +1,29 @@
-
+generic-y += atomic.h
 generic-y += barrier.h
 generic-y += clkdev.h
+generic-y += cmpxchg.h
 generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
 generic-y += exec.h
+generic-y += emergency-restart.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
+generic-y += percpu.h
 generic-y += preempt.h
 generic-y += scatterlist.h
 generic-y += sections.h
+generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
deleted file mode 100644
index 279766a70664..000000000000
--- a/arch/cris/include/asm/atomic.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* $Id: atomic.h,v 1.3 2001/07/25 16:15:19 bjornw Exp $ */
-
-#ifndef __ASM_CRIS_ATOMIC__
-#define __ASM_CRIS_ATOMIC__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/cmpxchg.h>
-#include <arch/atomic.h>
-#include <arch/system.h>
-#include <asm/barrier.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i)  { (i) }
-
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/* These should be written in asm but we do it in C for now. */
-
-#define ATOMIC_OP(op, c_op)						\
-static inline void atomic_##op(int i, volatile atomic_t *v)		\
-{									\
-	unsigned long flags;						\
-	cris_atomic_save(v, flags);					\
-	v->counter c_op i;						\
-	cris_atomic_restore(v, flags);					\
-}									\
-
-#define ATOMIC_OP_RETURN(op, c_op)					\
-static inline int atomic_##op##_return(int i, volatile atomic_t *v)	\
-{									\
-	unsigned long flags;						\
-	int retval;							\
-	cris_atomic_save(v, flags);					\
-	retval = (v->counter c_op i);					\
-	cris_atomic_restore(v, flags);					\
-	return retval;							\
-}
-
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
-
-ATOMIC_OPS(add, +=)
-ATOMIC_OPS(sub, -=)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-static inline int atomic_sub_and_test(int i, volatile atomic_t *v)
-{
-	int retval;
-	unsigned long flags;
-	cris_atomic_save(v, flags);
-	retval = (v->counter -= i) == 0;
-	cris_atomic_restore(v, flags);
-	return retval;
-}
-
-static inline void atomic_inc(volatile atomic_t *v)
-{
-	unsigned long flags;
-	cris_atomic_save(v, flags);
-	(v->counter)++;
-	cris_atomic_restore(v, flags);
-}
-
-static inline void atomic_dec(volatile atomic_t *v)
-{
-	unsigned long flags;
-	cris_atomic_save(v, flags);
-	(v->counter)--;
-	cris_atomic_restore(v, flags);
-}
-
-static inline int atomic_inc_return(volatile atomic_t *v)
-{
-	unsigned long flags;
-	int retval;
-	cris_atomic_save(v, flags);
-	retval = ++(v->counter);
-	cris_atomic_restore(v, flags);
-	return retval;
-}
-
-static inline int atomic_dec_return(volatile atomic_t *v)
-{
-	unsigned long flags;
-	int retval;
-	cris_atomic_save(v, flags);
-	retval = --(v->counter);
-	cris_atomic_restore(v, flags);
-	return retval;
-}
-static inline int atomic_dec_and_test(volatile atomic_t *v)
-{
-	int retval;
-	unsigned long flags;
-	cris_atomic_save(v, flags);
-	retval = --(v->counter) == 0;
-	cris_atomic_restore(v, flags);
-	return retval;
-}
-
-static inline int atomic_inc_and_test(volatile atomic_t *v)
-{
-	int retval;
-	unsigned long flags;
-	cris_atomic_save(v, flags);
-	retval = ++(v->counter) == 0;
-	cris_atomic_restore(v, flags);
-	return retval;
-}
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	unsigned long flags;
-
-	cris_atomic_save(v, flags);
-	ret = v->counter;
-	if (likely(ret == old))
-		v->counter = new;
-	cris_atomic_restore(v, flags);
-	return ret;
-}
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-	int ret;
-	unsigned long flags;
-
-	cris_atomic_save(v, flags);
-	ret = v->counter;
-	if (ret != u)
-		v->counter += a;
-	cris_atomic_restore(v, flags);
-	return ret;
-}
-
-#endif
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index bd49a546f4f5..8062cb52d343 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -19,119 +19,10 @@
 #endif
 
 #include <arch/bitops.h>
-#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <asm/barrier.h>
 
-/*
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define set_bit(nr, addr)    (void)test_and_set_bit(nr, addr)
-
-/*
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
-
-#define clear_bit(nr, addr)  (void)test_and_clear_bit(nr, addr)
-
-/*
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define change_bit(nr, addr) (void)test_and_change_bit(nr, addr)
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int mask, retval;
-	unsigned long flags;
-	unsigned int *adr = (unsigned int *)addr;
-	
-	adr += nr >> 5;
-	mask = 1 << (nr & 0x1f);
-	cris_atomic_save(addr, flags);
-	retval = (mask & *adr) != 0;
-	*adr |= mask;
-	cris_atomic_restore(addr, flags);
-	return retval;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int mask, retval;
-	unsigned long flags;
-	unsigned int *adr = (unsigned int *)addr;
-	
-	adr += nr >> 5;
-	mask = 1 << (nr & 0x1f);
-	cris_atomic_save(addr, flags);
-	retval = (mask & *adr) != 0;
-	*adr &= ~mask;
-	cris_atomic_restore(addr, flags);
-	return retval;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned int mask, retval;
-	unsigned long flags;
-	unsigned int *adr = (unsigned int *)addr;
-	adr += nr >> 5;
-	mask = 1 << (nr & 0x1f);
-	cris_atomic_save(addr, flags);
-	retval = (mask & *adr) != 0;
-	*adr ^= mask;
-	cris_atomic_restore(addr, flags);
-	return retval;
-}
-
+#include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
 
 /*
diff --git a/arch/cris/include/asm/cmpxchg.h b/arch/cris/include/asm/cmpxchg.h
deleted file mode 100644
index b756dac8aa3f..000000000000
--- a/arch/cris/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef __ASM_CRIS_CMPXCHG__
-#define __ASM_CRIS_CMPXCHG__
-
-#include <linux/irqflags.h>
-
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
-  /* since Etrax doesn't have any atomic xchg instructions, we need to disable
-     irq's (if enabled) and do it with move.d's */
-  unsigned long flags,temp;
-  local_irq_save(flags); /* save flags, including irq enable bit and shut off irqs */
-  switch (size) {
-  case 1:
-    *((unsigned char *)&temp) = x;
-    x = *(unsigned char *)ptr;
-    *(unsigned char *)ptr = *((unsigned char *)&temp);
-    break;
-  case 2:
-    *((unsigned short *)&temp) = x;
-    x = *(unsigned short *)ptr;
-    *(unsigned short *)ptr = *((unsigned short *)&temp);
-    break;
-  case 4:
-    temp = x;
-    x = *(unsigned long *)ptr;
-    *(unsigned long *)ptr = temp;
-    break;
-  }
-  local_irq_restore(flags); /* restore irq enable bit */
-  return x;
-}
-
-#define xchg(ptr,x) \
-	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-#define tas(ptr) (xchg((ptr),1))
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)				 	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#endif /* __ASM_CRIS_CMPXCHG__ */
diff --git a/arch/cris/include/asm/device.h b/arch/cris/include/asm/device.h
deleted file mode 100644
index d8f9872b0e2d..000000000000
--- a/arch/cris/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/cris/include/asm/div64.h b/arch/cris/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/cris/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h
index 30ded8fbf592..c2a394ff55ff 100644
--- a/arch/cris/include/asm/elf.h
+++ b/arch/cris/include/asm/elf.h
@@ -71,7 +71,7 @@ typedef unsigned long elf_fpregset_t;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
diff --git a/arch/cris/include/asm/emergency-restart.h b/arch/cris/include/asm/emergency-restart.h
deleted file mode 100644
index 108d8c48e42e..000000000000
--- a/arch/cris/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/cris/include/asm/futex.h b/arch/cris/include/asm/futex.h
deleted file mode 100644
index 6a332a9f099c..000000000000
--- a/arch/cris/include/asm/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h
deleted file mode 100644
index 04126f7bfab2..000000000000
--- a/arch/cris/include/asm/hardirq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <asm/irq.h>
-#include <asm-generic/hardirq.h>
-
-#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/cris/include/asm/irq_regs.h b/arch/cris/include/asm/irq_regs.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/cris/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/cris/include/asm/kdebug.h b/arch/cris/include/asm/kdebug.h
deleted file mode 100644
index 6ece1b037665..000000000000
--- a/arch/cris/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h
deleted file mode 100644
index d2d643c4ea59..000000000000
--- a/arch/cris/include/asm/kmap_types.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/* Dummy header just to define km_type.  None of this
- * is actually used on cris. 
- */
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/cris/include/asm/local.h b/arch/cris/include/asm/local.h
deleted file mode 100644
index c11c530f74d0..000000000000
--- a/arch/cris/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/cris/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/percpu.h b/arch/cris/include/asm/percpu.h
deleted file mode 100644
index 6db9b43cf80a..000000000000
--- a/arch/cris/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_PERCPU_H
-#define _CRIS_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* _CRIS_PERCPU_H */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
deleted file mode 100644
index c615a06dd757..000000000000
--- a/arch/cris/include/asm/smp.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-#include <linux/cpumask.h>
-
-extern cpumask_t phys_cpu_present_map;
-
-#define raw_smp_processor_id() (current_thread_info()->cpu)
-
-#endif
diff --git a/arch/cris/include/asm/spinlock.h b/arch/cris/include/asm/spinlock.h
deleted file mode 100644
index ed816b57face..000000000000
--- a/arch/cris/include/asm/spinlock.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <arch/spinlock.h>
diff --git a/arch/cris/include/asm/tlbflush.h b/arch/cris/include/asm/tlbflush.h
index 20697e7ef4f2..b424f43a9fd6 100644
--- a/arch/cris/include/asm/tlbflush.h
+++ b/arch/cris/include/asm/tlbflush.h
@@ -22,16 +22,9 @@ extern void __flush_tlb_mm(struct mm_struct *mm);
 extern void __flush_tlb_page(struct vm_area_struct *vma,
 			   unsigned long addr);
 
-#ifdef CONFIG_SMP
-extern void flush_tlb_all(void);
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, 
-			   unsigned long addr);
-#else
 #define flush_tlb_all __flush_tlb_all
 #define flush_tlb_mm __flush_tlb_mm
 #define flush_tlb_page __flush_tlb_page
-#endif
 
 static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
 {
diff --git a/arch/cris/include/asm/topology.h b/arch/cris/include/asm/topology.h
deleted file mode 100644
index 2ac613d32a89..000000000000
--- a/arch/cris/include/asm/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_CRIS_TOPOLOGY_H
-#define _ASM_CRIS_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_CRIS_TOPOLOGY_H */
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index b45640b3e600..edef71f12bb8 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -7,6 +7,7 @@ CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
 extra-y	:= vmlinux.lds
 
 obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y += devicetree.o
 
 obj-$(CONFIG_MODULES)    += crisksyms.o
 obj-$(CONFIG_MODULES)	 += module.o
diff --git a/arch/cris/kernel/devicetree.c b/arch/cris/kernel/devicetree.c
new file mode 100644
index 000000000000..53ff8d73e7e1
--- /dev/null
+++ b/arch/cris/kernel/devicetree.c
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/printk.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	pr_err("%s(%llx, %llx)\n",
+	       __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return alloc_bootmem_align(size, align);
+}
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c
index 58d44ee1a71f..fd3427e563c5 100644
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -42,3 +42,26 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
 		tracehook_notify_resume(regs);
 	}
 }
+
+void do_work_pending(int syscall, struct pt_regs *regs,
+		     unsigned int thread_flags)
+{
+	do {
+		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+			schedule();
+		} else {
+			if (unlikely(!user_mode(regs)))
+				return;
+			local_irq_enable();
+			if (thread_flags & _TIF_SIGPENDING) {
+				do_signal(syscall, regs);
+				syscall = 0;
+			} else {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
+		}
+		local_irq_disable();
+		thread_flags = current_thread_info()->flags;
+	} while (thread_flags & _TIF_WORK_MASK);
+}
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 905b70ea9939..bb12aa93201d 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -19,6 +19,9 @@
 #include <linux/utsname.h>
 #include <linux/pfn.h>
 #include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
 #include <asm/setup.h>
 #include <arch/system.h>
 
@@ -64,6 +67,10 @@ void __init setup_arch(char **cmdline_p)
 	unsigned long start_pfn, max_pfn;
 	unsigned long memory_start;
 
+#ifdef CONFIG_OF
+	early_init_dt_scan(__dtb_start);
+#endif
+
 	/* register an initial console printing routine for printk's */
 
 	init_etrax_debug();
@@ -141,6 +148,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size, BOOTMEM_DEFAULT);
 
+	unflatten_and_copy_device_tree();
+
 	/* paging_init() sets up the MMU and marks all pages as reserved */
 
 	paging_init();
@@ -204,3 +213,9 @@ static int __init topology_init(void)
 
 subsys_initcall(topology_init);
 
+static int __init cris_of_init(void)
+{
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+	return 0;
+}
+core_initcall(cris_of_init);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index fe6acdabbc8d..7780d379522f 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -79,11 +79,13 @@ cris_do_profile(struct pt_regs* regs)
 #endif
 }
 
+#ifndef CONFIG_GENERIC_SCHED_CLOCK
 unsigned long long sched_clock(void)
 {
 	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ) +
 		get_ns_in_jiffie();
 }
+#endif
 
 static int
 __init init_udelay(void)
diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h
index 99bb7efaf9b7..0b78bc89e840 100644
--- a/arch/frv/include/asm/io.h
+++ b/arch/frv/include/asm/io.h
@@ -342,6 +342,11 @@ static inline void iowrite32(u32 val, void __iomem *p)
 		__flush_PCI_writes();
 }
 
+#define ioread16be(addr)	be16_to_cpu(ioread16(addr))
+#define ioread32be(addr)	be32_to_cpu(ioread32(addr))
+#define iowrite16be(v, addr)	iowrite16(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)	iowrite32(cpu_to_be32(v), (addr))
+
 static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count)
 {
 	io_insb((unsigned long) p, dst, count);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 4f9a6661491b..76d25b2cfbbe 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -15,6 +15,7 @@ config IA64
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select PCI if (!IA64_HP_SIM)
 	select ACPI if (!IA64_HP_SIM)
+	select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
 	select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 35bf22cc71b7..b1698bc042c8 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -887,7 +887,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
 {
 	return _acpi_map_lsapic(handle, physid, pcpu);
 }
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5f4243f0acfa..60e02f7747ff 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2159,7 +2159,7 @@ static const struct file_operations pfm_file_ops = {
 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
-			     dentry->d_inode->i_ino);
+			     d_inode(dentry)->i_ino);
 }
 
 static const struct dentry_operations pfmfs_dentry_operations = {
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index ce7aea34fdf4..c18ddc74ef9a 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -45,7 +45,7 @@ static volatile unsigned long flushcache_cpumask = 0;
 /*
  * For flush_tlb_others()
  */
-static volatile cpumask_t flush_cpumask;
+static cpumask_t flush_cpumask;
 static struct mm_struct *flush_mm;
 static struct vm_area_struct *flush_vma;
 static volatile unsigned long flush_va;
@@ -415,7 +415,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	 */
 	send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0);
 
-	while (!cpumask_empty((cpumask_t*)&flush_cpumask)) {
+	while (!cpumask_empty(&flush_cpumask)) {
 		/* nothing. lockup detection does not belong here */
 		mb();
 	}
@@ -468,7 +468,7 @@ void smp_invalidate_interrupt(void)
 			__flush_tlb_page(va);
 		}
 	}
-	cpumask_clear_cpu(cpu_id, (cpumask_t*)&flush_cpumask);
+	cpumask_clear_cpu(cpu_id, &flush_cpumask);
 }
 
 /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
index 483dff986a23..7f546183a0f0 100644
--- a/arch/metag/kernel/process.c
+++ b/arch/metag/kernel/process.c
@@ -174,8 +174,11 @@ void show_regs(struct pt_regs *regs)
 	show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
 }
 
+/*
+ * Copy architecture-specific thread state
+ */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg, struct task_struct *tsk)
+		unsigned long kthread_arg, struct task_struct *tsk)
 {
 	struct pt_regs *childregs = task_pt_regs(tsk);
 	void *kernel_context = ((void *) childregs +
@@ -202,12 +205,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		global_base = __core_reg_get(A1GbP);
 		childregs->ctx.AX[0].U1 = (unsigned long) global_base;
 		childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
-		/* Set D1Ar1=arg and D1RtP=usp (fn) */
+		/* Set D1Ar1=kthread_arg and D1RtP=usp (fn) */
 		childregs->ctx.DX[4].U1 = usp;
-		childregs->ctx.DX[3].U1 = arg;
+		childregs->ctx.DX[3].U1 = kthread_arg;
 		tsk->thread.int_depth = 2;
 		return 0;
 	}
+
 	/*
 	 * Get a pointer to where the new child's register block should have
 	 * been pushed.
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index bb02fac9b4fa..2b25d1ba1ea0 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS];
 #define SMP_DUMP		0x8
 #define SMP_ASK_C0COUNT		0x10
 
-extern volatile cpumask_t cpu_callin_map;
+extern cpumask_t cpu_callin_map;
 
 /* Mask of CPUs which are currently definitely operating coherently */
 extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index be4899f3c393..4a4d9e067c89 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
 
 	/* Lets see if this is an O32 ELF */
 	if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
-		/* FR = 1 for N32 */
-		if (ehdr32->e_flags & EF_MIPS_ABI2)
-			state->overall_fp_mode = FP_FR1;
-		else
-			/* Set a good default FPU mode for O32 */
-			state->overall_fp_mode = cpu_has_mips_r6 ?
-				FP_FRE : FP_FR0;
-
 		if (ehdr32->e_flags & EF_MIPS_FP64) {
 			/*
 			 * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
@@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
 				  (char *)&abiflags,
 				  sizeof(abiflags));
 	} else {
-		/* FR=1 is really the only option for 64-bit */
-		state->overall_fp_mode = FP_FR1;
-
 		if (phdr64->p_type != PT_MIPS_ABIFLAGS)
 			return 0;
 		if (phdr64->p_filesz < sizeof(abiflags))
@@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
 	struct elf32_hdr *ehdr = _ehdr;
 	struct mode_req prog_req, interp_req;
 	int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
+	bool is_mips64;
 
 	if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
 		return 0;
@@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
 		abi0 = abi1 = fp_abi;
 	}
 
-	/* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */
-	max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) &&
-		   (!(ehdr->e_flags & EF_MIPS_ABI2))) ?
-		MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT;
+	is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
+		    (ehdr->e_flags & EF_MIPS_ABI2);
+
+	if (is_mips64) {
+		/* MIPS64 code always uses FR=1, thus the default is easy */
+		state->overall_fp_mode = FP_FR1;
+
+		/* Disallow access to the various FPXX & FP64 ABIs */
+		max_abi = MIPS_ABI_FP_SOFT;
+	} else {
+		/* Default to a mode capable of running code expecting FR=0 */
+		state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
+
+		/* Allow all ABIs we know about */
+		max_abi = MIPS_ABI_FP_64A;
+	}
 
 	if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
 	    (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN))
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 193ace7955fb..faa46ebd9dda 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -43,7 +43,7 @@
 #include <asm/time.h>
 #include <asm/setup.h>
 
-volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
+cpumask_t cpu_callin_map;		/* Bitmask of started secondaries */
 
 int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
 EXPORT_SYMBOL(__cpu_number_map);
@@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	/*
 	 * Trust is futile.  We should really have timeouts ...
 	 */
-	while (!cpumask_test_cpu(cpu, &cpu_callin_map))
+	while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
 		udelay(100);
+		schedule();
+	}
 
 	synchronise_count_master(cpu);
 	return 0;
diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h
index 897ba3c12b32..cc4a2ba9e228 100644
--- a/arch/mn10300/include/asm/io.h
+++ b/arch/mn10300/include/asm/io.h
@@ -197,6 +197,11 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
 #define iowrite16(v, addr)	writew((v), (addr))
 #define iowrite32(v, addr)	writel((v), (addr))
 
+#define ioread16be(addr)	be16_to_cpu(readw(addr))
+#define ioread32be(addr)	be32_to_cpu(readl(addr))
+#define iowrite16be(v, addr)	writew(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)	writel(cpu_to_be32(v), (addr))
+
 #define ioread8_rep(p, dst, count) \
 	insb((unsigned long) (p), (dst), (count))
 #define ioread16_rep(p, dst, count) \
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 01c75f36e8b3..24b3d8999ac7 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -46,7 +46,6 @@ generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += siginfo.h
 generic-y += signal.h
 generic-y += socket.h
diff --git a/arch/nios2/include/asm/shmparam.h b/arch/nios2/include/asm/shmparam.h
new file mode 100644
index 000000000000..60784294e407
--- /dev/null
+++ b/arch/nios2/include/asm/shmparam.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright Altera Corporation (C) <2015>. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_NIOS2_SHMPARAM_H
+#define _ASM_NIOS2_SHMPARAM_H
+
+#define	SHMLBA	CONFIG_NIOS2_DCACHE_SIZE
+
+#endif /* _ASM_NIOS2_SHMPARAM_H */
diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h
index eff00e67c0a2..1d35de90a977 100644
--- a/arch/nios2/include/uapi/asm/ptrace.h
+++ b/arch/nios2/include/uapi/asm/ptrace.h
@@ -14,6 +14,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/types.h>
+
 /*
  * Register numbers used by 'ptrace' system call interface.
  */
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
index 27b006c52e12..1e515ccd698e 100644
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -92,35 +92,35 @@ exception_table:
 
 trap_table:
 	.word	handle_system_call	/* 0  */
-	.word	instruction_trap	/* 1  */
-	.word	instruction_trap	/* 2  */
-	.word	instruction_trap	/* 3  */
-	.word	instruction_trap	/* 4  */
-	.word	instruction_trap	/* 5  */
-	.word	instruction_trap	/* 6  */
-	.word	instruction_trap	/* 7  */
-	.word	instruction_trap	/* 8  */
-	.word	instruction_trap	/* 9  */
-	.word	instruction_trap	/* 10 */
-	.word	instruction_trap	/* 11 */
-	.word	instruction_trap	/* 12 */
-	.word	instruction_trap	/* 13 */
-	.word	instruction_trap	/* 14 */
-	.word	instruction_trap	/* 15 */
-	.word	instruction_trap	/* 16 */
-	.word	instruction_trap	/* 17 */
-	.word	instruction_trap	/* 18 */
-	.word	instruction_trap	/* 19 */
-	.word	instruction_trap	/* 20 */
-	.word	instruction_trap	/* 21 */
-	.word	instruction_trap	/* 22 */
-	.word	instruction_trap	/* 23 */
-	.word	instruction_trap	/* 24 */
-	.word	instruction_trap	/* 25 */
-	.word	instruction_trap	/* 26 */
-	.word	instruction_trap	/* 27 */
-	.word	instruction_trap	/* 28 */
-	.word	instruction_trap	/* 29 */
+	.word	handle_trap_1		/* 1  */
+	.word	handle_trap_2		/* 2  */
+	.word	handle_trap_3		/* 3  */
+	.word	handle_trap_reserved	/* 4  */
+	.word	handle_trap_reserved	/* 5  */
+	.word	handle_trap_reserved	/* 6  */
+	.word	handle_trap_reserved	/* 7  */
+	.word	handle_trap_reserved	/* 8  */
+	.word	handle_trap_reserved	/* 9  */
+	.word	handle_trap_reserved	/* 10 */
+	.word	handle_trap_reserved	/* 11 */
+	.word	handle_trap_reserved	/* 12 */
+	.word	handle_trap_reserved	/* 13 */
+	.word	handle_trap_reserved	/* 14 */
+	.word	handle_trap_reserved	/* 15 */
+	.word	handle_trap_reserved	/* 16 */
+	.word	handle_trap_reserved	/* 17 */
+	.word	handle_trap_reserved	/* 18 */
+	.word	handle_trap_reserved	/* 19 */
+	.word	handle_trap_reserved	/* 20 */
+	.word	handle_trap_reserved	/* 21 */
+	.word	handle_trap_reserved	/* 22 */
+	.word	handle_trap_reserved	/* 23 */
+	.word	handle_trap_reserved	/* 24 */
+	.word	handle_trap_reserved	/* 25 */
+	.word	handle_trap_reserved	/* 26 */
+	.word	handle_trap_reserved	/* 27 */
+	.word	handle_trap_reserved	/* 28 */
+	.word	handle_trap_reserved	/* 29 */
 #ifdef CONFIG_KGDB
 	.word	handle_kgdb_breakpoint	/* 30 KGDB breakpoint */
 #else
@@ -455,6 +455,19 @@ handle_kgdb_breakpoint:
 	br	ret_from_exception
 #endif
 
+handle_trap_1:
+	call	handle_trap_1_c
+	br	ret_from_exception
+
+handle_trap_2:
+	call	handle_trap_2_c
+	br	ret_from_exception
+
+handle_trap_3:
+handle_trap_reserved:
+	call	handle_trap_3_c
+	br	ret_from_exception
+
 /*
  * Beware - when entering resume, prev (the current task) is
  * in r4, next (the new task) is in r5, don't change these
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index b7b97641a9a6..81f7da7b1d55 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -23,6 +23,17 @@
 
 static DEFINE_SPINLOCK(die_lock);
 
+static void _send_sig(int signo, int code, unsigned long addr)
+{
+	siginfo_t info;
+
+	info.si_signo = signo;
+	info.si_errno = 0;
+	info.si_code = code;
+	info.si_addr = (void __user *) addr;
+	force_sig_info(signo, &info, current);
+}
+
 void die(const char *str, struct pt_regs *regs, long err)
 {
 	console_verbose();
@@ -39,16 +50,10 @@ void die(const char *str, struct pt_regs *regs, long err)
 
 void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr)
 {
-	siginfo_t info;
-
 	if (!user_mode(regs))
 		die("Exception in kernel mode", regs, signo);
 
-	info.si_signo = signo;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = (void __user *) addr;
-	force_sig_info(signo, &info, current);
+	_send_sig(signo, code, addr);
 }
 
 /*
@@ -183,3 +188,18 @@ asmlinkage void unhandled_exception(struct pt_regs *regs, int cause)
 
 	pr_emerg("opcode: 0x%08lx\n", *(unsigned long *)(regs->ea));
 }
+
+asmlinkage void handle_trap_1_c(struct pt_regs *fp)
+{
+	_send_sig(SIGUSR1, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_2_c(struct pt_regs *fp)
+{
+	_send_sig(SIGUSR2, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_3_c(struct pt_regs *fp)
+{
+	_send_sig(SIGILL, ILL_ILLTRP, fp->ea);
+}
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 796642932e2e..223cdcc8203f 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -58,9 +58,6 @@ static void __invalidate_dcache(unsigned long start, unsigned long end)
 	end += (cpuinfo.dcache_line_size - 1);
 	end &= ~(cpuinfo.dcache_line_size - 1);
 
-	if (end > start + cpuinfo.dcache_size)
-		end = start + cpuinfo.dcache_size;
-
 	for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
 		__asm__ __volatile__ ("   initda 0(%0)\n"
 					: /* Outputs */
@@ -131,12 +128,14 @@ void flush_cache_dup_mm(struct mm_struct *mm)
 
 void flush_icache_range(unsigned long start, unsigned long end)
 {
+	__flush_dcache(start, end);
 	__flush_icache(start, end);
 }
 
 void flush_dcache_range(unsigned long start, unsigned long end)
 {
 	__flush_dcache(start, end);
+	__flush_icache(start, end);
 }
 EXPORT_SYMBOL(flush_dcache_range);
 
@@ -159,6 +158,7 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
 	unsigned long start = (unsigned long) page_address(page);
 	unsigned long end = start + PAGE_SIZE;
 
+	__flush_dcache(start, end);
 	__flush_icache(start, end);
 }
 
@@ -173,6 +173,18 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 		__flush_icache(start, end);
 }
 
+void __flush_dcache_page(struct address_space *mapping, struct page *page)
+{
+	/*
+	 * Writeback any data associated with the kernel mapping of this
+	 * page.  This ensures that data in the physical page is mutually
+	 * coherent with the kernels mapping.
+	 */
+	unsigned long start = (unsigned long)page_address(page);
+
+	__flush_dcache_all(start, start + PAGE_SIZE);
+}
+
 void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping;
@@ -190,11 +202,12 @@ void flush_dcache_page(struct page *page)
 	if (mapping && !mapping_mapped(mapping)) {
 		clear_bit(PG_dcache_clean, &page->flags);
 	} else {
-		unsigned long start = (unsigned long)page_address(page);
-
-		__flush_dcache_all(start, start + PAGE_SIZE);
-		if (mapping)
+		__flush_dcache_page(mapping, page);
+		if (mapping) {
+			unsigned long start = (unsigned long)page_address(page);
 			flush_aliases(mapping,  page);
+			flush_icache_range(start, start + PAGE_SIZE);
+		}
 		set_bit(PG_dcache_clean, &page->flags);
 	}
 }
@@ -205,6 +218,7 @@ void update_mmu_cache(struct vm_area_struct *vma,
 {
 	unsigned long pfn = pte_pfn(*pte);
 	struct page *page;
+	struct address_space *mapping;
 
 	if (!pfn_valid(pfn))
 		return;
@@ -217,16 +231,15 @@ void update_mmu_cache(struct vm_area_struct *vma,
 	if (page == ZERO_PAGE(0))
 		return;
 
-	if (!PageReserved(page) &&
-	     !test_and_set_bit(PG_dcache_clean, &page->flags)) {
-		unsigned long start = page_to_virt(page);
-		struct address_space *mapping;
-
-		__flush_dcache(start, start + PAGE_SIZE);
-
-		mapping = page_mapping(page);
-		if (mapping)
-			flush_aliases(mapping, page);
+	mapping = page_mapping(page);
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		__flush_dcache_page(mapping, page);
+
+	if(mapping)
+	{
+		flush_aliases(mapping, page);
+		if (vma->vm_flags & VM_EXEC)
+			flush_icache_page(vma, page);
 	}
 }
 
@@ -234,15 +247,19 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
 		    struct page *to)
 {
 	__flush_dcache(vaddr, vaddr + PAGE_SIZE);
+	__flush_icache(vaddr, vaddr + PAGE_SIZE);
 	copy_page(vto, vfrom);
 	__flush_dcache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
+	__flush_icache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
 }
 
 void clear_user_page(void *addr, unsigned long vaddr, struct page *page)
 {
 	__flush_dcache(vaddr, vaddr + PAGE_SIZE);
+	__flush_icache(vaddr, vaddr + PAGE_SIZE);
 	clear_page(addr);
 	__flush_dcache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
+	__flush_icache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
 }
 
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
@@ -251,7 +268,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 {
 	flush_cache_page(vma, user_vaddr, page_to_pfn(page));
 	memcpy(dst, src, len);
-	__flush_dcache((unsigned long)src, (unsigned long)src + len);
+	__flush_dcache_all((unsigned long)src, (unsigned long)src + len);
 	if (vma->vm_flags & VM_EXEC)
 		__flush_icache((unsigned long)src, (unsigned long)src + len);
 }
@@ -262,7 +279,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 {
 	flush_cache_page(vma, user_vaddr, page_to_pfn(page));
 	memcpy(dst, src, len);
-	__flush_dcache((unsigned long)dst, (unsigned long)dst + len);
+	__flush_dcache_all((unsigned long)dst, (unsigned long)dst + len);
 	if (vma->vm_flags & VM_EXEC)
 		__flush_icache((unsigned long)dst, (unsigned long)dst + len);
 }
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index bde531103638..0cc6eedc4780 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -30,8 +30,6 @@ static inline int arch_has_random(void)
 	return !!ppc_md.get_random_long;
 }
 
-int powernv_get_random_long(unsigned long *v);
-
 static inline int arch_get_random_seed_long(unsigned long *v)
 {
 	return 0;
@@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void)
 
 #endif /* CONFIG_ARCH_RANDOM */
 
+#ifdef CONFIG_PPC_POWERNV
+int powernv_hwrng_present(void);
+int powernv_get_random_long(unsigned long *v);
+int powernv_get_random_real_mode(unsigned long *v);
+#else
+static inline int powernv_hwrng_present(void) { return 0; }
+static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+#endif
+
 #endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 993090422690..b91e74a817d8 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -288,6 +288,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
 	return !is_kvmppc_hv_enabled(vcpu->kvm);
 }
 
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 14619a59ec09..3536d12eb798 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -85,6 +85,20 @@ static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
 	return old == 0;
 }
 
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+	hpte_v &= ~HPTE_V_HVLOCK;
+	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+	hpte[0] = cpu_to_be64(hpte_v);
+}
+
+/* Without barrier */
+static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+	hpte_v &= ~HPTE_V_HVLOCK;
+	hpte[0] = cpu_to_be64(hpte_v);
+}
+
 static inline int __hpte_actual_psize(unsigned int lp, int psize)
 {
 	int i, shift;
@@ -281,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 
 /*
  * If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
-						 unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
 	pte_t old_pte, new_pte = __pte(0);
 
 	while (1) {
-		old_pte = *ptep;
+		/*
+		 * Make sure we don't reload from ptep
+		 */
+		old_pte = READ_ONCE(*ptep);
 		/*
 		 * wait until _PAGE_BUSY is clear then set it atomically
 		 */
@@ -298,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
 			cpu_relax();
 			continue;
 		}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		/* If hugepage and is trans splitting return None */
-		if (unlikely(hugepage &&
-			     pmd_trans_splitting(pte_pmd(old_pte))))
-			return __pte(0);
-#endif
 		/* If pte is not present return None */
 		if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
 			return __pte(0);
@@ -424,6 +433,10 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
 	return rcu_dereference_raw_notrace(kvm->memslots);
 }
 
+extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+
+extern void kvmhv_rm_send_ipi(int cpu);
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c610961720c7..a193a13cf08b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -227,10 +227,8 @@ struct kvm_arch {
 	unsigned long host_sdr1;
 	int tlbie_lock;
 	unsigned long lpcr;
-	unsigned long rmor;
-	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
-	int rma_setup_done;
+	int hpte_setup_done;
 	u32 hpt_order;
 	atomic_t vcpus_running;
 	u32 online_vcores;
@@ -239,6 +237,8 @@ struct kvm_arch {
 	atomic_t hpte_mod_interest;
 	cpumask_t need_tlb_flush;
 	int hpt_cma_alloc;
+	struct dentry *debugfs_dir;
+	struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	struct mutex hpt_mutex;
@@ -263,18 +263,15 @@ struct kvm_arch {
 
 /*
  * Struct for a virtual core.
- * Note: entry_exit_count combines an entry count in the bottom 8 bits
- * and an exit count in the next 8 bits.  This is so that we can
- * atomically increment the entry count iff the exit count is 0
- * without taking the lock.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
  */
 struct kvmppc_vcore {
 	int n_runnable;
-	int n_busy;
 	int num_threads;
-	int entry_exit_count;
-	int n_woken;
-	int nap_count;
+	int entry_exit_map;
 	int napping_threads;
 	int first_vcpuid;
 	u16 pcpu;
@@ -299,13 +296,14 @@ struct kvmppc_vcore {
 	ulong conferring_threads;
 };
 
-#define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
-#define VCORE_EXIT_COUNT(vc)	((vc)->entry_exit_count >> 8)
+#define VCORE_ENTRY_MAP(vc)	((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc)	((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc)	(VCORE_EXIT_MAP(vc) != 0)
 
 /* Values for vcore_state */
 #define VCORE_INACTIVE	0
 #define VCORE_SLEEPING	1
-#define VCORE_STARTING	2
+#define VCORE_PREEMPT	2
 #define VCORE_RUNNING	3
 #define VCORE_EXITING	4
 
@@ -368,6 +366,14 @@ struct kvmppc_slb {
 	u8 base_page_size;	/* MMU_PAGE_xxx */
 };
 
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+	u64	seqcount;	/* used to synchronize access, also count * 2 */
+	u64	tb_total;	/* total time in timebase ticks */
+	u64	tb_min;		/* min time */
+	u64	tb_max;		/* max time */
+};
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM	2
 #define KVMPPC_BOOKE_DAC_NUM	2
@@ -656,6 +662,19 @@ struct kvm_vcpu_arch {
 
 	u32 emul_inst;
 #endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	struct kvmhv_tb_accumulator *cur_activity;	/* What we're timing */
+	u64	cur_tb_start;			/* when it started */
+	struct kvmhv_tb_accumulator rm_entry;	/* real-mode entry code */
+	struct kvmhv_tb_accumulator rm_intr;	/* real-mode intr handling */
+	struct kvmhv_tb_accumulator rm_exit;	/* real-mode exit code */
+	struct kvmhv_tb_accumulator guest_time;	/* guest execution */
+	struct kvmhv_tb_accumulator cede_time;	/* time napping inside guest */
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_timings;
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
 };
 
 #define VCPU_FPR(vcpu, i)	(vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 46bf652c9169..b8475daad884 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
 	return kvm->arch.kvm_ops == kvmppc_hv_ops;
 }
 
+extern int kvmppc_hwrng_present(void);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9835ac4173b7..11a38635dd65 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #define pmd_large(pmd)		0
 #define has_transparent_hugepage() 0
 #endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 				 unsigned *shift);
-
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
-				     unsigned long *pte_sizep)
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+					       unsigned *shift)
 {
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
-	if (!ptep)
-		return NULL;
-	if (shift)
-		*pte_sizep = 1ul << shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-
-	if (ps > *pte_sizep)
-		return NULL;
-
-	return ptep;
+	if (!arch_irqs_disabled()) {
+		pr_info("%s called with irq enabled\n", __func__);
+		dump_stack();
+	}
+	return __find_linux_pte_or_hugepte(pgdir, ea, shift);
 }
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 03cbada59d3a..10fc784a2ad4 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
index 5047659815a5..5d836b7c1176 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -11,7 +11,7 @@
 #define TM_CAUSE_RESCHED	0xde
 #define TM_CAUSE_TLBI		0xdc
 #define TM_CAUSE_FAC_UNAV	0xda
-#define TM_CAUSE_SYSCALL	0xd8
+#define TM_CAUSE_SYSCALL	0xd8  /* future use */
 #define TM_CAUSE_MISC		0xd6  /* future use */
 #define TM_CAUSE_SIGNAL		0xd4
 #define TM_CAUSE_ALIGNMENT	0xd2
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4717859fdd04..0034b6b3556a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -37,6 +37,7 @@
 #include <asm/thread_info.h>
 #include <asm/rtas.h>
 #include <asm/vdso_datapage.h>
+#include <asm/dbell.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
@@ -459,6 +460,19 @@ int main(void)
 	DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
 	DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
 #endif
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+	DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+	DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+	DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+	DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+	DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+	DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
+	DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+	DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+	DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+	DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
+#endif
 	DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
 	DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
 	DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
@@ -492,7 +506,6 @@ int main(void)
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
-	DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
@@ -550,8 +563,7 @@ int main(void)
 	DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
 	DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
 	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
-	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
-	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
+	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
@@ -748,5 +760,7 @@ int main(void)
 			offsetof(struct paca_struct, subcore_sibling_mask));
 #endif
 
+	DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a4c62eb0ee48..9ee61d15653d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	int hugepage_shift;
 
 	/*
-	 * We won't find hugepages here, iomem
+	 * We won't find hugepages here(this is iomem). Hence we are not
+	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+	 * page table free, because of init_mm.
 	 */
-	ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
 	if (!ptep)
 		return token;
 	WARN_ON(hugepage_shift);
@@ -747,21 +749,24 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 		eeh_unfreeze_pe(pe, false);
 		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
 		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 		break;
 	case pcie_hot_reset:
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 		eeh_ops->reset(pe, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
+		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
 		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 		break;
 	default:
-		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
 		return -EINVAL;
 	};
 
@@ -1056,6 +1061,9 @@ void eeh_add_device_early(struct pci_dn *pdn)
 	if (!edev || !eeh_enabled())
 		return;
 
+	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
+		return;
+
 	/* USB Bus children of PCI devices will not have BUID's */
 	phb = edev->phb;
 	if (NULL == phb ||
@@ -1110,6 +1118,9 @@ void eeh_add_device_late(struct pci_dev *dev)
 		return;
 	}
 
+	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
+		eeh_ops->probe(pdn, NULL);
+
 	/*
 	 * The EEH cache might not be removed correctly because of
 	 * unbalanced kref to the device during unplug time, which
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 8ca9434c40e6..afbc20019c2e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,7 +34,6 @@
 #include <asm/ftrace.h>
 #include <asm/hw_irq.h>
 #include <asm/context_tracking.h>
-#include <asm/tm.h>
 
 /*
  * System calls.
@@ -146,24 +145,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	andi.	r11,r10,_TIF_SYSCALL_DOTRACE
 	bne	syscall_dotrace
 .Lsyscall_dotrace_cont:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
-	b	1f
-END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-	extrdi.	r11, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
-	beq+	1f
-
-	/* Doom the transaction and don't perform the syscall: */
-	mfmsr	r11
-	li	r12, 1
-	rldimi	r11, r12, MSR_TM_LG, 63-MSR_TM_LG
-	mtmsrd	r11, 0
-	li	r11, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-	TABORT(R11)
-
-	b	.Lsyscall_exit
-1:
-#endif
 	cmpldi	0,r0,NR_syscalls
 	bge-	syscall_enosys
 
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index eeaa0d5f69d5..ccde8f084ce4 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -501,9 +501,11 @@ BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	ld	r1,PACAR1(r13)
+	ld	r6,_CCR(r1)
 	ld	r4,_MSR(r1)
 	ld	r5,_NIP(r1)
 	addi	r1,r1,INT_FRAME_SIZE
+	mtcr	r6
 	mtspr	SPRN_SRR1,r4
 	mtspr	SPRN_SRR0,r5
 	rfid
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 24b968f8e4d8..63d9cc4d7366 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
 		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
 			return NULL;
-
-		ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+		/*
+		 * We won't find huge pages here (iomem). Also can't hit
+		 * a page table free due to init_mm
+		 */
+		ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
 						 &hugepage_shift);
 		if (ptep == NULL)
 			paddr = 0;
 		else {
-			/*
-			 * we don't have hugepages backing iomem
-			 */
 			WARN_ON(hugepage_shift);
 			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
 		}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2d7b33fab953..56f44848b044 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
 }
 #endif
 
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+	return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  *
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 11850f310fb4..3caec2c42105 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64
 
 config KVM_BOOK3S_64_HV
 	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
-	depends on KVM_BOOK3S_64
+	depends on KVM_BOOK3S_64 && PPC_POWERNV
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
 	select CMA
@@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
 	  processor, including emulating 32-bit processors on a 64-bit
 	  host.
 
+config KVM_BOOK3S_HV_EXIT_TIMING
+	bool "Detailed timing for hypervisor real-mode code"
+	depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+	---help---
+	  Calculate time taken for each vcpu in the real-mode guest entry,
+	  exit, and interrupt handling code, plus time spent in the guest
+	  and in nap mode due to idle (cede) while other threads are still
+	  in the guest.  The total, minimum and maximum times in nanoseconds
+	  together with the number of executions are reported in debugfs in
+	  kvm/vm#/vcpu#/timings.  The overhead is of the order of 30 - 40
+	  ns per exit on POWER8.
+
+	  If unsure, say N.
+
 config KVM_BOOKE_HV
 	bool
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index cfbcdc654201..453a8a47a467 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -821,6 +821,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 #endif
 }
 
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+	u64 buf;
+	int ret;
+
+	if (!is_power_of_2(size) || (size > sizeof(buf)))
+		return H_TOO_HARD;
+
+	ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	switch (size) {
+	case 1:
+		kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+		break;
+
+	case 2:
+		kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
+		break;
+
+	case 4:
+		kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
+		break;
+
+	case 8:
+		kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
+		break;
+
+	default:
+		BUG();
+	}
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+	unsigned long size = kvmppc_get_gpr(vcpu, 4);
+	unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+	unsigned long val = kvmppc_get_gpr(vcpu, 6);
+	u64 buf;
+	int ret;
+
+	switch (size) {
+	case 1:
+		*(u8 *)&buf = val;
+		break;
+
+	case 2:
+		*(__be16 *)&buf = cpu_to_be16(val);
+		break;
+
+	case 4:
+		*(__be32 *)&buf = cpu_to_be32(val);
+		break;
+
+	case 8:
+		*(__be64 *)&buf = cpu_to_be64(val);
+		break;
+
+	default:
+		return H_TOO_HARD;
+	}
+
+	ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+	if (ret != 0)
+		return H_TOO_HARD;
+
+	return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
+
 int kvmppc_core_check_processor_compat(void)
 {
 	/*
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 534acb3c6c3d..1a4acf8bf4f4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -27,6 +27,7 @@
 #include <linux/srcu.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/debugfs.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -116,12 +117,12 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 	long order;
 
 	mutex_lock(&kvm->lock);
-	if (kvm->arch.rma_setup_done) {
-		kvm->arch.rma_setup_done = 0;
-		/* order rma_setup_done vs. vcpus_running */
+	if (kvm->arch.hpte_setup_done) {
+		kvm->arch.hpte_setup_done = 0;
+		/* order hpte_setup_done vs. vcpus_running */
 		smp_mb();
 		if (atomic_read(&kvm->arch.vcpus_running)) {
-			kvm->arch.rma_setup_done = 1;
+			kvm->arch.hpte_setup_done = 1;
 			goto out;
 		}
 	}
@@ -338,9 +339,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
 	gr = kvm->arch.revmap[index].guest_rpte;
 
-	/* Unlock the HPTE */
-	asm volatile("lwsync" : : : "memory");
-	hptep[0] = cpu_to_be64(v);
+	unlock_hpte(hptep, v);
 	preempt_enable();
 
 	gpte->eaddr = eaddr;
@@ -469,8 +468,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
 	hpte[1] = be64_to_cpu(hptep[1]);
 	hpte[2] = r = rev->guest_rpte;
-	asm volatile("lwsync" : : : "memory");
-	hptep[0] = cpu_to_be64(hpte[0]);
+	unlock_hpte(hptep, hpte[0]);
 	preempt_enable();
 
 	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
@@ -537,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 		/* if the guest wants write access, see if that is OK */
 		if (!writing && hpte_is_writable(r)) {
-			unsigned int hugepage_shift;
 			pte_t *ptep, pte;
-
+			unsigned long flags;
 			/*
 			 * We need to protect against page table destruction
-			 * while looking up and updating the pte.
+			 * hugepage split and collapse.
 			 */
-			rcu_read_lock_sched();
+			local_irq_save(flags);
 			ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-							 hva, &hugepage_shift);
+							 hva, NULL);
 			if (ptep) {
-				pte = kvmppc_read_update_linux_pte(ptep, 1,
-							   hugepage_shift);
+				pte = kvmppc_read_update_linux_pte(ptep, 1);
 				if (pte_write(pte))
 					write_ok = 1;
 			}
-			rcu_read_unlock_sched();
+			local_irq_restore(flags);
 		}
 	}
 
@@ -621,7 +617,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	hptep[1] = cpu_to_be64(r);
 	eieio();
-	hptep[0] = cpu_to_be64(hpte[0]);
+	__unlock_hpte(hptep, hpte[0]);
 	asm volatile("ptesync" : : : "memory");
 	preempt_enable();
 	if (page && hpte_is_writable(r))
@@ -642,7 +638,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	return ret;
 
  out_unlock:
-	hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+	__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	preempt_enable();
 	goto out_put;
 }
@@ -771,7 +767,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			}
 		}
 		unlock_rmap(rmapp);
-		hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	}
 	return 0;
 }
@@ -857,7 +853,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			}
 			ret = 1;
 		}
-		hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	} while ((i = j) != head);
 
 	unlock_rmap(rmapp);
@@ -974,8 +970,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 
 		/* Now check and modify the HPTE */
 		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
-			/* unlock and continue */
-			hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+			__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 			continue;
 		}
 
@@ -996,9 +991,9 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 				npages_dirty = n;
 			eieio();
 		}
-		v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+		v &= ~HPTE_V_ABSENT;
 		v |= HPTE_V_VALID;
-		hptep[0] = cpu_to_be64(v);
+		__unlock_hpte(hptep, v);
 	} while ((i = j) != head);
 
 	unlock_rmap(rmapp);
@@ -1218,8 +1213,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
 			r &= ~HPTE_GR_MODIFIED;
 			revp->guest_rpte = r;
 		}
-		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-		hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+		unlock_hpte(hptp, be64_to_cpu(hptp[0]));
 		preempt_enable();
 		if (!(valid == want_valid && (first_pass || dirty)))
 			ok = 0;
@@ -1339,20 +1333,20 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 	unsigned long tmp[2];
 	ssize_t nb;
 	long int err, ret;
-	int rma_setup;
+	int hpte_setup;
 
 	if (!access_ok(VERIFY_READ, buf, count))
 		return -EFAULT;
 
 	/* lock out vcpus from running while we're doing this */
 	mutex_lock(&kvm->lock);
-	rma_setup = kvm->arch.rma_setup_done;
-	if (rma_setup) {
-		kvm->arch.rma_setup_done = 0;	/* temporarily */
-		/* order rma_setup_done vs. vcpus_running */
+	hpte_setup = kvm->arch.hpte_setup_done;
+	if (hpte_setup) {
+		kvm->arch.hpte_setup_done = 0;	/* temporarily */
+		/* order hpte_setup_done vs. vcpus_running */
 		smp_mb();
 		if (atomic_read(&kvm->arch.vcpus_running)) {
-			kvm->arch.rma_setup_done = 1;
+			kvm->arch.hpte_setup_done = 1;
 			mutex_unlock(&kvm->lock);
 			return -EBUSY;
 		}
@@ -1405,7 +1399,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 				       "r=%lx\n", ret, i, v, r);
 				goto out;
 			}
-			if (!rma_setup && is_vrma_hpte(v)) {
+			if (!hpte_setup && is_vrma_hpte(v)) {
 				unsigned long psize = hpte_base_page_size(v, r);
 				unsigned long senc = slb_pgsize_encoding(psize);
 				unsigned long lpcr;
@@ -1414,7 +1408,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 					(VRMA_VSID << SLB_VSID_SHIFT_1T);
 				lpcr = senc << (LPCR_VRMASD_SH - 4);
 				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
-				rma_setup = 1;
+				hpte_setup = 1;
 			}
 			++i;
 			hptp += 2;
@@ -1430,9 +1424,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 	}
 
  out:
-	/* Order HPTE updates vs. rma_setup_done */
+	/* Order HPTE updates vs. hpte_setup_done */
 	smp_wmb();
-	kvm->arch.rma_setup_done = rma_setup;
+	kvm->arch.hpte_setup_done = hpte_setup;
 	mutex_unlock(&kvm->lock);
 
 	if (err)
@@ -1495,6 +1489,141 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
 	return ret;
 }
 
+struct debugfs_htab_state {
+	struct kvm	*kvm;
+	struct mutex	mutex;
+	unsigned long	hpt_index;
+	int		chars_left;
+	int		buf_index;
+	char		buf[64];
+};
+
+static int debugfs_htab_open(struct inode *inode, struct file *file)
+{
+	struct kvm *kvm = inode->i_private;
+	struct debugfs_htab_state *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	kvm_get_kvm(kvm);
+	p->kvm = kvm;
+	mutex_init(&p->mutex);
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_htab_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_htab_state *p = file->private_data;
+
+	kvm_put_kvm(p->kvm);
+	kfree(p);
+	return 0;
+}
+
+static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct debugfs_htab_state *p = file->private_data;
+	ssize_t ret, r;
+	unsigned long i, n;
+	unsigned long v, hr, gr;
+	struct kvm *kvm;
+	__be64 *hptp;
+
+	ret = mutex_lock_interruptible(&p->mutex);
+	if (ret)
+		return ret;
+
+	if (p->chars_left) {
+		n = p->chars_left;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index += n;
+		buf += n;
+		len -= n;
+		ret = n;
+		if (r) {
+			if (!n)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+
+	kvm = p->kvm;
+	i = p->hpt_index;
+	hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+	for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
+		if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		/* lock the HPTE so it's stable and read it */
+		preempt_disable();
+		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+			cpu_relax();
+		v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
+		hr = be64_to_cpu(hptp[1]);
+		gr = kvm->arch.revmap[i].guest_rpte;
+		unlock_hpte(hptp, v);
+		preempt_enable();
+
+		if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		n = scnprintf(p->buf, sizeof(p->buf),
+			      "%6lx %.16lx %.16lx %.16lx\n",
+			      i, v, hr, gr);
+		p->chars_left = n;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index = n;
+		buf += n;
+		len -= n;
+		ret += n;
+		if (r) {
+			if (!ret)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+	p->hpt_index = i;
+
+ out:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+			   size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+static const struct file_operations debugfs_htab_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_htab_open,
+	.release = debugfs_htab_release,
+	.read	 = debugfs_htab_read,
+	.write	 = debugfs_htab_write,
+	.llseek	 = generic_file_llseek,
+};
+
+void kvmppc_mmu_debugfs_init(struct kvm *kvm)
+{
+	kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
+						    kvm->arch.debugfs_dir, kvm,
+						    &debugfs_htab_fops);
+}
+
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de747563d29d..48d3c5d2ecc9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -32,6 +32,7 @@
 #include <linux/page-flags.h>
 #include <linux/srcu.h>
 #include <linux/miscdevice.h>
+#include <linux/debugfs.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -50,6 +51,7 @@
 #include <asm/hvcall.h>
 #include <asm/switch_to.h>
 #include <asm/smp.h>
+#include <asm/dbell.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -83,9 +85,35 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+static bool kvmppc_ipi_thread(int cpu)
+{
+	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		preempt_disable();
+		if (cpu_first_thread_sibling(cpu) ==
+		    cpu_first_thread_sibling(smp_processor_id())) {
+			unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+			msg |= cpu_thread_in_core(cpu);
+			smp_mb();
+			__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+			preempt_enable();
+			return true;
+		}
+		preempt_enable();
+	}
+
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+	if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
+		xics_wake_cpu(cpu);
+		return true;
+	}
+#endif
+
+	return false;
+}
+
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-	int me;
 	int cpu = vcpu->cpu;
 	wait_queue_head_t *wqp;
 
@@ -95,20 +123,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 		++vcpu->stat.halt_wakeup;
 	}
 
-	me = get_cpu();
+	if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+		return;
 
 	/* CPU points to the first thread of the core */
-	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
-#ifdef CONFIG_PPC_ICP_NATIVE
-		int real_cpu = cpu + vcpu->arch.ptid;
-		if (paca[real_cpu].kvm_hstate.xics_phys)
-			xics_wake_cpu(real_cpu);
-		else
-#endif
-		if (cpu_online(cpu))
-			smp_send_reschedule(cpu);
-	}
-	put_cpu();
+	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
+		smp_send_reschedule(cpu);
 }
 
 /*
@@ -706,6 +726,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 		/* Send the error out to userspace via KVM_RUN */
 		return rc;
+	case H_LOGICAL_CI_LOAD:
+		ret = kvmppc_h_logical_ci_load(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_LOGICAL_CI_STORE:
+		ret = kvmppc_h_logical_ci_store(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
 	case H_SET_MODE:
 		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
 					kvmppc_get_gpr(vcpu, 5),
@@ -740,6 +770,8 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_CONFER:
 	case H_REGISTER_VPA:
 	case H_SET_MODE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
 	case H_XIRR:
 	case H_CPPR:
@@ -1410,6 +1442,154 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	return vcore;
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+	const char *name;
+	size_t offset;
+} timings[] = {
+	{"rm_entry",	offsetof(struct kvm_vcpu, arch.rm_entry)},
+	{"rm_intr",	offsetof(struct kvm_vcpu, arch.rm_intr)},
+	{"rm_exit",	offsetof(struct kvm_vcpu, arch.rm_exit)},
+	{"guest",	offsetof(struct kvm_vcpu, arch.guest_time)},
+	{"cede",	offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS	(sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+	struct kvm_vcpu	*vcpu;
+	unsigned int	buflen;
+	char		buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+	struct kvm_vcpu *vcpu = inode->i_private;
+	struct debugfs_timings_state *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	kvm_get_kvm(vcpu->kvm);
+	p->vcpu = vcpu;
+	file->private_data = p;
+
+	return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_timings_state *p = file->private_data;
+
+	kvm_put_kvm(p->vcpu->kvm);
+	kfree(p);
+	return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+				    size_t len, loff_t *ppos)
+{
+	struct debugfs_timings_state *p = file->private_data;
+	struct kvm_vcpu *vcpu = p->vcpu;
+	char *s, *buf_end;
+	struct kvmhv_tb_accumulator tb;
+	u64 count;
+	loff_t pos;
+	ssize_t n;
+	int i, loops;
+	bool ok;
+
+	if (!p->buflen) {
+		s = p->buf;
+		buf_end = s + sizeof(p->buf);
+		for (i = 0; i < N_TIMINGS; ++i) {
+			struct kvmhv_tb_accumulator *acc;
+
+			acc = (struct kvmhv_tb_accumulator *)
+				((unsigned long)vcpu + timings[i].offset);
+			ok = false;
+			for (loops = 0; loops < 1000; ++loops) {
+				count = acc->seqcount;
+				if (!(count & 1)) {
+					smp_rmb();
+					tb = *acc;
+					smp_rmb();
+					if (count == acc->seqcount) {
+						ok = true;
+						break;
+					}
+				}
+				udelay(1);
+			}
+			if (!ok)
+				snprintf(s, buf_end - s, "%s: stuck\n",
+					timings[i].name);
+			else
+				snprintf(s, buf_end - s,
+					"%s: %llu %llu %llu %llu\n",
+					timings[i].name, count / 2,
+					tb_to_ns(tb.tb_total),
+					tb_to_ns(tb.tb_min),
+					tb_to_ns(tb.tb_max));
+			s += strlen(s);
+		}
+		p->buflen = s - p->buf;
+	}
+
+	pos = *ppos;
+	if (pos >= p->buflen)
+		return 0;
+	if (len > p->buflen - pos)
+		len = p->buflen - pos;
+	n = copy_to_user(buf, p->buf + pos, len);
+	if (n) {
+		if (n == len)
+			return -EFAULT;
+		len -= n;
+	}
+	*ppos = pos + len;
+	return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+				     size_t len, loff_t *ppos)
+{
+	return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+	.owner	 = THIS_MODULE,
+	.open	 = debugfs_timings_open,
+	.release = debugfs_timings_release,
+	.read	 = debugfs_timings_read,
+	.write	 = debugfs_timings_write,
+	.llseek	 = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+	char buf[16];
+	struct kvm *kvm = vcpu->kvm;
+
+	snprintf(buf, sizeof(buf), "vcpu%u", id);
+	if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+		return;
+	vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
+	if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
+		return;
+	vcpu->arch.debugfs_timings =
+		debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
+				    vcpu, &debugfs_timings_ops);
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 						   unsigned int id)
 {
@@ -1479,6 +1659,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
 
+	debugfs_vcpu_init(vcpu, id);
+
 	return vcpu;
 
 free_vcpu:
@@ -1566,8 +1748,10 @@ static int kvmppc_grab_hwthread(int cpu)
 	tpaca = &paca[cpu];
 
 	/* Ensure the thread won't go into the kernel if it wakes */
-	tpaca->kvm_hstate.hwthread_req = 1;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
+	tpaca->kvm_hstate.napping = 0;
+	smp_wmb();
+	tpaca->kvm_hstate.hwthread_req = 1;
 
 	/*
 	 * If the thread is already executing in the kernel (e.g. handling
@@ -1610,35 +1794,41 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	}
 	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
-	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.kvm_vcore = vc;
 	tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
 	vcpu->cpu = vc->pcpu;
+	/* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
 	smp_wmb();
-#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
-	if (cpu != smp_processor_id()) {
-		xics_wake_cpu(cpu);
-		if (vcpu->arch.ptid)
-			++vc->n_woken;
-	}
-#endif
+	tpaca->kvm_hstate.kvm_vcpu = vcpu;
+	if (cpu != smp_processor_id())
+		kvmppc_ipi_thread(cpu);
 }
 
-static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+static void kvmppc_wait_for_nap(void)
 {
-	int i;
+	int cpu = smp_processor_id();
+	int i, loops;
 
-	HMT_low();
-	i = 0;
-	while (vc->nap_count < vc->n_woken) {
-		if (++i >= 1000000) {
-			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
-			       vc->nap_count, vc->n_woken);
-			break;
+	for (loops = 0; loops < 1000000; ++loops) {
+		/*
+		 * Check if all threads are finished.
+		 * We set the vcpu pointer when starting a thread
+		 * and the thread clears it when finished, so we look
+		 * for any threads that still have a non-NULL vcpu ptr.
+		 */
+		for (i = 1; i < threads_per_subcore; ++i)
+			if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+				break;
+		if (i == threads_per_subcore) {
+			HMT_medium();
+			return;
 		}
-		cpu_relax();
+		HMT_low();
 	}
 	HMT_medium();
+	for (i = 1; i < threads_per_subcore; ++i)
+		if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
 /*
@@ -1700,63 +1890,103 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
 	mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+static void prepare_threads(struct kvmppc_vcore *vc)
+{
+	struct kvm_vcpu *vcpu, *vnext;
+
+	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+				 arch.run_list) {
+		if (signal_pending(vcpu->arch.run_task))
+			vcpu->arch.ret = -EINTR;
+		else if (vcpu->arch.vpa.update_pending ||
+			 vcpu->arch.slb_shadow.update_pending ||
+			 vcpu->arch.dtl.update_pending)
+			vcpu->arch.ret = RESUME_GUEST;
+		else
+			continue;
+		kvmppc_remove_runnable(vc, vcpu);
+		wake_up(&vcpu->arch.cpu_run);
+	}
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc)
+{
+	u64 now;
+	long ret;
+	struct kvm_vcpu *vcpu, *vnext;
+
+	now = get_tb();
+	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+				 arch.run_list) {
+		/* cancel pending dec exception if dec is positive */
+		if (now < vcpu->arch.dec_expires &&
+		    kvmppc_core_pending_dec(vcpu))
+			kvmppc_core_dequeue_dec(vcpu);
+
+		trace_kvm_guest_exit(vcpu);
+
+		ret = RESUME_GUEST;
+		if (vcpu->arch.trap)
+			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+						    vcpu->arch.run_task);
+
+		vcpu->arch.ret = ret;
+		vcpu->arch.trap = 0;
+
+		if (vcpu->arch.ceded) {
+			if (!is_kvmppc_resume_guest(ret))
+				kvmppc_end_cede(vcpu);
+			else
+				kvmppc_set_timer(vcpu);
+		}
+		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+			kvmppc_remove_runnable(vc, vcpu);
+			wake_up(&vcpu->arch.cpu_run);
+		}
+	}
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
  */
-static void kvmppc_run_core(struct kvmppc_vcore *vc)
+static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vnext;
-	long ret;
-	u64 now;
-	int i, need_vpa_update;
+	struct kvm_vcpu *vcpu;
+	int i;
 	int srcu_idx;
-	struct kvm_vcpu *vcpus_to_update[threads_per_core];
 
-	/* don't start if any threads have a signal pending */
-	need_vpa_update = 0;
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		if (signal_pending(vcpu->arch.run_task))
-			return;
-		if (vcpu->arch.vpa.update_pending ||
-		    vcpu->arch.slb_shadow.update_pending ||
-		    vcpu->arch.dtl.update_pending)
-			vcpus_to_update[need_vpa_update++] = vcpu;
-	}
+	/*
+	 * Remove from the list any threads that have a signal pending
+	 * or need a VPA update done
+	 */
+	prepare_threads(vc);
+
+	/* if the runner is no longer runnable, let the caller pick a new one */
+	if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
+		return;
 
 	/*
-	 * Initialize *vc, in particular vc->vcore_state, so we can
-	 * drop the vcore lock if necessary.
+	 * Initialize *vc.
 	 */
-	vc->n_woken = 0;
-	vc->nap_count = 0;
-	vc->entry_exit_count = 0;
+	vc->entry_exit_map = 0;
 	vc->preempt_tb = TB_NIL;
-	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
 	vc->conferring_threads = 0;
 
 	/*
-	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
-	 * which can't be called with any spinlocks held.
-	 */
-	if (need_vpa_update) {
-		spin_unlock(&vc->lock);
-		for (i = 0; i < need_vpa_update; ++i)
-			kvmppc_update_vpas(vcpus_to_update[i]);
-		spin_lock(&vc->lock);
-	}
-
-	/*
 	 * Make sure we are running on primary threads, and that secondary
 	 * threads are offline.  Also check if the number of threads in this
 	 * guest are greater than the current system threads per guest.
 	 */
 	if ((threads_per_core > 1) &&
 	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 			vcpu->arch.ret = -EBUSY;
+			kvmppc_remove_runnable(vc, vcpu);
+			wake_up(&vcpu->arch.cpu_run);
+		}
 		goto out;
 	}
 
@@ -1797,8 +2027,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		vcpu->cpu = -1;
 	/* wait for secondary threads to finish writing their state to memory */
-	if (vc->nap_count < vc->n_woken)
-		kvmppc_wait_for_nap(vc);
+	kvmppc_wait_for_nap();
 	for (i = 0; i < threads_per_subcore; ++i)
 		kvmppc_release_hwthread(vc->pcpu + i);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
@@ -1812,44 +2041,12 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvm_guest_exit();
 
 	preempt_enable();
-	cond_resched();
 
 	spin_lock(&vc->lock);
-	now = get_tb();
-	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-		/* cancel pending dec exception if dec is positive */
-		if (now < vcpu->arch.dec_expires &&
-		    kvmppc_core_pending_dec(vcpu))
-			kvmppc_core_dequeue_dec(vcpu);
-
-		trace_kvm_guest_exit(vcpu);
-
-		ret = RESUME_GUEST;
-		if (vcpu->arch.trap)
-			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
-						    vcpu->arch.run_task);
-
-		vcpu->arch.ret = ret;
-		vcpu->arch.trap = 0;
-
-		if (vcpu->arch.ceded) {
-			if (!is_kvmppc_resume_guest(ret))
-				kvmppc_end_cede(vcpu);
-			else
-				kvmppc_set_timer(vcpu);
-		}
-	}
+	post_guest_process(vc);
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
-	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-				 arch.run_list) {
-		if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
-			kvmppc_remove_runnable(vc, vcpu);
-			wake_up(&vcpu->arch.cpu_run);
-		}
-	}
-
 	trace_kvmppc_run_core(vc, 1);
 }
 
@@ -1939,8 +2136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * this thread straight away and have it join in.
 	 */
 	if (!signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING &&
-		    VCORE_EXIT_COUNT(vc) == 0) {
+		if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
 			trace_kvm_guest_enter(vcpu);
@@ -1971,7 +2167,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		}
 		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
 			break;
-		vc->runner = vcpu;
 		n_ceded = 0;
 		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
 			if (!v->arch.pending_exceptions)
@@ -1979,10 +2174,17 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			else
 				v->arch.ceded = 0;
 		}
-		if (n_ceded == vc->n_runnable)
+		vc->runner = vcpu;
+		if (n_ceded == vc->n_runnable) {
 			kvmppc_vcore_blocked(vc);
-		else
+		} else if (should_resched()) {
+			vc->vcore_state = VCORE_PREEMPT;
+			/* Let something else run */
+			cond_resched_lock(&vc->lock);
+			vc->vcore_state = VCORE_INACTIVE;
+		} else {
 			kvmppc_run_core(vc);
+		}
 		vc->runner = NULL;
 	}
 
@@ -2032,11 +2234,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 
 	atomic_inc(&vcpu->kvm->arch.vcpus_running);
-	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+	/* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
 	/* On the first time here, set up HTAB and VRMA */
-	if (!vcpu->kvm->arch.rma_setup_done) {
+	if (!vcpu->kvm->arch.hpte_setup_done) {
 		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
 			goto out;
@@ -2238,7 +2440,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	int srcu_idx;
 
 	mutex_lock(&kvm->lock);
-	if (kvm->arch.rma_setup_done)
+	if (kvm->arch.hpte_setup_done)
 		goto out;	/* another vcpu beat us to it */
 
 	/* Allocate hashed page table (if not done already) and reset it */
@@ -2289,9 +2491,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
-	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+	/* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
 	smp_wmb();
-	kvm->arch.rma_setup_done = 1;
+	kvm->arch.hpte_setup_done = 1;
 	err = 0;
  out_srcu:
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -2307,6 +2509,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 {
 	unsigned long lpcr, lpid;
+	char buf[32];
 
 	/* Allocate the guest's logical partition ID */
 
@@ -2347,6 +2550,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 */
 	kvm_hv_vm_activated();
 
+	/*
+	 * Create a debugfs directory for the VM
+	 */
+	snprintf(buf, sizeof(buf), "vm%d", current->pid);
+	kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
+	if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+		kvmppc_mmu_debugfs_init(kvm);
+
 	return 0;
 }
 
@@ -2367,6 +2578,8 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 
 static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
+	debugfs_remove_recursive(kvm->arch.debugfs_dir);
+
 	kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 1f083ff8a61a..ed2589d4593f 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,6 +21,10 @@
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
+#include <asm/archrandom.h>
+#include <asm/xics.h>
+#include <asm/dbell.h>
+#include <asm/cputhreads.h>
 
 #define KVM_CMA_CHUNK_ORDER	18
 
@@ -114,11 +118,11 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
 	int rv = H_SUCCESS; /* => don't yield */
 
 	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
-	while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
-		threads_running = VCORE_ENTRY_COUNT(vc);
-		threads_ceded = hweight32(vc->napping_threads);
-		threads_conferring = hweight32(vc->conferring_threads);
-		if (threads_ceded + threads_conferring >= threads_running) {
+	while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+		threads_running = VCORE_ENTRY_MAP(vc);
+		threads_ceded = vc->napping_threads;
+		threads_conferring = vc->conferring_threads;
+		if ((threads_ceded | threads_conferring) == threads_running) {
 			rv = H_TOO_HARD; /* => do yield */
 			break;
 		}
@@ -169,3 +173,89 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+	return powernv_hwrng_present();
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_h_random(struct kvm_vcpu *vcpu)
+{
+	if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+		return H_SUCCESS;
+
+	return H_HARDWARE;
+}
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+	__asm__ __volatile__("stbcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
+/*
+ * Send an interrupt or message to another CPU.
+ * This can only be called in real mode.
+ * The caller needs to include any barrier needed to order writes
+ * to memory vs. the IPI/message.
+ */
+void kvmhv_rm_send_ipi(int cpu)
+{
+	unsigned long xics_phys;
+
+	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+	    cpu_first_thread_sibling(cpu) ==
+	    cpu_first_thread_sibling(raw_smp_processor_id())) {
+		unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+		msg |= cpu_thread_in_core(cpu);
+		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+		return;
+	}
+
+	/* Else poke the target with an IPI */
+	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+/*
+ * The following functions are called from the assembly code
+ * in book3s_hv_rmhandlers.S.
+ */
+static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
+{
+	int cpu = vc->pcpu;
+
+	/* Order setting of exit map vs. msgsnd/IPI */
+	smp_mb();
+	for (; active; active >>= 1, ++cpu)
+		if (active & 1)
+			kvmhv_rm_send_ipi(cpu);
+}
+
+void kvmhv_commence_exit(int trap)
+{
+	struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+	int ptid = local_paca->kvm_hstate.ptid;
+	int me, ee;
+
+	/* Set our bit in the threads-exiting-guest map in the 0xff00
+	   bits of vcore->entry_exit_map */
+	me = 0x100 << ptid;
+	do {
+		ee = vc->entry_exit_map;
+	} while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
+
+	/* Are we the first here? */
+	if ((ee >> 8) != 0)
+		return;
+
+	/*
+	 * Trigger the other threads in this vcore to exit the guest.
+	 * If this is a hypervisor decrementer interrupt then they
+	 * will be already on their way out of the guest.
+	 */
+	if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
+		kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 625407e4d3b0..b027a89737b6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
 {
 	unsigned long addr = (unsigned long) x;
 	pte_t *p;
-
-	p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+	/*
+	 * assume we don't have huge pages in vmalloc space...
+	 * So don't worry about THP collapse/split. Called
+	 * Only in realmode, hence won't need irq_save/restore.
+	 */
+	p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
 	if (!p || !pte_present(*p))
 		return NULL;
-	/* assume we don't have huge pages in vmalloc space... */
 	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
 	return __va(addr);
 }
@@ -131,31 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-			      int writing, unsigned long *pte_sizep)
-{
-	pte_t *ptep;
-	unsigned long ps = *pte_sizep;
-	unsigned int hugepage_shift;
-
-	ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-	if (!ptep)
-		return __pte(0);
-	if (hugepage_shift)
-		*pte_sizep = 1ul << hugepage_shift;
-	else
-		*pte_sizep = PAGE_SIZE;
-	if (ps > *pte_sizep)
-		return __pte(0);
-	return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
-static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
-{
-	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-	hpte[0] = cpu_to_be64(hpte_v);
-}
-
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		       long pte_index, unsigned long pteh, unsigned long ptel,
 		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -166,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long pte_size;
+	unsigned hpage_shift;
 	unsigned long is_io;
 	unsigned long *rmap;
-	pte_t pte;
+	pte_t *ptep;
 	unsigned int writing;
 	unsigned long mmu_seq;
-	unsigned long rcbits;
+	unsigned long rcbits, irq_flags = 0;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
@@ -208,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 	/* Translate to host virtual address */
 	hva = __gfn_to_hva_memslot(memslot, gfn);
-
-	/* Look up the Linux PTE for the backing page */
-	pte_size = psize;
-	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-	if (pte_present(pte) && !pte_protnone(pte)) {
-		if (writing && !pte_write(pte))
-			/* make the actual HPTE be read-only */
-			ptel = hpte_make_readonly(ptel);
-		is_io = hpte_cache_bits(pte_val(pte));
-		pa = pte_pfn(pte) << PAGE_SHIFT;
-		pa |= hva & (pte_size - 1);
-		pa |= gpa & ~PAGE_MASK;
+	/*
+	 * If we had a page table table change after lookup, we would
+	 * retry via mmu_notifier_retry.
+	 */
+	if (realmode)
+		ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+	else {
+		local_irq_save(irq_flags);
+		ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
 	}
+	if (ptep) {
+		pte_t pte;
+		unsigned int host_pte_size;
 
-	if (pte_size < psize)
-		return H_PARAMETER;
+		if (hpage_shift)
+			host_pte_size = 1ul << hpage_shift;
+		else
+			host_pte_size = PAGE_SIZE;
+		/*
+		 * We should always find the guest page size
+		 * to <= host page size, if host is using hugepage
+		 */
+		if (host_pte_size < psize) {
+			if (!realmode)
+				local_irq_restore(flags);
+			return H_PARAMETER;
+		}
+		pte = kvmppc_read_update_linux_pte(ptep, writing);
+		if (pte_present(pte) && !pte_protnone(pte)) {
+			if (writing && !pte_write(pte))
+				/* make the actual HPTE be read-only */
+				ptel = hpte_make_readonly(ptel);
+			is_io = hpte_cache_bits(pte_val(pte));
+			pa = pte_pfn(pte) << PAGE_SHIFT;
+			pa |= hva & (host_pte_size - 1);
+			pa |= gpa & ~PAGE_MASK;
+		}
+	}
+	if (!realmode)
+		local_irq_restore(irq_flags);
 
 	ptel &= ~(HPTE_R_PP0 - psize);
 	ptel |= pa;
@@ -271,10 +273,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 				u64 pte;
 				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 					cpu_relax();
-				pte = be64_to_cpu(*hpte);
+				pte = be64_to_cpu(hpte[0]);
 				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
 					break;
-				*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+				__unlock_hpte(hpte, pte);
 				hpte += 2;
 			}
 			if (i == 8)
@@ -290,9 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
 				cpu_relax();
-			pte = be64_to_cpu(*hpte);
+			pte = be64_to_cpu(hpte[0]);
 			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
-				*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+				__unlock_hpte(hpte, pte);
 				return H_PTEG_FULL;
 			}
 		}
@@ -331,7 +333,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
 	eieio();
-	hpte[0] = cpu_to_be64(pteh);
+	__unlock_hpte(hpte, pteh);
 	asm volatile("ptesync" : : : "memory");
 
 	*pte_idx_ret = pte_index;
@@ -412,7 +414,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
 	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
-		hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+		__unlock_hpte(hpte, pte);
 		return H_NOT_FOUND;
 	}
 
@@ -548,7 +550,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 				be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
 			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
 			args[j] |= rcbits << (56 - 5);
-			hp[0] = 0;
+			__unlock_hpte(hp, 0);
 		}
 	}
 
@@ -574,7 +576,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	pte = be64_to_cpu(hpte[0]);
 	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
 	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
-		hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+		__unlock_hpte(hpte, pte);
 		return H_NOT_FOUND;
 	}
 
@@ -755,8 +757,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
 				/* Return with the HPTE still locked */
 				return (hash << 3) + (i >> 1);
 
-			/* Unlock and move on */
-			hpte[i] = cpu_to_be64(v);
+			__unlock_hpte(&hpte[i], v);
 		}
 
 		if (val & HPTE_V_SECONDARY)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 7c22997de906..00e45b6d4f24 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -23,17 +23,37 @@
 
 #define DEBUG_PASSUP
 
-static inline void rm_writeb(unsigned long paddr, u8 val)
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq);
+
+/* -- ICS routines -- */
+static void ics_rm_check_resend(struct kvmppc_xics *xics,
+				struct kvmppc_ics *ics, struct kvmppc_icp *icp)
 {
-	__asm__ __volatile__("sync; stbcix %0,0,%1"
-		: : "r" (val), "r" (paddr) : "memory");
+	int i;
+
+	arch_spin_lock(&ics->lock);
+
+	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend)
+			continue;
+
+		arch_spin_unlock(&ics->lock);
+		icp_rm_deliver_irq(xics, icp, state->number);
+		arch_spin_lock(&ics->lock);
+	}
+
+	arch_spin_unlock(&ics->lock);
 }
 
+/* -- ICP routines -- */
+
 static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 				struct kvm_vcpu *this_vcpu)
 {
 	struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
-	unsigned long xics_phys;
 	int cpu;
 
 	/* Mark the target VCPU as having an interrupt pending */
@@ -56,9 +76,8 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 	/* In SMT cpu will always point to thread 0, we adjust it */
 	cpu += vcpu->arch.ptid;
 
-	/* Not too hard, then poke the target */
-	xics_phys = paca[cpu].kvm_hstate.xics_phys;
-	rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+	smp_mb();
+	kvmhv_rm_send_ipi(cpu);
 }
 
 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
@@ -116,6 +135,180 @@ static inline int check_too_hard(struct kvmppc_xics *xics,
 	return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
 }
 
+static void icp_rm_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 icsid;
+
+	/* Order this load with the test for need_resend in the caller */
+	smp_rmb();
+	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+		struct kvmppc_ics *ics = xics->ics[icsid];
+
+		if (!test_and_clear_bit(icsid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_rm_check_resend(xics, ics, icp);
+	}
+}
+
+static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+			       u32 *reject)
+{
+	union kvmppc_icp_state old_state, new_state;
+	bool success;
+
+	do {
+		old_state = new_state = READ_ONCE(icp->state);
+
+		*reject = 0;
+
+		/* See if we can deliver */
+		success = new_state.cppr > priority &&
+			new_state.mfrr > priority &&
+			new_state.pending_pri > priority;
+
+		/*
+		 * If we can, check for a rejection and perform the
+		 * delivery
+		 */
+		if (success) {
+			*reject = new_state.xisr;
+			new_state.xisr = irq;
+			new_state.pending_pri = priority;
+		} else {
+			/*
+			 * If we failed to deliver we set need_resend
+			 * so a subsequent CPPR state change causes us
+			 * to try a new delivery.
+			 */
+			new_state.need_resend = true;
+		}
+
+	} while (!icp_rm_try_update(icp, old_state, new_state));
+
+	return success;
+}
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+			    u32 new_irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u32 reject;
+	u16 src;
+
+	/*
+	 * This is used both for initial delivery of an interrupt and
+	 * for subsequent rejection.
+	 *
+	 * Rejection can be racy vs. resends. We have evaluated the
+	 * rejection in an atomic ICP transaction which is now complete,
+	 * so potentially the ICP can already accept the interrupt again.
+	 *
+	 * So we need to retry the delivery. Essentially the reject path
+	 * boils down to a failed delivery. Always.
+	 *
+	 * Now the interrupt could also have moved to a different target,
+	 * thus we may need to re-do the ICP lookup as well
+	 */
+
+ again:
+	/* Get the ICS state and lock it */
+	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+	if (!ics) {
+		/* Unsafe increment, but this does not need to be accurate */
+		xics->err_noics++;
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	/* Get a lock on the ICS */
+	arch_spin_lock(&ics->lock);
+
+	/* Get our server */
+	if (!icp || state->server != icp->server_num) {
+		icp = kvmppc_xics_find_server(xics->kvm, state->server);
+		if (!icp) {
+			/* Unsafe increment again*/
+			xics->err_noicp++;
+			goto out;
+		}
+	}
+
+	/* Clear the resend bit of that interrupt */
+	state->resend = 0;
+
+	/*
+	 * If masked, bail out
+	 *
+	 * Note: PAPR doesn't mention anything about masked pending
+	 * when doing a resend, only when doing a delivery.
+	 *
+	 * However that would have the effect of losing a masked
+	 * interrupt that was rejected and isn't consistent with
+	 * the whole masked_pending business which is about not
+	 * losing interrupts that occur while masked.
+	 *
+	 * I don't differentiate normal deliveries and resends, this
+	 * implementation will differ from PAPR and not lose such
+	 * interrupts.
+	 */
+	if (state->priority == MASKED) {
+		state->masked_pending = 1;
+		goto out;
+	}
+
+	/*
+	 * Try the delivery, this will set the need_resend flag
+	 * in the ICP as part of the atomic transaction if the
+	 * delivery is not possible.
+	 *
+	 * Note that if successful, the new delivery might have itself
+	 * rejected an interrupt that was "delivered" before we took the
+	 * ics spin lock.
+	 *
+	 * In this case we do the whole sequence all over again for the
+	 * new guy. We cannot assume that the rejected interrupt is less
+	 * favored than the new one, and thus doesn't need to be delivered,
+	 * because by the time we exit icp_rm_try_to_deliver() the target
+	 * processor may well have already consumed & completed it, and thus
+	 * the rejected interrupt might actually be already acceptable.
+	 */
+	if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+		/*
+		 * Delivery was successful, did we reject somebody else ?
+		 */
+		if (reject && reject != XICS_IPI) {
+			arch_spin_unlock(&ics->lock);
+			new_irq = reject;
+			goto again;
+		}
+	} else {
+		/*
+		 * We failed to deliver the interrupt we need to set the
+		 * resend map bit and mark the ICS state as needing a resend
+		 */
+		set_bit(ics->icsid, icp->resend_map);
+		state->resend = 1;
+
+		/*
+		 * If the need_resend flag got cleared in the ICP some time
+		 * between icp_rm_try_to_deliver() atomic update and now, then
+		 * we know it might have missed the resend_map bit. So we
+		 * retry
+		 */
+		smp_mb();
+		if (!icp->state.need_resend) {
+			arch_spin_unlock(&ics->lock);
+			goto again;
+		}
+	}
+ out:
+	arch_spin_unlock(&ics->lock);
+}
+
 static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 			     u8 new_cppr)
 {
@@ -184,8 +377,8 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	 * separately here as well.
 	 */
 	if (resend) {
-		icp->rm_action |= XICS_RM_CHECK_RESEND;
-		icp->rm_resend_icp = icp;
+		icp->n_check_resend++;
+		icp_rm_check_resend(xics, icp);
 	}
 }
 
@@ -300,16 +493,16 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 		}
 	} while (!icp_rm_try_update(icp, old_state, new_state));
 
-	/* Pass rejects to virtual mode */
+	/* Handle reject in real mode */
 	if (reject && reject != XICS_IPI) {
-		this_icp->rm_action |= XICS_RM_REJECT;
-		this_icp->rm_reject = reject;
+		this_icp->n_reject++;
+		icp_rm_deliver_irq(xics, icp, reject);
 	}
 
-	/* Pass resends to virtual mode */
+	/* Handle resends in real mode */
 	if (resend) {
-		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
-		this_icp->rm_resend_icp = icp;
+		this_icp->n_check_resend++;
+		icp_rm_check_resend(xics, icp);
 	}
 
 	return check_too_hard(xics, this_icp);
@@ -365,10 +558,13 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 
 	} while (!icp_rm_try_update(icp, old_state, new_state));
 
-	/* Pass rejects to virtual mode */
+	/*
+	 * Check for rejects. They are handled by doing a new delivery
+	 * attempt (see comments in icp_rm_deliver_irq).
+	 */
 	if (reject && reject != XICS_IPI) {
-		icp->rm_action |= XICS_RM_REJECT;
-		icp->rm_reject = reject;
+		icp->n_reject++;
+		icp_rm_deliver_irq(xics, icp, reject);
 	}
  bail:
 	return check_too_hard(xics, icp);
@@ -416,10 +612,10 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 		goto bail;
 	state = &ics->irq_state[src];
 
-	/* Still asserted, resend it, we make it look like a reject */
+	/* Still asserted, resend it */
 	if (state->asserted) {
-		icp->rm_action |= XICS_RM_REJECT;
-		icp->rm_reject = irq;
+		icp->n_reject++;
+		icp_rm_deliver_irq(xics, icp, irq);
 	}
 
 	if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6cbf1630cb70..4d70df26c402 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -172,6 +172,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
+	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+	mfspr	r3, SPRN_HDEC
+	mtspr	SPRN_DEC, r3
+	/*
+	 * Make sure the primary has finished the MMU switch.
+	 * We should never get here on a secondary thread, but
+	 * check it for robustness' sake.
+	 */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+65:	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	beq	65b
+	/* Set LPCR. */
+	ld	r8,VCORE_LPCR(r5)
+	mtspr	SPRN_LPCR,r8
+	isync
 	/* set our bit in napping_threads */
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	lbz	r7, HSTATE_PTID(r13)
@@ -182,7 +198,7 @@ kvmppc_primary_no_guest:
 	or	r3, r3, r0
 	stwcx.	r3, 0, r6
 	bne	1b
-	/* order napping_threads update vs testing entry_exit_count */
+	/* order napping_threads update vs testing entry_exit_map */
 	isync
 	li	r12, 0
 	lwz	r7, VCORE_ENTRY_EXIT(r5)
@@ -191,6 +207,7 @@ kvmppc_primary_no_guest:
 	li	r3, NAPPING_NOVCPU
 	stb	r3, HSTATE_NAPPING(r13)
 
+	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
 	b	kvm_do_nap
 
 kvm_novcpu_wakeup:
@@ -202,7 +219,7 @@ kvm_novcpu_wakeup:
 
 	/* check the wake reason */
 	bl	kvmppc_check_wake_reason
-	
+
 	/* see if any other thread is already exiting */
 	lwz	r0, VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0, 0x100
@@ -222,13 +239,37 @@ kvm_novcpu_wakeup:
 	cmpdi	r3, 0
 	bge	kvm_novcpu_exit
 
+	/* See if our timeslice has expired (HDEC is negative) */
+	mfspr	r0, SPRN_HDEC
+	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+	cmpwi	r0, 0
+	blt	kvm_novcpu_exit
+
 	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	cmpdi	r4, 0
-	bne	kvmppc_got_guest
+	beq	kvmppc_primary_no_guest
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+#endif
+	b	kvmppc_got_guest
 
 kvm_novcpu_exit:
-	b	hdec_soon
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	beq	13f
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+13:	mr	r3, r12
+	stw	r12, 112-4(r1)
+	bl	kvmhv_commence_exit
+	nop
+	lwz	r12, 112-4(r1)
+	b	kvmhv_switch_to_host
 
 /*
  * We come in here when wakened from nap mode.
@@ -239,9 +280,9 @@ kvm_novcpu_exit:
 kvm_start_guest:
 
 	/* Set runlatch bit the minute you wake up from nap */
-	mfspr	r1, SPRN_CTRLF
-	ori 	r1, r1, 1
-	mtspr	SPRN_CTRLT, r1
+	mfspr	r0, SPRN_CTRLF
+	ori 	r0, r0, 1
+	mtspr	SPRN_CTRLT, r0
 
 	ld	r2,PACATOC(r13)
 
@@ -286,26 +327,21 @@ kvm_secondary_got_guest:
 	ld	r6, PACA_DSCR(r13)
 	std	r6, HSTATE_DSCR(r13)
 
+	/* Order load of vcore, ptid etc. after load of vcpu */
+	lwsync
 	bl	kvmppc_hv_entry
 
 	/* Back from the guest, go back to nap */
 	/* Clear our vcpu pointer so we don't come back in early */
 	li	r0, 0
-	std	r0, HSTATE_KVM_VCPU(r13)
 	/*
-	 * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
-	 * the nap_count, because once the increment to nap_count is
-	 * visible we could be given another vcpu.
+	 * Once we clear HSTATE_KVM_VCPU(r13), the code in
+	 * kvmppc_run_core() is going to assume that all our vcpu
+	 * state is visible in memory.  This lwsync makes sure
+	 * that that is true.
 	 */
 	lwsync
-
-	/* increment the nap count and then go to nap mode */
-	ld	r4, HSTATE_KVM_VCORE(r13)
-	addi	r4, r4, VCORE_NAP_COUNT
-51:	lwarx	r3, 0, r4
-	addi	r3, r3, 1
-	stwcx.	r3, 0, r4
-	bne	51b
+	std	r0, HSTATE_KVM_VCPU(r13)
 
 /*
  * At this point we have finished executing in the guest.
@@ -376,6 +412,14 @@ kvmppc_hv_entry:
 	li	r6, KVM_GUEST_MODE_HOST_HV
 	stb	r6, HSTATE_IN_GUEST(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	/* Store initial timestamp */
+	cmpdi	r4, 0
+	beq	1f
+	addi	r3, r4, VCPU_TB_RMENTRY
+	bl	kvmhv_start_timing
+1:
+#endif
 	/* Clear out SLB */
 	li	r6,0
 	slbmte	r6,r6
@@ -387,21 +431,23 @@ kvmppc_hv_entry:
 	 * We don't have to lock against concurrent tlbies,
 	 * but we do have to coordinate across hardware threads.
 	 */
-	/* Increment entry count iff exit count is zero. */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	addi	r9,r5,VCORE_ENTRY_EXIT
-21:	lwarx	r3,0,r9
-	cmpwi	r3,0x100		/* any threads starting to exit? */
+	/* Set bit in entry map iff exit map is zero. */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	li	r7, 1
+	lbz	r6, HSTATE_PTID(r13)
+	sld	r7, r7, r6
+	addi	r9, r5, VCORE_ENTRY_EXIT
+21:	lwarx	r3, 0, r9
+	cmpwi	r3, 0x100		/* any threads starting to exit? */
 	bge	secondary_too_late	/* if so we're too late to the party */
-	addi	r3,r3,1
-	stwcx.	r3,0,r9
+	or	r3, r3, r7
+	stwcx.	r3, 0, r9
 	bne	21b
 
 	/* Primary thread switches to guest partition. */
 	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
-	lbz	r6,HSTATE_PTID(r13)
 	cmpwi	r6,0
-	bne	20f
+	bne	10f
 	ld	r6,KVM_SDR1(r9)
 	lwz	r7,KVM_LPID(r9)
 	li	r0,LPID_RSVD		/* switch to reserved LPID */
@@ -472,28 +518,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
-	b	10f
-
-	/* Secondary threads wait for primary to have done partition switch */
-20:	lbz	r0,VCORE_IN_GUEST(r5)
-	cmpwi	r0,0
-	beq	20b
-
-	/* Set LPCR and RMOR. */
-10:	ld	r8,VCORE_LPCR(r5)
-	mtspr	SPRN_LPCR,r8
-	ld	r8,KVM_RMOR(r9)
-	mtspr	SPRN_RMOR,r8
-	isync
-
-	/* Check if HDEC expires soon */
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,512		/* 1 microsecond */
-	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	blt	hdec_soon
 
 	/* Do we have a guest vcpu to run? */
-	cmpdi	r4, 0
+10:	cmpdi	r4, 0
 	beq	kvmppc_primary_no_guest
 kvmppc_got_guest:
 
@@ -818,6 +845,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	clrrdi	r6,r6,1
 	mtspr	SPRN_CTRLT,r6
 4:
+	/* Secondary threads wait for primary to have done partition switch */
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r6, HSTATE_PTID(r13)
+	cmpwi	r6, 0
+	beq	21f
+	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	bne	21f
+	HMT_LOW
+20:	lbz	r0, VCORE_IN_GUEST(r5)
+	cmpwi	r0, 0
+	beq	20b
+	HMT_MEDIUM
+21:
+	/* Set LPCR. */
+	ld	r8,VCORE_LPCR(r5)
+	mtspr	SPRN_LPCR,r8
+	isync
+
+	/* Check if HDEC expires soon */
+	mfspr	r3, SPRN_HDEC
+	cmpwi	r3, 512		/* 1 microsecond */
+	blt	hdec_soon
+
 	ld	r6, VCPU_CTR(r4)
 	lwz	r7, VCPU_XER(r4)
 
@@ -880,6 +931,12 @@ fast_guest_return:
 	li	r9, KVM_GUEST_MODE_GUEST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	/* Accumulate timing */
+	addi	r3, r4, VCPU_TB_GUEST
+	bl	kvmhv_accumulate_time
+#endif
+
 	/* Enter guest */
 
 BEGIN_FTR_SECTION
@@ -917,6 +974,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	hrfid
 	b	.
 
+secondary_too_late:
+	li	r12, 0
+	cmpdi	r4, 0
+	beq	11f
+	stw	r12, VCPU_TRAP(r4)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+11:	b	kvmhv_switch_to_host
+
+hdec_soon:
+	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+	stw	r12, VCPU_TRAP(r4)
+	mr	r9, r4
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r4, VCPU_TB_RMEXIT
+	bl	kvmhv_accumulate_time
+#endif
+	b	guest_exit_cont
+
 /******************************************************************************
  *                                                                            *
  *                               Exit code                                    *
@@ -1002,6 +1080,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	stw	r12,VCPU_TRAP(r9)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r9, VCPU_TB_RMINTR
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+	ld	r5, VCPU_GPR(R5)(r9)
+	ld	r6, VCPU_GPR(R6)(r9)
+	ld	r7, VCPU_GPR(R7)(r9)
+	ld	r8, VCPU_GPR(R8)(r9)
+#endif
+
 	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
@@ -1028,34 +1116,37 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	bne	2f
 	mfspr	r3,SPRN_HDEC
 	cmpwi	r3,0
-	bge	ignore_hdec
+	mr	r4,r9
+	bge	fast_guest_return
 2:
 	/* See if this is an hcall we can handle in real mode */
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
+	/* Hypervisor doorbell - exit only if host IPI flag set */
+	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	bne	3f
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	beq	4f
+	b	guest_exit_cont
+3:
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	bne+	ext_interrupt_to_host
+	bne+	guest_exit_cont
 
 	/* External interrupt, first check for host_ipi. If this is
 	 * set, we know the host wants us out so let's do it now
 	 */
 	bl	kvmppc_read_intr
 	cmpdi	r3, 0
-	bgt	ext_interrupt_to_host
+	bgt	guest_exit_cont
 
 	/* Check if any CPU is heading out to the host, if so head out too */
-	ld	r5, HSTATE_KVM_VCORE(r13)
+4:	ld	r5, HSTATE_KVM_VCORE(r13)
 	lwz	r0, VCORE_ENTRY_EXIT(r5)
 	cmpwi	r0, 0x100
-	bge	ext_interrupt_to_host
-
-	/* Return to guest after delivering any pending interrupt */
 	mr	r4, r9
-	b	deliver_guest_interrupt
-
-ext_interrupt_to_host:
+	blt	deliver_guest_interrupt
 
 guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save more register state  */
@@ -1065,7 +1156,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	stw	r7, VCPU_DSISR(r9)
 	/* don't overwrite fault_dar/fault_dsisr if HDSI */
 	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-	beq	6f
+	beq	mc_cont
 	std	r6, VCPU_FAULT_DAR(r9)
 	stw	r7, VCPU_FAULT_DSISR(r9)
 
@@ -1073,9 +1164,20 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	beq	machine_check_realmode
 mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r9, VCPU_TB_RMEXIT
+	mr	r4, r9
+	bl	kvmhv_accumulate_time
+#endif
+
+	/* Increment exit count, poke other threads to exit */
+	bl	kvmhv_commence_exit
+	nop
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	lwz	r12, VCPU_TRAP(r9)
 
 	/* Save guest CTRL register, set runlatch to 1 */
-6:	mfspr	r6,SPRN_CTRLF
+	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)
 	andi.	r0,r6,1
 	bne	4f
@@ -1417,68 +1519,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	slbia
 	ptesync
 
-hdec_soon:			/* r12 = trap, r13 = paca */
 	/*
 	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
 	 * have to coordinate the hardware threads.
 	 */
-	/* Increment the threads-exiting-guest count in the 0xff00
-	   bits of vcore->entry_exit_count */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	addi	r6,r5,VCORE_ENTRY_EXIT
-41:	lwarx	r3,0,r6
-	addi	r0,r3,0x100
-	stwcx.	r0,0,r6
-	bne	41b
-	isync		/* order stwcx. vs. reading napping_threads */
-
-	/*
-	 * At this point we have an interrupt that we have to pass
-	 * up to the kernel or qemu; we can't handle it in real mode.
-	 * Thus we have to do a partition switch, so we have to
-	 * collect the other threads, if we are the first thread
-	 * to take an interrupt.  To do this, we set the HDEC to 0,
-	 * which causes an HDEC interrupt in all threads within 2ns
-	 * because the HDEC register is shared between all 4 threads.
-	 * However, we don't need to bother if this is an HDEC
-	 * interrupt, since the other threads will already be on their
-	 * way here in that case.
-	 */
-	cmpwi	r3,0x100	/* Are we the first here? */
-	bge	43f
-	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	beq	40f
-	li	r0,0
-	mtspr	SPRN_HDEC,r0
-40:
-	/*
-	 * Send an IPI to any napping threads, since an HDEC interrupt
-	 * doesn't wake CPUs up from nap.
-	 */
-	lwz	r3,VCORE_NAPPING_THREADS(r5)
-	lbz	r4,HSTATE_PTID(r13)
-	li	r0,1
-	sld	r0,r0,r4
-	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
-	beq	43f
-	/* Order entry/exit update vs. IPIs */
-	sync
-	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
-	subf	r6,r4,r13
-42:	andi.	r0,r3,1
-	beq	44f
-	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
-	li	r0,IPI_PRIORITY
-	li	r7,XICS_MFRR
-	stbcix	r0,r7,r8		/* trigger the IPI */
-44:	srdi.	r3,r3,1
-	addi	r6,r6,PACA_SIZE
-	bne	42b
-
-secondary_too_late:
+kvmhv_switch_to_host:
 	/* Secondary threads wait for primary to do partition switch */
-43:	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r5,HSTATE_KVM_VCORE(r13)
 	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
 	lbz	r3,HSTATE_PTID(r13)
 	cmpwi	r3,0
@@ -1562,6 +1610,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 1:	addi	r8,r8,16
 	.endr
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	/* Finish timing, if we have a vcpu */
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	cmpdi	r4, 0
+	li	r3, 0
+	beq	2f
+	bl	kvmhv_accumulate_time
+2:
+#endif
 	/* Unset guest mode */
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
@@ -1696,8 +1753,10 @@ kvmppc_hisi:
  * Returns to the guest if we handle it, or continues on up to
  * the kernel if we can't (i.e. if we don't have a handler for
  * it, or if the handler returns H_TOO_HARD).
+ *
+ * r5 - r8 contain hcall args,
+ * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
  */
-	.globl	hcall_try_real_mode
 hcall_try_real_mode:
 	ld	r3,VCPU_GPR(R3)(r9)
 	andi.	r0,r11,MSR_PR
@@ -1839,13 +1898,124 @@ hcall_real_table:
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
 	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+	.long	0		/* 0x138 */
+	.long	0		/* 0x13c */
+	.long	0		/* 0x140 */
+	.long	0		/* 0x144 */
+	.long	0		/* 0x148 */
+	.long	0		/* 0x14c */
+	.long	0		/* 0x150 */
+	.long	0		/* 0x154 */
+	.long	0		/* 0x158 */
+	.long	0		/* 0x15c */
+	.long	0		/* 0x160 */
+	.long	0		/* 0x164 */
+	.long	0		/* 0x168 */
+	.long	0		/* 0x16c */
+	.long	0		/* 0x170 */
+	.long	0		/* 0x174 */
+	.long	0		/* 0x178 */
+	.long	0		/* 0x17c */
+	.long	0		/* 0x180 */
+	.long	0		/* 0x184 */
+	.long	0		/* 0x188 */
+	.long	0		/* 0x18c */
+	.long	0		/* 0x190 */
+	.long	0		/* 0x194 */
+	.long	0		/* 0x198 */
+	.long	0		/* 0x19c */
+	.long	0		/* 0x1a0 */
+	.long	0		/* 0x1a4 */
+	.long	0		/* 0x1a8 */
+	.long	0		/* 0x1ac */
+	.long	0		/* 0x1b0 */
+	.long	0		/* 0x1b4 */
+	.long	0		/* 0x1b8 */
+	.long	0		/* 0x1bc */
+	.long	0		/* 0x1c0 */
+	.long	0		/* 0x1c4 */
+	.long	0		/* 0x1c8 */
+	.long	0		/* 0x1cc */
+	.long	0		/* 0x1d0 */
+	.long	0		/* 0x1d4 */
+	.long	0		/* 0x1d8 */
+	.long	0		/* 0x1dc */
+	.long	0		/* 0x1e0 */
+	.long	0		/* 0x1e4 */
+	.long	0		/* 0x1e8 */
+	.long	0		/* 0x1ec */
+	.long	0		/* 0x1f0 */
+	.long	0		/* 0x1f4 */
+	.long	0		/* 0x1f8 */
+	.long	0		/* 0x1fc */
+	.long	0		/* 0x200 */
+	.long	0		/* 0x204 */
+	.long	0		/* 0x208 */
+	.long	0		/* 0x20c */
+	.long	0		/* 0x210 */
+	.long	0		/* 0x214 */
+	.long	0		/* 0x218 */
+	.long	0		/* 0x21c */
+	.long	0		/* 0x220 */
+	.long	0		/* 0x224 */
+	.long	0		/* 0x228 */
+	.long	0		/* 0x22c */
+	.long	0		/* 0x230 */
+	.long	0		/* 0x234 */
+	.long	0		/* 0x238 */
+	.long	0		/* 0x23c */
+	.long	0		/* 0x240 */
+	.long	0		/* 0x244 */
+	.long	0		/* 0x248 */
+	.long	0		/* 0x24c */
+	.long	0		/* 0x250 */
+	.long	0		/* 0x254 */
+	.long	0		/* 0x258 */
+	.long	0		/* 0x25c */
+	.long	0		/* 0x260 */
+	.long	0		/* 0x264 */
+	.long	0		/* 0x268 */
+	.long	0		/* 0x26c */
+	.long	0		/* 0x270 */
+	.long	0		/* 0x274 */
+	.long	0		/* 0x278 */
+	.long	0		/* 0x27c */
+	.long	0		/* 0x280 */
+	.long	0		/* 0x284 */
+	.long	0		/* 0x288 */
+	.long	0		/* 0x28c */
+	.long	0		/* 0x290 */
+	.long	0		/* 0x294 */
+	.long	0		/* 0x298 */
+	.long	0		/* 0x29c */
+	.long	0		/* 0x2a0 */
+	.long	0		/* 0x2a4 */
+	.long	0		/* 0x2a8 */
+	.long	0		/* 0x2ac */
+	.long	0		/* 0x2b0 */
+	.long	0		/* 0x2b4 */
+	.long	0		/* 0x2b8 */
+	.long	0		/* 0x2bc */
+	.long	0		/* 0x2c0 */
+	.long	0		/* 0x2c4 */
+	.long	0		/* 0x2c8 */
+	.long	0		/* 0x2cc */
+	.long	0		/* 0x2d0 */
+	.long	0		/* 0x2d4 */
+	.long	0		/* 0x2d8 */
+	.long	0		/* 0x2dc */
+	.long	0		/* 0x2e0 */
+	.long	0		/* 0x2e4 */
+	.long	0		/* 0x2e8 */
+	.long	0		/* 0x2ec */
+	.long	0		/* 0x2f0 */
+	.long	0		/* 0x2f4 */
+	.long	0		/* 0x2f8 */
+	.long	0		/* 0x2fc */
+	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
 	.globl	hcall_real_table_end
 hcall_real_table_end:
 
-ignore_hdec:
-	mr	r4,r9
-	b	fast_guest_return
-
 _GLOBAL(kvmppc_h_set_xdabr)
 	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
 	beq	6f
@@ -1884,7 +2054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r3, 0
 	blr
 
-_GLOBAL(kvmppc_h_cede)
+_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	ori	r11,r11,MSR_EE
 	std	r11,VCPU_MSR(r3)
 	li	r0,1
@@ -1893,8 +2063,8 @@ _GLOBAL(kvmppc_h_cede)
 	lbz	r5,VCPU_PRODDED(r3)
 	cmpwi	r5,0
 	bne	kvm_cede_prodded
-	li	r0,0		/* set trap to 0 to say hcall is handled */
-	stw	r0,VCPU_TRAP(r3)
+	li	r12,0		/* set trap to 0 to say hcall is handled */
+	stw	r12,VCPU_TRAP(r3)
 	li	r0,H_SUCCESS
 	std	r0,VCPU_GPR(R3)(r3)
 
@@ -1912,12 +2082,11 @@ _GLOBAL(kvmppc_h_cede)
 	addi	r6,r5,VCORE_NAPPING_THREADS
 31:	lwarx	r4,0,r6
 	or	r4,r4,r0
-	PPC_POPCNTW(R7,R4)
-	cmpw	r7,r8
-	bge	kvm_cede_exit
+	cmpw	r4,r8
+	beq	kvm_cede_exit
 	stwcx.	r4,0,r6
 	bne	31b
-	/* order napping_threads update vs testing entry_exit_count */
+	/* order napping_threads update vs testing entry_exit_map */
 	isync
 	li	r0,NAPPING_CEDE
 	stb	r0,HSTATE_NAPPING(r13)
@@ -1955,21 +2124,52 @@ _GLOBAL(kvmppc_h_cede)
 	bl	kvmppc_save_fp
 
 	/*
+	 * Set DEC to the smaller of DEC and HDEC, so that we wake
+	 * no later than the end of our timeslice (HDEC interrupts
+	 * don't wake us from nap).
+	 */
+	mfspr	r3, SPRN_DEC
+	mfspr	r4, SPRN_HDEC
+	mftb	r5
+	cmpw	r3, r4
+	ble	67f
+	mtspr	SPRN_DEC, r4
+67:
+	/* save expiry time of guest decrementer */
+	extsw	r3, r3
+	add	r3, r3, r5
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_TB_OFFSET(r5)
+	subf	r3, r6, r3	/* convert to host TB value */
+	std	r3, VCPU_DEC_EXPIRES(r4)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	ld	r4, HSTATE_KVM_VCPU(r13)
+	addi	r3, r4, VCPU_TB_CEDE
+	bl	kvmhv_accumulate_time
+#endif
+
+	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
+
+	/*
 	 * Take a nap until a decrementer or external or doobell interrupt
-	 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
-	 * runlatch bit before napping.
+	 * occurs, with PECE1 and PECE0 set in LPCR.
+	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
+	 * Also clear the runlatch bit before napping.
 	 */
 kvm_do_nap:
-	mfspr	r2, SPRN_CTRLF
-	clrrdi	r2, r2, 1
-	mtspr	SPRN_CTRLT, r2
+	mfspr	r0, SPRN_CTRLF
+	clrrdi	r0, r0, 1
+	mtspr	SPRN_CTRLT, r0
 
 	li	r0,1
 	stb	r0,HSTATE_HWTHREAD_REQ(r13)
 	mfspr	r5,SPRN_LPCR
 	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
 BEGIN_FTR_SECTION
-	oris	r5,r5,LPCR_PECEDP@h
+	ori	r5, r5, LPCR_PECEDH
+	rlwimi	r5, r3, 0, LPCR_PECEDP
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_LPCR,r5
 	isync
@@ -1994,9 +2194,23 @@ kvm_end_cede:
 	/* Woken by external or decrementer interrupt */
 	ld	r1, HSTATE_HOST_R1(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+	addi	r3, r4, VCPU_TB_RMINTR
+	bl	kvmhv_accumulate_time
+#endif
+
 	/* load up FP state */
 	bl	kvmppc_load_fp
 
+	/* Restore guest decrementer */
+	ld	r3, VCPU_DEC_EXPIRES(r4)
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	ld	r6, VCORE_TB_OFFSET(r5)
+	add	r3, r3, r6	/* convert host TB to guest TB value */
+	mftb	r7
+	subf	r3, r7, r3
+	mtspr	SPRN_DEC, r3
+
 	/* Load NV GPRS */
 	ld	r14, VCPU_GPR(R14)(r4)
 	ld	r15, VCPU_GPR(R15)(r4)
@@ -2057,7 +2271,8 @@ kvm_cede_prodded:
 
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-	b	hcall_real_fallback
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	b	guest_exit_cont
 
 	/* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -2089,13 +2304,14 @@ machine_check_realmode:
 
 /*
  * Check the reason we woke from nap, and take appropriate action.
- * Returns:
+ * Returns (in r3):
  *	0 if nothing needs to be done
  *	1 if something happened that needs to be handled by the host
- *	-1 if there was a guest wakeup (IPI)
+ *	-1 if there was a guest wakeup (IPI or msgsnd)
  *
  * Also sets r12 to the interrupt vector for any interrupt that needs
  * to be handled now by the host (0x500 for external interrupt), or zero.
+ * Modifies r0, r6, r7, r8.
  */
 kvmppc_check_wake_reason:
 	mfspr	r6, SPRN_SRR1
@@ -2122,7 +2338,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 	/* hypervisor doorbell */
 3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
+	/* see if it's a host IPI */
 	li	r3, 1
+	lbz	r0, HSTATE_HOST_IPI(r13)
+	cmpwi	r0, 0
+	bnelr
+	/* if not, clear it and return -1 */
+	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
+	PPC_MSGCLR(6)
+	li	r3, -1
 	blr
 
 /*
@@ -2131,6 +2355,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *	0 if no interrupt is pending
  *	1 if an interrupt is pending that needs to be handled by the host
  *	-1 if there was a guest wakeup IPI (which has now been cleared)
+ * Modifies r0, r6, r7, r8, returns value in r3.
  */
 kvmppc_read_intr:
 	/* see if a host IPI is pending */
@@ -2185,6 +2410,7 @@ kvmppc_read_intr:
 	bne-	43f
 
 	/* OK, it's an IPI for us */
+	li	r12, 0
 	li	r3, -1
 1:	blr
 
@@ -2314,3 +2540,62 @@ kvmppc_fix_pmao:
 	mtspr	SPRN_PMC6, r3
 	isync
 	blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r6, VCORE_IN_GUEST(r5)
+	cmpwi	r6, 0
+	beq	5f				/* if in guest, need to */
+	ld	r6, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
+5:	mftb	r5
+	subf	r5, r6, r5
+	std	r3, VCPU_CUR_ACTIVITY(r4)
+	std	r5, VCPU_ACTIVITY_START(r4)
+	blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+	ld	r5, HSTATE_KVM_VCORE(r13)
+	lbz	r8, VCORE_IN_GUEST(r5)
+	cmpwi	r8, 0
+	beq	4f				/* if in guest, need to */
+	ld	r8, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
+4:	ld	r5, VCPU_CUR_ACTIVITY(r4)
+	ld	r6, VCPU_ACTIVITY_START(r4)
+	std	r3, VCPU_CUR_ACTIVITY(r4)
+	mftb	r7
+	subf	r7, r8, r7
+	std	r7, VCPU_ACTIVITY_START(r4)
+	cmpdi	r5, 0
+	beqlr
+	subf	r3, r6, r7
+	ld	r8, TAS_SEQCOUNT(r5)
+	cmpdi	r8, 0
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)
+	lwsync
+	ld	r7, TAS_TOTAL(r5)
+	add	r7, r7, r3
+	std	r7, TAS_TOTAL(r5)
+	ld	r6, TAS_MIN(r5)
+	ld	r7, TAS_MAX(r5)
+	beq	3f
+	cmpd	r3, r6
+	bge	1f
+3:	std	r3, TAS_MIN(r5)
+1:	cmpd	r3, r7
+	ble	2f
+	std	r3, TAS_MAX(r5)
+2:	lwsync
+	addi	r8, r8, 1
+	std	r8, TAS_SEQCOUNT(r5)
+	blr
+#endif
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ce3c893d509b..f2c75a1e0536 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -258,6 +258,28 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+	long rc;
+
+	rc = kvmppc_h_logical_ci_load(vcpu);
+	if (rc == H_TOO_HARD)
+		return EMULATE_FAIL;
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+	long rc;
+
+	rc = kvmppc_h_logical_ci_store(vcpu);
+	if (rc == H_TOO_HARD)
+		return EMULATE_FAIL;
+	kvmppc_set_gpr(vcpu, 3, rc);
+	return EMULATE_DONE;
+}
+
 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 {
 	long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -290,6 +312,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_LOGICAL_CI_LOAD:
+		return kvmppc_h_pr_logical_ci_load(vcpu);
+	case H_LOGICAL_CI_STORE:
+		return kvmppc_h_pr_logical_ci_store(vcpu);
 	case H_XIRR:
 	case H_CPPR:
 	case H_EOI:
@@ -323,6 +349,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
 	case H_BULK_REMOVE:
 	case H_PUT_TCE:
 	case H_CEDE:
+	case H_LOGICAL_CI_LOAD:
+	case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
 	case H_XIRR:
 	case H_CPPR:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a4a8d9f0dcb7..c6ca7db64673 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/gfp.h>
 #include <linux/anon_inodes.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/kvm_book3s.h>
@@ -39,7 +40,7 @@
  * LOCKING
  * =======
  *
- * Each ICS has a mutex protecting the information about the IRQ
+ * Each ICS has a spin lock protecting the information about the IRQ
  * sources and avoiding simultaneous deliveries if the same interrupt.
  *
  * ICP operations are done via a single compare & swap transaction
@@ -109,7 +110,10 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 {
 	int i;
 
-	mutex_lock(&ics->lock);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 
 	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
 		struct ics_irq_state *state = &ics->irq_state[i];
@@ -120,12 +124,15 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 		XICS_DBG("resend %#x prio %#x\n", state->number,
 			      state->priority);
 
-		mutex_unlock(&ics->lock);
+		arch_spin_unlock(&ics->lock);
+		local_irq_restore(flags);
 		icp_deliver_irq(xics, icp, state->number);
-		mutex_lock(&ics->lock);
+		local_irq_save(flags);
+		arch_spin_lock(&ics->lock);
 	}
 
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 }
 
 static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -133,8 +140,10 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 		       u32 server, u32 priority, u32 saved_priority)
 {
 	bool deliver;
+	unsigned long flags;
 
-	mutex_lock(&ics->lock);
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 
 	state->server = server;
 	state->priority = priority;
@@ -145,7 +154,8 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 		deliver = true;
 	}
 
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 
 	return deliver;
 }
@@ -186,6 +196,7 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
 	struct kvmppc_ics *ics;
 	struct ics_irq_state *state;
 	u16 src;
+	unsigned long flags;
 
 	if (!xics)
 		return -ENODEV;
@@ -195,10 +206,12 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
 		return -EINVAL;
 	state = &ics->irq_state[src];
 
-	mutex_lock(&ics->lock);
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 	*server = state->server;
 	*priority = state->priority;
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 
 	return 0;
 }
@@ -365,6 +378,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	struct kvmppc_ics *ics;
 	u32 reject;
 	u16 src;
+	unsigned long flags;
 
 	/*
 	 * This is used both for initial delivery of an interrupt and
@@ -391,7 +405,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	state = &ics->irq_state[src];
 
 	/* Get a lock on the ICS */
-	mutex_lock(&ics->lock);
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 
 	/* Get our server */
 	if (!icp || state->server != icp->server_num) {
@@ -434,7 +449,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 	 *
 	 * Note that if successful, the new delivery might have itself
 	 * rejected an interrupt that was "delivered" before we took the
-	 * icp mutex.
+	 * ics spin lock.
 	 *
 	 * In this case we do the whole sequence all over again for the
 	 * new guy. We cannot assume that the rejected interrupt is less
@@ -448,7 +463,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 		 * Delivery was successful, did we reject somebody else ?
 		 */
 		if (reject && reject != XICS_IPI) {
-			mutex_unlock(&ics->lock);
+			arch_spin_unlock(&ics->lock);
+			local_irq_restore(flags);
 			new_irq = reject;
 			goto again;
 		}
@@ -468,12 +484,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 		 */
 		smp_mb();
 		if (!icp->state.need_resend) {
-			mutex_unlock(&ics->lock);
+			arch_spin_unlock(&ics->lock);
+			local_irq_restore(flags);
 			goto again;
 		}
 	}
  out:
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 }
 
 static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -802,14 +820,22 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
 	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
 		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
 
-	if (icp->rm_action & XICS_RM_KICK_VCPU)
+	if (icp->rm_action & XICS_RM_KICK_VCPU) {
+		icp->n_rm_kick_vcpu++;
 		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
-	if (icp->rm_action & XICS_RM_CHECK_RESEND)
+	}
+	if (icp->rm_action & XICS_RM_CHECK_RESEND) {
+		icp->n_rm_check_resend++;
 		icp_check_resend(xics, icp->rm_resend_icp);
-	if (icp->rm_action & XICS_RM_REJECT)
+	}
+	if (icp->rm_action & XICS_RM_REJECT) {
+		icp->n_rm_reject++;
 		icp_deliver_irq(xics, icp, icp->rm_reject);
-	if (icp->rm_action & XICS_RM_NOTIFY_EOI)
+	}
+	if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
+		icp->n_rm_notify_eoi++;
 		kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
+	}
 
 	icp->rm_action = 0;
 
@@ -872,10 +898,21 @@ static int xics_debug_show(struct seq_file *m, void *private)
 	struct kvm *kvm = xics->kvm;
 	struct kvm_vcpu *vcpu;
 	int icsid, i;
+	unsigned long flags;
+	unsigned long t_rm_kick_vcpu, t_rm_check_resend;
+	unsigned long t_rm_reject, t_rm_notify_eoi;
+	unsigned long t_reject, t_check_resend;
 
 	if (!kvm)
 		return 0;
 
+	t_rm_kick_vcpu = 0;
+	t_rm_notify_eoi = 0;
+	t_rm_check_resend = 0;
+	t_rm_reject = 0;
+	t_check_resend = 0;
+	t_reject = 0;
+
 	seq_printf(m, "=========\nICP state\n=========\n");
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -890,8 +927,19 @@ static int xics_debug_show(struct seq_file *m, void *private)
 			   icp->server_num, state.xisr,
 			   state.pending_pri, state.cppr, state.mfrr,
 			   state.out_ee, state.need_resend);
+		t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
+		t_rm_notify_eoi += icp->n_rm_notify_eoi;
+		t_rm_check_resend += icp->n_rm_check_resend;
+		t_rm_reject += icp->n_rm_reject;
+		t_check_resend += icp->n_check_resend;
+		t_reject += icp->n_reject;
 	}
 
+	seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
+			t_rm_kick_vcpu, t_rm_check_resend,
+			t_rm_reject, t_rm_notify_eoi);
+	seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
+			t_check_resend, t_reject);
 	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
 		struct kvmppc_ics *ics = xics->ics[icsid];
 
@@ -901,7 +949,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
 		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
 			   icsid);
 
-		mutex_lock(&ics->lock);
+		local_irq_save(flags);
+		arch_spin_lock(&ics->lock);
 
 		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
 			struct ics_irq_state *irq = &ics->irq_state[i];
@@ -912,7 +961,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
 				   irq->resend, irq->masked_pending);
 
 		}
-		mutex_unlock(&ics->lock);
+		arch_spin_unlock(&ics->lock);
+		local_irq_restore(flags);
 	}
 	return 0;
 }
@@ -965,7 +1015,6 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
 	if (!ics)
 		goto out;
 
-	mutex_init(&ics->lock);
 	ics->icsid = icsid;
 
 	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
@@ -1107,13 +1156,15 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	u64 __user *ubufp = (u64 __user *) addr;
 	u16 idx;
 	u64 val, prio;
+	unsigned long flags;
 
 	ics = kvmppc_xics_find_ics(xics, irq, &idx);
 	if (!ics)
 		return -ENOENT;
 
 	irqp = &ics->irq_state[idx];
-	mutex_lock(&ics->lock);
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 	ret = -ENOENT;
 	if (irqp->exists) {
 		val = irqp->server;
@@ -1129,7 +1180,8 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
 			val |= KVM_XICS_PENDING;
 		ret = 0;
 	}
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 
 	if (!ret && put_user(val, ubufp))
 		ret = -EFAULT;
@@ -1146,6 +1198,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	u64 val;
 	u8 prio;
 	u32 server;
+	unsigned long flags;
 
 	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
 		return -ENOENT;
@@ -1166,7 +1219,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
 		return -EINVAL;
 
-	mutex_lock(&ics->lock);
+	local_irq_save(flags);
+	arch_spin_lock(&ics->lock);
 	irqp->server = server;
 	irqp->saved_priority = prio;
 	if (val & KVM_XICS_MASKED)
@@ -1178,7 +1232,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
 		irqp->asserted = 1;
 	irqp->exists = 1;
-	mutex_unlock(&ics->lock);
+	arch_spin_unlock(&ics->lock);
+	local_irq_restore(flags);
 
 	if (val & KVM_XICS_PENDING)
 		icp_deliver_irq(xics, NULL, irqp->number);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 73f0f2723c07..56ea44f9867f 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -78,13 +78,22 @@ struct kvmppc_icp {
 	u32  rm_reject;
 	u32  rm_eoied_irq;
 
+	/* Counters for each reason we exited real mode */
+	unsigned long n_rm_kick_vcpu;
+	unsigned long n_rm_check_resend;
+	unsigned long n_rm_reject;
+	unsigned long n_rm_notify_eoi;
+	/* Counters for handling ICP processing in real mode */
+	unsigned long n_check_resend;
+	unsigned long n_reject;
+
 	/* Debug stuff for real mode */
 	union kvmppc_icp_state rm_dbgstate;
 	struct kvm_vcpu *rm_dbgtgt;
 };
 
 struct kvmppc_ics {
-	struct mutex lock;
+	arch_spinlock_t lock;
 	u16 icsid;
 	struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
 };
@@ -96,6 +105,8 @@ struct kvmppc_xics {
 	u32 max_icsid;
 	bool real_mode;
 	bool real_mode_dbg;
+	u32 err_noics;
+	u32 err_noicp;
 	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
 };
 
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index cc536d4a75ef..4d33e199edcc 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	pte_t *ptep;
 	unsigned int wimg = 0;
 	pgd_t *pgdir;
+	unsigned long flags;
 
 	/* used to check for invalidations in progress */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 
 	pgdir = vcpu_e500->vcpu.arch.pgdir;
-	ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
-	if (pte_present(*ptep))
-		wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-	else {
-		if (printk_ratelimit())
-			pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
-				__func__, (long)gfn, pfn);
-		ret = -EINVAL;
-		goto out;
+	/*
+	 * We are just looking at the wimg bits, so we don't
+	 * care much about the trans splitting bit.
+	 * We are holding kvm->mmu_lock so a notifier invalidate
+	 * can't run hence pfn won't change.
+	 */
+	local_irq_save(flags);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
+	if (ptep) {
+		pte_t pte = READ_ONCE(*ptep);
+
+		if (pte_present(pte)) {
+			wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+				MAS2_WIMGE_MASK;
+			local_irq_restore(flags);
+		} else {
+			local_irq_restore(flags);
+			pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+					   __func__, (long)gfn, pfn);
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 	kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 91bbc845ac66..ac3ddf115f3d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_PPC_RMA:
 		r = 0;
 		break;
+	case KVM_CAP_PPC_HWRNG:
+		r = kvmppc_hwrng_present();
+		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2c2022d16059..fda236f908eb 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	/* Get PTE and page size from page tables */
-	ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+	ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
 	if (ptep == NULL || !pte_present(*ptep)) {
 		DBG_LOW(" no PTE !\n");
 		rc = 1;
@@ -1394,6 +1394,7 @@ tm_abort:
 		tm_abort(TM_CAUSE_TLBI);
 	}
 #endif
+	return;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index fa9d5c238d22..0ce968b00b7c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd)
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	/* Only called for hugetlbfs pages, hence can ignore THP */
-	return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 	pmd = pmd_offset(pud, start);
 	pud_clear(pud);
 	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
 }
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
+/*
+ * We are holding mmap_sem, so a parallel huge page collapse cannot run.
+ * To prevent hugepage split, disable irq.
+ */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
 	pte_t *ptep;
 	struct page *page;
 	unsigned shift;
-	unsigned long mask;
+	unsigned long mask, flags;
 	/*
 	 * Transparent hugepages are handled by generic code. We can skip them
 	 * here.
 	 */
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
 	/* Verify it is a huge page else bail. */
-	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
+	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
+		local_irq_restore(flags);
 		return ERR_PTR(-EINVAL);
-
+	}
 	mask = (1UL << shift) - 1;
 	page = pte_page(*ptep);
 	if (page)
 		page += (address & mask) / PAGE_SIZE;
 
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page)
  *
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
 
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+				   unsigned *shift)
 {
 	pgd_t pgd, *pgdp;
 	pud_t pud, *pudp;
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
 			 * A hugepage collapse is captured by pmd_none, because
 			 * it mark the pmd none and do a hpte invalidate.
 			 *
-			 * A hugepage split is captured by pmd_trans_splitting
-			 * because we mark the pmd trans splitting and do a
-			 * hpte invalidate
-			 *
+			 * We don't worry about pmd_trans_splitting here, The
+			 * caller if it needs to handle the splitting case
+			 * should check for that.
 			 */
-			if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+			if (pmd_none(pmd))
 				return NULL;
 
 			if (pmd_huge(pmd) || pmd_large(pmd)) {
@@ -1030,7 +1040,7 @@ out:
 		*shift = pdshift;
 	return ret_pte;
 }
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
 
 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, int write, struct page **pages, int *nr)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index ead55351b254..ff09cde20cd2 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  * interrupt context, so if the access faults, we read the page tables
  * to find which page (if any) is mapped and access it directly.
  */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 {
+	int ret = -EFAULT;
 	pgd_t *pgdir;
 	pte_t *ptep, pte;
 	unsigned shift;
 	unsigned long addr = (unsigned long) ptr;
 	unsigned long offset;
-	unsigned long pfn;
+	unsigned long pfn, flags;
 	void *kaddr;
 
 	pgdir = current->mm->pgd;
 	if (!pgdir)
 		return -EFAULT;
 
+	local_irq_save(flags);
 	ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+	if (!ptep)
+		goto err_out;
 	if (!shift)
 		shift = PAGE_SHIFT;
 
 	/* align address to page boundary */
 	offset = addr & ((1UL << shift) - 1);
-	addr -= offset;
 
-	if (ptep == NULL)
-		return -EFAULT;
-	pte = *ptep;
+	pte = READ_ONCE(*ptep);
 	if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
-		return -EFAULT;
+		goto err_out;
 	pfn = pte_pfn(pte);
 	if (!page_is_ram(pfn))
-		return -EFAULT;
+		goto err_out;
 
 	/* no highmem to worry about here */
 	kaddr = pfn_to_kaddr(pfn);
-	memcpy(ret, kaddr + offset, nb);
-	return 0;
+	memcpy(buf, kaddr + offset, nb);
+	ret = 0;
+err_out:
+	local_irq_restore(flags);
+	return ret;
 }
 
 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1a3429e1ccb5..1ba6307be4db 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -111,7 +111,7 @@ out:
 static int
 spufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = d_inode(dentry);
 
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    (attr->ia_size != inode->i_size))
@@ -163,14 +163,14 @@ static void spufs_prune_dir(struct dentry *dir)
 {
 	struct dentry *dentry, *tmp;
 
-	mutex_lock(&dir->d_inode->i_mutex);
+	mutex_lock(&d_inode(dir)->i_mutex);
 	list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
 		spin_lock(&dentry->d_lock);
-		if (!(d_unhashed(dentry)) && dentry->d_inode) {
+		if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) {
 			dget_dlock(dentry);
 			__d_drop(dentry);
 			spin_unlock(&dentry->d_lock);
-			simple_unlink(dir->d_inode, dentry);
+			simple_unlink(d_inode(dir), dentry);
 			/* XXX: what was dcache_lock protecting here? Other
 			 * filesystems (IB, configfs) release dcache_lock
 			 * before unlink */
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
 		}
 	}
 	shrink_dcache_parent(dir);
-	mutex_unlock(&dir->d_inode->i_mutex);
+	mutex_unlock(&d_inode(dir)->i_mutex);
 }
 
 /* Caller must hold parent->i_mutex */
@@ -192,7 +192,7 @@ static int spufs_rmdir(struct inode *parent, struct dentry *dir)
 	d_drop(dir);
 	res = simple_rmdir(parent, dir);
 	/* We have to give up the mm_struct */
-	spu_forget(SPUFS_I(dir->d_inode)->i_ctx);
+	spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
 	return res;
 }
 
@@ -222,8 +222,8 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
 	int ret;
 
 	dir = file->f_path.dentry;
-	parent = dir->d_parent->d_inode;
-	ctx = SPUFS_I(dir->d_inode)->i_ctx;
+	parent = d_inode(dir->d_parent);
+	ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
 	mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
 	ret = spufs_rmdir(parent, dir);
@@ -460,7 +460,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
 		goto out_aff_unlock;
 
 	if (affinity) {
-		spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
 								neighbor);
 		if (neighbor)
 			put_spu_context(neighbor);
@@ -504,7 +504,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	d_instantiate(dentry, inode);
 	inc_nlink(dir);
-	inc_nlink(dentry->d_inode);
+	inc_nlink(d_inode(dentry));
 	return ret;
 
 out_iput:
@@ -561,7 +561,7 @@ static struct file_system_type spufs_type;
 long spufs_create(struct path *path, struct dentry *dentry,
 		unsigned int flags, umode_t mode, struct file *filp)
 {
-	struct inode *dir = path->dentry->d_inode;
+	struct inode *dir = d_inode(path->dentry);
 	int ret;
 
 	/* check if we are on spufs */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 920c252d1f49..f8bc950efcae 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2693,7 +2693,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		hose->last_busno = 0xff;
 	}
 	hose->private_data = phb;
-	hose->controller_ops = pnv_pci_controller_ops;
 	phb->hub_id = hub_id;
 	phb->opal_id = phb_id;
 	phb->type = ioda_type;
@@ -2812,6 +2811,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook;
 	pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment;
 	pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus;
+	hose->controller_ops = pnv_pci_controller_ops;
 
 #ifdef CONFIG_PCI_IOV
 	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 80db43944afe..6eb808ff637e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -24,12 +24,22 @@
 
 struct powernv_rng {
 	void __iomem *regs;
+	void __iomem *regs_real;
 	unsigned long mask;
 };
 
 static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
 
 
+int powernv_hwrng_present(void)
+{
+	struct powernv_rng *rng;
+
+	rng = get_cpu_var(powernv_rng);
+	put_cpu_var(rng);
+	return rng != NULL;
+}
+
 static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
 {
 	unsigned long parity;
@@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
 	return val;
 }
 
+int powernv_get_random_real_mode(unsigned long *v)
+{
+	struct powernv_rng *rng;
+
+	rng = raw_cpu_read(powernv_rng);
+
+	*v = rng_whiten(rng, in_rm64(rng->regs_real));
+
+	return 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
 	struct powernv_rng *rng;
@@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
 static __init int rng_create(struct device_node *dn)
 {
 	struct powernv_rng *rng;
+	struct resource res;
 	unsigned long val;
 
 	rng = kzalloc(sizeof(*rng), GFP_KERNEL);
 	if (!rng)
 		return -ENOMEM;
 
+	if (of_address_to_resource(dn, 0, &res)) {
+		kfree(rng);
+		return -ENXIO;
+	}
+
+	rng->regs_real = (void __iomem *)res.start;
+
 	rng->regs = of_iomap(dn, 0);
 	if (!rng->regs) {
 		kfree(rng);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index b4b11096ea8b..019d34aaf054 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -412,6 +412,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	if (rc)
 		return -EINVAL;
 
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc)
+		return -EINVAL;
+
 	parent = of_find_node_by_path("/cpus");
 	if (!parent)
 		return -ENODEV;
@@ -422,12 +426,6 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 
 	of_node_put(parent);
 
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc) {
-		dlpar_free_cc_nodes(dn);
-		return -EINVAL;
-	}
-
 	rc = dlpar_attach_node(dn);
 	if (rc) {
 		dlpar_release_drc(drc_index);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8e58c614c37d..b06dc3839268 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -115,7 +115,7 @@ config S390
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
-	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES
+	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index ba3b2aefddf5..d9c4c313fbc6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -3,9 +3,10 @@
  *
  * Support for s390 cryptographic instructions.
  *
- *   Copyright IBM Corp. 2003, 2007
+ *   Copyright IBM Corp. 2003, 2015
  *   Author(s): Thomas Spatzier
  *		Jan Glauber (jan.glauber@de.ibm.com)
+ *		Harald Freudenberger (freude@de.ibm.com)
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -28,15 +29,17 @@
 #define CRYPT_S390_MSA	0x1
 #define CRYPT_S390_MSA3	0x2
 #define CRYPT_S390_MSA4	0x4
+#define CRYPT_S390_MSA5	0x8
 
 /* s390 cryptographic operations */
 enum crypt_s390_operations {
-	CRYPT_S390_KM   = 0x0100,
-	CRYPT_S390_KMC  = 0x0200,
-	CRYPT_S390_KIMD = 0x0300,
-	CRYPT_S390_KLMD = 0x0400,
-	CRYPT_S390_KMAC = 0x0500,
-	CRYPT_S390_KMCTR = 0x0600
+	CRYPT_S390_KM	 = 0x0100,
+	CRYPT_S390_KMC	 = 0x0200,
+	CRYPT_S390_KIMD  = 0x0300,
+	CRYPT_S390_KLMD  = 0x0400,
+	CRYPT_S390_KMAC  = 0x0500,
+	CRYPT_S390_KMCTR = 0x0600,
+	CRYPT_S390_PPNO  = 0x0700
 };
 
 /*
@@ -138,6 +141,16 @@ enum crypt_s390_kmac_func {
 	KMAC_TDEA_192 = CRYPT_S390_KMAC | 3
 };
 
+/*
+ * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER
+ * OPERATION) instruction
+ */
+enum crypt_s390_ppno_func {
+	PPNO_QUERY	      = CRYPT_S390_PPNO | 0,
+	PPNO_SHA512_DRNG_GEN  = CRYPT_S390_PPNO | 3,
+	PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83
+};
+
 /**
  * crypt_s390_km:
  * @func: the function code passed to KM; see crypt_s390_km_func
@@ -162,11 +175,11 @@ static inline int crypt_s390_km(long func, void *param,
 	int ret;
 
 	asm volatile(
-		"0:	.insn	rre,0xb92e0000,%3,%1 \n" /* KM opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb92e0000,%3,%1\n" /* KM opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
 		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
 	if (ret < 0)
@@ -198,11 +211,11 @@ static inline int crypt_s390_kmc(long func, void *param,
 	int ret;
 
 	asm volatile(
-		"0:	.insn	rre,0xb92f0000,%3,%1 \n" /* KMC opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb92f0000,%3,%1\n" /* KMC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
 		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
 	if (ret < 0)
@@ -233,11 +246,11 @@ static inline int crypt_s390_kimd(long func, void *param,
 	int ret;
 
 	asm volatile(
-		"0:	.insn	rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb93e0000,%1,%1\n" /* KIMD opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "=d" (ret), "+a" (__src), "+d" (__src_len)
 		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
 	if (ret < 0)
@@ -267,11 +280,11 @@ static inline int crypt_s390_klmd(long func, void *param,
 	int ret;
 
 	asm volatile(
-		"0:	.insn	rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb93f0000,%1,%1\n" /* KLMD opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "=d" (ret), "+a" (__src), "+d" (__src_len)
 		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
 	if (ret < 0)
@@ -302,11 +315,11 @@ static inline int crypt_s390_kmac(long func, void *param,
 	int ret;
 
 	asm volatile(
-		"0:	.insn	rre,0xb91e0000,%1,%1 \n" /* KLAC opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb91e0000,%1,%1\n" /* KLAC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "=d" (ret), "+a" (__src), "+d" (__src_len)
 		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
 	if (ret < 0)
@@ -340,11 +353,11 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
 	int ret = -1;
 
 	asm volatile(
-		"0:	.insn	rrf,0xb92d0000,%3,%1,%4,0 \n" /* KMCTR opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
 		  "+a" (__ctr)
 		: "d" (__func), "a" (__param) : "cc", "memory");
@@ -354,6 +367,47 @@ static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
 }
 
 /**
+ * crypt_s390_ppno:
+ * @func: the function code passed to PPNO; see crypt_s390_ppno_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ *
+ * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of random
+ * bytes stored in dest buffer for generate function
+ */
+static inline int crypt_s390_ppno(long func, void *param,
+				  u8 *dest, long dest_len,
+				  const u8 *seed, long seed_len)
+{
+	register long  __func	  asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param	  asm("1") = param;    /* param block (240 bytes) */
+	register u8   *__dest	  asm("2") = dest;     /* buf for recv random bytes */
+	register long  __dest_len asm("3") = dest_len; /* requested random bytes */
+	register const u8 *__seed asm("4") = seed;     /* buf with seed data */
+	register long  __seed_len asm("5") = seed_len; /* bytes in seed buf */
+	int ret = -1;
+
+	asm volatile (
+		"0:	.insn	rre,0xb93c0000,%1,%5\n"	/* PPNO opcode */
+		"1:	brc	1,0b\n"	  /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (ret), "+a"(__dest), "+d"(__dest_len)
+		: "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len)
+		: "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0;
+}
+
+/**
  * crypt_s390_func_available:
  * @func: the function code of the specific function; 0 if op in general
  *
@@ -373,6 +427,9 @@ static inline int crypt_s390_func_available(int func,
 		return 0;
 	if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
 		return 0;
+	if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57))
+		return 0;
+
 	switch (func & CRYPT_S390_OP_MASK) {
 	case CRYPT_S390_KM:
 		ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
@@ -390,8 +447,12 @@ static inline int crypt_s390_func_available(int func,
 		ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
 		break;
 	case CRYPT_S390_KMCTR:
-		ret = crypt_s390_kmctr(KMCTR_QUERY, &status, NULL, NULL, 0,
-				       NULL);
+		ret = crypt_s390_kmctr(KMCTR_QUERY, &status,
+				       NULL, NULL, 0, NULL);
+		break;
+	case CRYPT_S390_PPNO:
+		ret = crypt_s390_ppno(PPNO_QUERY, &status,
+				      NULL, 0, NULL, 0);
 		break;
 	default:
 		return 0;
@@ -419,15 +480,14 @@ static inline int crypt_s390_pcc(long func, void *param)
 	int ret = -1;
 
 	asm volatile(
-		"0:	.insn	rre,0xb92c0000,0,0 \n" /* PCC opcode */
-		"1:	brc	1,0b \n" /* handle partial completion */
+		"0:	.insn	rre,0xb92c0000,0,0\n" /* PCC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
 		"	la	%0,0\n"
 		"2:\n"
-		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
 		: "+d" (ret)
 		: "d" (__func), "a" (__param) : "cc", "memory");
 	return ret;
 }
 
-
 #endif	/* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 94a35a4c1b48..1f374b39a4ec 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,106 +1,529 @@
 /*
- * Copyright IBM Corp. 2006, 2007
+ * Copyright IBM Corp. 2006, 2015
  * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ *	      Harald Freudenberger <freude@de.ibm.com>
  * Driver for the s390 pseudo random number generator
  */
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
 #include <linux/fs.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/device.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <asm/debug.h>
 #include <asm/uaccess.h>
+#include <asm/timex.h>
 
 #include "crypt_s390.h"
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jan Glauber <jan.glauber@de.ibm.com>");
+MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("s390 PRNG interface");
 
-static int prng_chunk_size = 256;
-module_param(prng_chunk_size, int, S_IRUSR | S_IRGRP | S_IROTH);
+
+#define PRNG_MODE_AUTO	  0
+#define PRNG_MODE_TDES	  1
+#define PRNG_MODE_SHA512  2
+
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(prng_mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN   8
+#define PRNG_CHUNKSIZE_TDES_MAX   (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
 MODULE_PARM_DESC(prng_chunk_size, "PRNG read chunk size in bytes");
 
-static int prng_entropy_limit = 4096;
-module_param(prng_entropy_limit, int, S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
-MODULE_PARM_DESC(prng_entropy_limit,
-	"PRNG add entropy after that much bytes were produced");
+
+#define PRNG_RESEED_LIMIT_TDES		 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER	 4096
+#define PRNG_RESEED_LIMIT_SHA512       100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER	10000
+
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+
 
 /*
  * Any one who considers arithmetical methods of producing random digits is,
  * of course, in a state of sin. -- John von Neumann
  */
 
-struct s390_prng_data {
-	unsigned long count; /* how many bytes were produced */
-	char *buf;
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED  1
+#define PRNG_SELFTEST_FAILED	 2
+#define PRNG_INSTANTIATE_FAILED  3
+#define PRNG_SEED_FAILED	 4
+#define PRNG_RESEED_FAILED	 5
+#define PRNG_GEN_FAILED		 6
+
+struct prng_ws_s {
+	u8  parm_block[32];
+	u32 reseed_counter;
+	u64 byte_counter;
 };
 
-static struct s390_prng_data *p;
+struct ppno_ws_s {
+	u32 res;
+	u32 reseed_counter;
+	u64 stream_bytes;
+	u8  V[112];
+	u8  C[112];
+};
 
-/* copied from libica, use a non-zero initial parameter block */
-static unsigned char parm_block[32] = {
-0x0F,0x2B,0x8E,0x63,0x8C,0x8E,0xD2,0x52,0x64,0xB7,0xA0,0x7B,0x75,0x28,0xB8,0xF4,
-0x75,0x5F,0xD2,0xA6,0x8D,0x97,0x11,0xFF,0x49,0xD8,0x23,0xF3,0x7E,0x21,0xEC,0xA0,
+struct prng_data_s {
+	struct mutex mutex;
+	union {
+		struct prng_ws_s prngws;
+		struct ppno_ws_s ppnows;
+	};
+	u8 *buf;
+	u32 rest;
+	u8 *prev;
 };
 
-static int prng_open(struct inode *inode, struct file *file)
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+	0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+	0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+	0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+	0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+static int generate_entropy(u8 *ebuf, size_t nbytes)
 {
-	return nonseekable_open(inode, file);
+	int n, ret = 0;
+	u8 *pg, *h, hash[32];
+
+	pg = (u8 *) __get_free_page(GFP_KERNEL);
+	if (!pg) {
+		prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+		return -ENOMEM;
+	}
+
+	while (nbytes) {
+		/* fill page with urandom bytes */
+		get_random_bytes(pg, PAGE_SIZE);
+		/* exor page with stckf values */
+		for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) {
+			u64 *p = ((u64 *)pg) + n;
+			*p ^= get_tod_clock_fast();
+		}
+		n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
+		if (n < sizeof(hash))
+			h = hash;
+		else
+			h = ebuf;
+		/* generate sha256 from this page */
+		if (crypt_s390_kimd(KIMD_SHA_256, h,
+				    pg, PAGE_SIZE) != PAGE_SIZE) {
+			prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+			ret = -EIO;
+			goto out;
+		}
+		if (n < sizeof(hash))
+			memcpy(ebuf, hash, n);
+		ret += n;
+		ebuf += n;
+		nbytes -= n;
+	}
+
+out:
+	free_page((unsigned long)pg);
+	return ret;
 }
 
-static void prng_add_entropy(void)
+
+/*** tdes functions ***/
+
+static void prng_tdes_add_entropy(void)
 {
 	__u64 entropy[4];
 	unsigned int i;
 	int ret;
 
 	for (i = 0; i < 16; i++) {
-		ret = crypt_s390_kmc(KMC_PRNG, parm_block, (char *)entropy,
-				     (char *)entropy, sizeof(entropy));
+		ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+				     (char *)entropy, (char *)entropy,
+				     sizeof(entropy));
 		BUG_ON(ret < 0 || ret != sizeof(entropy));
-		memcpy(parm_block, entropy, sizeof(entropy));
+		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
 	}
 }
 
-static void prng_seed(int nbytes)
+
+static void prng_tdes_seed(int nbytes)
 {
 	char buf[16];
 	int i = 0;
 
-	BUG_ON(nbytes > 16);
+	BUG_ON(nbytes > sizeof(buf));
+
 	get_random_bytes(buf, nbytes);
 
 	/* Add the entropy */
 	while (nbytes >= 8) {
-		*((__u64 *)parm_block) ^= *((__u64 *)(buf+i));
-		prng_add_entropy();
+		*((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+		prng_tdes_add_entropy();
 		i += 8;
 		nbytes -= 8;
 	}
-	prng_add_entropy();
+	prng_tdes_add_entropy();
+	prng_data->prngws.reseed_counter = 0;
+}
+
+
+static int __init prng_tdes_instantiate(void)
+{
+	int datalen;
+
+	pr_debug("prng runs in TDES mode with "
+		 "chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	/* memory allocation, prng_data struct init, mutex init */
+	datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+	prng_data = kzalloc(datalen, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+	mutex_init(&prng_data->mutex);
+	prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+	memcpy(prng_data->prngws.parm_block, initial_parm_block, 32);
+
+	/* initialize the PRNG, add 128 bits of entropy */
+	prng_tdes_seed(16);
+
+	return 0;
 }
 
-static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
-			 loff_t *ppos)
+
+static void prng_tdes_deinstantiate(void)
+{
+	pr_debug("The prng module stopped "
+		 "after running in triple DES mode\n");
+	kzfree(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+static int __init prng_sha512_selftest(void)
 {
-	int chunk, n;
+	/* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+	static const u8 seed[] __initconst = {
+		0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+		0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+		0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+		0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+		0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+		0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+	static const u8 V0[] __initconst = {
+		0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+		0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+		0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+		0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+		0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+		0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+		0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+		0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+		0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+		0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+		0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+		0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+		0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+		0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+	static const u8 C0[] __initconst = {
+		0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+		0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+		0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+		0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+		0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+		0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+		0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+		0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+		0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+		0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+		0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+		0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+		0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+		0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+	static const u8 random[] __initconst = {
+		0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+		0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+		0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+		0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+		0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+		0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+		0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+		0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+		0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+		0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+		0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+		0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+		0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+		0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+		0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+		0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+		0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+		0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+		0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+		0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+		0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+		0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+		0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+		0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+		0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+		0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+		0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+		0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+		0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+		0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
 	int ret = 0;
-	int tmp;
+	u8 buf[sizeof(random)];
+	struct ppno_ws_s ws;
+
+	memset(&ws, 0, sizeof(ws));
+
+	/* initial seed */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &ws, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret < 0) {
+		pr_err("The prng self test seed operation for the "
+		       "SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* check working states V and C */
+	if (memcmp(ws.V, V0, sizeof(V0)) != 0
+	    || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+		pr_err("The prng self test state test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* generate random bytes */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &ws, buf, sizeof(buf),
+			      NULL, 0);
+	if (ret < 0) {
+		pr_err("The prng self test generate operation for "
+		       "the SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &ws, buf, sizeof(buf),
+			      NULL, 0);
+	if (ret < 0) {
+		pr_err("The prng self test generate operation for "
+		       "the SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* check against expected data */
+	if (memcmp(buf, random, sizeof(random)) != 0) {
+		pr_err("The prng self test data test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+static int __init prng_sha512_instantiate(void)
+{
+	int ret, datalen;
+	u8 seed[64];
+
+	pr_debug("prng runs in SHA-512 mode "
+		 "with chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	/* memory allocation, prng_data struct init, mutex init */
+	datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+	if (fips_enabled)
+		datalen += prng_chunk_size;
+	prng_data = kzalloc(datalen, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+	mutex_init(&prng_data->mutex);
+	prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+	/* selftest */
+	ret = prng_sha512_selftest();
+	if (ret)
+		goto outfree;
+
+	/* generate initial seed bytestring, first 48 bytes of entropy */
+	ret = generate_entropy(seed, 48);
+	if (ret != 48)
+		goto outfree;
+	/* followed by 16 bytes of unique nonce */
+	get_tod_clock_ext(seed + 48);
+
+	/* initial seed of the ppno drng */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &prng_data->ppnows, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret < 0) {
+		prng_errorflag = PRNG_SEED_FAILED;
+		ret = -EIO;
+		goto outfree;
+	}
+
+	/* if fips mode is enabled, generate a first block of random
+	   bytes for the FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		prng_data->prev = prng_data->buf + prng_chunk_size;
+		ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+				      &prng_data->ppnows,
+				      prng_data->prev,
+				      prng_chunk_size,
+				      NULL, 0);
+		if (ret < 0 || ret != prng_chunk_size) {
+			prng_errorflag = PRNG_GEN_FAILED;
+			ret = -EIO;
+			goto outfree;
+		}
+	}
+
+	return 0;
+
+outfree:
+	kfree(prng_data);
+	return ret;
+}
+
+
+static void prng_sha512_deinstantiate(void)
+{
+	pr_debug("The prng module stopped after running in SHA-512 mode\n");
+	kzfree(prng_data);
+}
+
+
+static int prng_sha512_reseed(void)
+{
+	int ret;
+	u8 seed[32];
+
+	/* generate 32 bytes of fresh entropy */
+	ret = generate_entropy(seed, sizeof(seed));
+	if (ret != sizeof(seed))
+		return ret;
+
+	/* do a reseed of the ppno drng with this bytestring */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &prng_data->ppnows, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret) {
+		prng_errorflag = PRNG_RESEED_FAILED;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+	int ret;
+
+	/* reseed needed ? */
+	if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+		ret = prng_sha512_reseed();
+		if (ret)
+			return ret;
+	}
+
+	/* PPNO generate */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &prng_data->ppnows, buf, nbytes,
+			      NULL, 0);
+	if (ret < 0 || ret != nbytes) {
+		prng_errorflag = PRNG_GEN_FAILED;
+		return -EIO;
+	}
+
+	/* FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		if (!memcmp(prng_data->prev, buf, nbytes)) {
+			prng_errorflag = PRNG_GEN_FAILED;
+			return -EILSEQ;
+		}
+		memcpy(prng_data->prev, buf, nbytes);
+	}
+
+	return ret;
+}
+
+
+/*** file io functions ***/
+
+static int prng_open(struct inode *inode, struct file *file)
+{
+	return nonseekable_open(inode, file);
+}
+
+
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+			      size_t nbytes, loff_t *ppos)
+{
+	int chunk, n, tmp, ret = 0;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
 
-	/* nbytes can be arbitrary length, we split it into chunks */
 	while (nbytes) {
-		/* same as in extract_entropy_user in random.c */
 		if (need_resched()) {
 			if (signal_pending(current)) {
 				if (ret == 0)
 					ret = -ERESTARTSYS;
 				break;
 			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
 			schedule();
+			/* occopy mutex again */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
 		}
 
 		/*
@@ -112,12 +535,11 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
 		/* PRNG only likes multiples of 8 bytes */
 		n = (chunk + 7) & -8;
 
-		if (p->count > prng_entropy_limit)
-			prng_seed(8);
+		if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+			prng_tdes_seed(8);
 
 		/* if the CPU supports PRNG stckf is present too */
-		asm volatile(".insn     s,0xb27c0000,%0"
-			     : "=m" (*((unsigned long long *)p->buf)) : : "cc");
+		*((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
 
 		/*
 		 * Beside the STCKF the input for the TDES-EDE is the output
@@ -132,35 +554,259 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
 		 * Note: you can still get strict X9.17 conformity by setting
 		 * prng_chunk_size to 8 bytes.
 		*/
-		tmp = crypt_s390_kmc(KMC_PRNG, parm_block, p->buf, p->buf, n);
-		BUG_ON((tmp < 0) || (tmp != n));
+		tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+				     prng_data->buf, prng_data->buf, n);
+		if (tmp < 0 || tmp != n) {
+			ret = -EIO;
+			break;
+		}
 
-		p->count += n;
+		prng_data->prngws.byte_counter += n;
+		prng_data->prngws.reseed_counter += n;
 
-		if (copy_to_user(ubuf, p->buf, chunk))
+		if (copy_to_user(ubuf, prng_data->buf, chunk))
 			return -EFAULT;
 
 		nbytes -= chunk;
 		ret += chunk;
 		ubuf += chunk;
 	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
 	return ret;
 }
 
-static const struct file_operations prng_fops = {
+
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+				size_t nbytes, loff_t *ppos)
+{
+	int n, ret = 0;
+	u8 *p;
+
+	/* if errorflag is set do nothing and return 'broken pipe' */
+	if (prng_errorflag)
+		return -EPIPE;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
+			schedule();
+			/* occopy mutex again */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
+		}
+		if (prng_data->rest) {
+			/* push left over random bytes from the previous read */
+			p = prng_data->buf + prng_chunk_size - prng_data->rest;
+			n = (nbytes < prng_data->rest) ?
+				nbytes : prng_data->rest;
+			prng_data->rest -= n;
+		} else {
+			/* generate one chunk of random bytes into read buf */
+			p = prng_data->buf;
+			n = prng_sha512_generate(p, prng_chunk_size);
+			if (n < 0) {
+				ret = n;
+				break;
+			}
+			if (nbytes < prng_chunk_size) {
+				n = nbytes;
+				prng_data->rest = prng_chunk_size - n;
+			} else {
+				n = prng_chunk_size;
+				prng_data->rest = 0;
+			}
+		}
+		if (copy_to_user(ubuf, p, n)) {
+			ret = -EFAULT;
+			break;
+		}
+		ubuf += n;
+		nbytes -= n;
+		ret += n;
+	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
+	return ret;
+}
+
+
+/*** sysfs stuff ***/
+
+static const struct file_operations prng_sha512_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &prng_open,
+	.release	= NULL,
+	.read		= &prng_sha512_read,
+	.llseek		= noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
 	.owner		= THIS_MODULE,
 	.open		= &prng_open,
 	.release	= NULL,
-	.read		= &prng_read,
+	.read		= &prng_tdes_read,
 	.llseek		= noop_llseek,
 };
 
-static struct miscdevice prng_dev = {
+static struct miscdevice prng_sha512_dev = {
+	.name	= "prandom",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &prng_sha512_fops,
+};
+static struct miscdevice prng_tdes_dev = {
 	.name	= "prandom",
 	.minor	= MISC_DYNAMIC_MINOR,
-	.fops	= &prng_fops,
+	.fops	= &prng_tdes_fops,
 };
 
+
+/* chunksize attribute (ro) */
+static ssize_t prng_chunksize_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/* counter attribute (ro) */
+static ssize_t prng_counter_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	u64 counter;
+
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	if (prng_mode == PRNG_MODE_SHA512)
+		counter = prng_data->ppnows.stream_bytes;
+	else
+		counter = prng_data->prngws.byte_counter;
+	mutex_unlock(&prng_data->mutex);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro) */
+static ssize_t prng_errorflag_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro) */
+static ssize_t prng_mode_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	if (prng_mode == PRNG_MODE_TDES)
+		return snprintf(buf, PAGE_SIZE, "TDES\n");
+	else
+		return snprintf(buf, PAGE_SIZE, "SHA512\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/* reseed attribute (w) */
+static ssize_t prng_reseed_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	prng_sha512_reseed();
+	mutex_unlock(&prng_data->mutex);
+
+	return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw) */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+static ssize_t prng_reseed_limit_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	unsigned limit;
+
+	if (sscanf(buf, "%u\n", &limit) != 1)
+		return -EINVAL;
+
+	if (prng_mode == PRNG_MODE_SHA512) {
+		if (limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+			return -EINVAL;
+	} else {
+		if (limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+			return -EINVAL;
+	}
+
+	prng_reseed_limit = limit;
+
+	return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+		   prng_reseed_limit_show, prng_reseed_limit_store);
+
+/* strength attribute (ro) */
+static ssize_t prng_strength_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+static struct attribute *prng_sha512_dev_attrs[] = {
+	&dev_attr_errorflag.attr,
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_reseed.attr,
+	&dev_attr_reseed_limit.attr,
+	&dev_attr_strength.attr,
+	NULL
+};
+static struct attribute *prng_tdes_dev_attrs[] = {
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	NULL
+};
+
+static struct attribute_group prng_sha512_dev_attr_group = {
+	.attrs = prng_sha512_dev_attrs
+};
+static struct attribute_group prng_tdes_dev_attr_group = {
+	.attrs = prng_tdes_dev_attrs
+};
+
+
+/*** module init and exit ***/
+
 static int __init prng_init(void)
 {
 	int ret;
@@ -169,43 +815,105 @@ static int __init prng_init(void)
 	if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
 		return -EOPNOTSUPP;
 
-	if (prng_chunk_size < 8)
-		return -EINVAL;
+	/* choose prng mode */
+	if (prng_mode != PRNG_MODE_TDES) {
+		/* check for MSA5 support for PPNO operations */
+		if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN,
+					       CRYPT_S390_MSA5)) {
+			if (prng_mode == PRNG_MODE_SHA512) {
+				pr_err("The prng module cannot "
+				       "start in SHA-512 mode\n");
+				return -EOPNOTSUPP;
+			}
+			prng_mode = PRNG_MODE_TDES;
+		} else
+			prng_mode = PRNG_MODE_SHA512;
+	}
 
-	p = kmalloc(sizeof(struct s390_prng_data), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-	p->count = 0;
+	if (prng_mode == PRNG_MODE_SHA512) {
 
-	p->buf = kmalloc(prng_chunk_size, GFP_KERNEL);
-	if (!p->buf) {
-		ret = -ENOMEM;
-		goto out_free;
-	}
+		/* SHA512 mode */
 
-	/* initialize the PRNG, add 128 bits of entropy */
-	prng_seed(16);
+		if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+			return -EINVAL;
+		prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
 
-	ret = misc_register(&prng_dev);
-	if (ret)
-		goto out_buf;
-	return 0;
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+			return -EINVAL;
+
+		ret = prng_sha512_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_sha512_dev);
+		if (ret) {
+			prng_sha512_deinstantiate();
+			goto out;
+		}
+		ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj,
+					 &prng_sha512_dev_attr_group);
+		if (ret) {
+			misc_deregister(&prng_sha512_dev);
+			prng_sha512_deinstantiate();
+			goto out;
+		}
 
-out_buf:
-	kfree(p->buf);
-out_free:
-	kfree(p);
+	} else {
+
+		/* TDES mode */
+
+		if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+			return -EINVAL;
+		prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+			return -EINVAL;
+
+		ret = prng_tdes_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_tdes_dev);
+		if (ret) {
+			prng_tdes_deinstantiate();
+			goto out;
+		}
+		ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj,
+					 &prng_tdes_dev_attr_group);
+		if (ret) {
+			misc_deregister(&prng_tdes_dev);
+			prng_tdes_deinstantiate();
+			goto out;
+		}
+
+	}
+
+out:
 	return ret;
 }
 
+
 static void __exit prng_exit(void)
 {
-	/* wipe me */
-	kzfree(p->buf);
-	kfree(p);
-
-	misc_deregister(&prng_dev);
+	if (prng_mode == PRNG_MODE_SHA512) {
+		sysfs_remove_group(&prng_sha512_dev.this_device->kobj,
+				   &prng_sha512_dev_attr_group);
+		misc_deregister(&prng_sha512_dev);
+		prng_sha512_deinstantiate();
+	} else {
+		sysfs_remove_group(&prng_tdes_dev.this_device->kobj,
+				   &prng_tdes_dev_attr_group);
+		misc_deregister(&prng_tdes_dev);
+		prng_tdes_deinstantiate();
+	}
 }
 
+
 module_init(prng_init);
 module_exit(prng_exit);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 3f5c799b7fb5..d3f896a35b98 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -48,7 +48,7 @@ static struct dentry *hypfs_last_dentry;
 static void hypfs_update_update(struct super_block *sb)
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
-	struct inode *inode = sb_info->update_file->d_inode;
+	struct inode *inode = d_inode(sb_info->update_file);
 
 	sb_info->last_update = get_seconds();
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -64,7 +64,7 @@ static void hypfs_add_dentry(struct dentry *dentry)
 
 static inline int hypfs_positive(struct dentry *dentry)
 {
-	return dentry->d_inode && !d_unhashed(dentry);
+	return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 static void hypfs_remove(struct dentry *dentry)
@@ -72,16 +72,16 @@ static void hypfs_remove(struct dentry *dentry)
 	struct dentry *parent;
 
 	parent = dentry->d_parent;
-	mutex_lock(&parent->d_inode->i_mutex);
+	mutex_lock(&d_inode(parent)->i_mutex);
 	if (hypfs_positive(dentry)) {
 		if (d_is_dir(dentry))
-			simple_rmdir(parent->d_inode, dentry);
+			simple_rmdir(d_inode(parent), dentry);
 		else
-			simple_unlink(parent->d_inode, dentry);
+			simple_unlink(d_inode(parent), dentry);
 	}
 	d_delete(dentry);
 	dput(dentry);
-	mutex_unlock(&parent->d_inode->i_mutex);
+	mutex_unlock(&d_inode(parent)->i_mutex);
 }
 
 static void hypfs_delete_tree(struct dentry *root)
@@ -336,7 +336,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 	struct dentry *dentry;
 	struct inode *inode;
 
-	mutex_lock(&parent->d_inode->i_mutex);
+	mutex_lock(&d_inode(parent)->i_mutex);
 	dentry = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(dentry)) {
 		dentry = ERR_PTR(-ENOMEM);
@@ -357,14 +357,14 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 	} else if (S_ISDIR(mode)) {
 		inode->i_op = &simple_dir_inode_operations;
 		inode->i_fop = &simple_dir_operations;
-		inc_nlink(parent->d_inode);
+		inc_nlink(d_inode(parent));
 	} else
 		BUG();
 	inode->i_private = data;
 	d_instantiate(dentry, inode);
 	dget(dentry);
 fail:
-	mutex_unlock(&parent->d_inode->i_mutex);
+	mutex_unlock(&d_inode(parent)->i_mutex);
 	return dentry;
 }
 
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 694bcd6bd927..2f924bc30e35 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -26,6 +26,9 @@
 /* Not more than 2GB */
 #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
 
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+
 /* Maximum address we can use for the crash control pages */
 #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
 
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index a5e656260a70..d29ad9545b41 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -14,7 +14,9 @@ typedef struct {
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
-	/* The mmu context has extended page tables. */
+	/* The mmu context allocates 4K page tables. */
+	unsigned int alloc_pgste:1;
+	/* The mmu context uses extended page tables. */
 	unsigned int has_pgste:1;
 	/* The mmu context uses storage keys. */
 	unsigned int use_skey:1;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index d25d9ff10ba8..fb1b93ea3e3f 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -20,8 +20,11 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.flush_mm = 0;
 	mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
+#ifdef CONFIG_PGSTE
+	mm->context.alloc_pgste = page_table_allocate_pgste;
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
+#endif
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 51e7fb634ebc..7b7858f158b4 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -21,6 +21,7 @@ void crst_table_free(struct mm_struct *, unsigned long *);
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
 void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+extern int page_table_allocate_pgste;
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 989cfae9e202..fc642399b489 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -12,12 +12,9 @@
 #define _ASM_S390_PGTABLE_H
 
 /*
- * The Linux memory management assumes a three-level page table setup. For
- * s390 31 bit we "fold" the mid level into the top-level page table, so
- * that we physically have the same two-level page table as the s390 mmu
- * expects in 31 bit mode. For s390 64 bit we use three of the five levels
- * the hardware provides (region first and region second tables are not
- * used).
+ * The Linux memory management assumes a three-level page table setup.
+ * For s390 64 bit we use up to four of the five levels the hardware
+ * provides (region first tables are not used).
  *
  * The "pgd_xxx()" functions are trivial for a folded two-level
  * setup: the pgd is never bad, and a pmd always exists (as it's folded
@@ -101,8 +98,8 @@ extern unsigned long zero_page_mask;
 
 #ifndef __ASSEMBLY__
 /*
- * The vmalloc and module area will always be on the topmost area of the kernel
- * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
  * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
  * modules will reside. That makes sure that inter module branches always
  * happen without trampolines and in addition the placement within a 2GB frame
@@ -131,38 +128,6 @@ static inline int is_module_addr(void *addr)
 }
 
 /*
- * A 31 bit pagetable entry of S390 has following format:
- *  |   PFRA          |    |  OS  |
- * 0                   0IP0
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Page-Invalid Bit:    Page is not available for address-translation
- * P Page-Protection Bit: Store access not possible for page
- *
- * A 31 bit segmenttable entry of S390 has following format:
- *  |   P-table origin      |  |PTL
- * 0                         IC
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * I Segment-Invalid Bit:    Segment is not available for address-translation
- * C Common-Segment Bit:     Segment is not private (PoP 3-30)
- * PTL Page-Table-Length:    Page-table length (PTL+1*16 entries -> up to 256)
- *
- * The 31 bit segmenttable origin of S390 has following format:
- *
- *  |S-table origin   |     | STL |
- * X                   **GPS
- * 00000000001111111111222222222233
- * 01234567890123456789012345678901
- *
- * X Space-Switch event:
- * G Segment-Invalid Bit:     *
- * P Private-Space Bit:       Segment is not private (PoP 3-30)
- * S Storage-Alteration:
- * STL Segment-Table-Length:  Segment-table length (STL+1*16 entries -> up to 2048)
- *
  * A 64 bit pagetable entry of S390 has following format:
  * |			 PFRA			      |0IPC|  OS  |
  * 0000000000111111111122222222223333333333444444444455555555556666
@@ -220,7 +185,6 @@ static inline int is_module_addr(void *addr)
 
 /* Software bits in the page table entry */
 #define _PAGE_PRESENT	0x001		/* SW pte present bit */
-#define _PAGE_TYPE	0x002		/* SW pte type bit */
 #define _PAGE_YOUNG	0x004		/* SW pte young bit */
 #define _PAGE_DIRTY	0x008		/* SW pte dirty bit */
 #define _PAGE_READ	0x010		/* SW pte read bit */
@@ -240,31 +204,34 @@ static inline int is_module_addr(void *addr)
  * table lock held.
  *
  * The following table gives the different possible bit combinations for
- * the pte hardware and software bits in the last 12 bits of a pte:
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
  *
  *				842100000000
  *				000084210000
  *				000000008421
- *				.IR...wrdytp
- * empty			.10...000000
- * swap				.10...xxxx10
- * file				.11...xxxxx0
- * prot-none, clean, old	.11...000001
- * prot-none, clean, young	.11...000101
- * prot-none, dirty, old	.10...001001
- * prot-none, dirty, young	.10...001101
- * read-only, clean, old	.11...010001
- * read-only, clean, young	.01...010101
- * read-only, dirty, old	.11...011001
- * read-only, dirty, young	.01...011101
- * read-write, clean, old	.11...110001
- * read-write, clean, young	.01...110101
- * read-write, dirty, old	.10...111001
- * read-write, dirty, young	.00...111101
+ *				.IR.uswrdy.p
+ * empty			.10.00000000
+ * swap				.11..ttttt.0
+ * prot-none, clean, old	.11.xx0000.1
+ * prot-none, clean, young	.11.xx0001.1
+ * prot-none, dirty, old	.10.xx0010.1
+ * prot-none, dirty, young	.10.xx0011.1
+ * read-only, clean, old	.11.xx0100.1
+ * read-only, clean, young	.01.xx0101.1
+ * read-only, dirty, old	.11.xx0110.1
+ * read-only, dirty, young	.01.xx0111.1
+ * read-write, clean, old	.11.xx1100.1
+ * read-write, clean, young	.01.xx1101.1
+ * read-write, dirty, old	.10.xx1110.1
+ * read-write, dirty, young	.00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ *	    u unused, l large
  *
- * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
- * pte_none    is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
- * pte_swap    is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
+ * pte_none    is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap    is true for the bit pattern .11..ooooo.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
  */
 
 /* Bits in the segment/region table address-space-control-element */
@@ -335,6 +302,8 @@ static inline int is_module_addr(void *addr)
  * read-write, dirty, young	11..0...0...11
  * The segment table origin is used to distinguish empty (origin==0) from
  * read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
  */
 
 #define _SEGMENT_ENTRY_SPLIT_BIT 11	/* THP splitting bit number */
@@ -423,6 +392,15 @@ static inline int mm_has_pgste(struct mm_struct *mm)
 	return 0;
 }
 
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (unlikely(mm->context.alloc_pgste))
+		return 1;
+#endif
+	return 0;
+}
+
 /*
  * In the case that a guest uses storage keys
  * faults should no longer be backed by zero pages
@@ -582,10 +560,9 @@ static inline int pte_none(pte_t pte)
 
 static inline int pte_swap(pte_t pte)
 {
-	/* Bit pattern: (pte & 0x603) == 0x402 */
-	return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
-				_PAGE_TYPE | _PAGE_PRESENT))
-		== (_PAGE_INVALID | _PAGE_TYPE);
+	/* Bit pattern: (pte & 0x201) == 0x200 */
+	return (pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT))
+		== _PAGE_PROTECT;
 }
 
 static inline int pte_special(pte_t pte)
@@ -1586,51 +1563,51 @@ static inline int has_transparent_hugepage(void)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * 31 bit swap entry format:
- * A page-table entry has some bits we have to treat in a special way.
- * Bits 0, 20 and bit 23 have to be zero, otherwise an specification
- * exception will occur instead of a page translation exception. The
- * specifiation exception has the bad habit not to store necessary
- * information in the lowcore.
- * Bits 21, 22, 30 and 31 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 1-19 and bits 24-29 to store type and offset.
- * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
- * plus 24 for the offset.
- * 0|     offset        |0110|o|type |00|
- * 0 0000000001111111111 2222 2 22222 33
- * 0 1234567890123456789 0123 4 56789 01
- *
  * 64 bit swap entry format:
  * A page-table entry has some bits we have to treat in a special way.
  * Bits 52 and bit 55 have to be zero, otherwise an specification
  * exception will occur instead of a page translation exception. The
  * specifiation exception has the bad habit not to store necessary
  * information in the lowcore.
- * Bits 53, 54, 62 and 63 are used to indicate the page type.
- * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
- * This leaves the bits 0-51 and bits 56-61 to store type and offset.
- * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
- * plus 56 for the offset.
- * |                      offset                        |0110|o|type |00|
- *  0000000000111111111122222222223333333333444444444455 5555 5 55566 66
- *  0123456789012345678901234567890123456789012345678901 2345 6 78901 23
+ * Bits 54 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * This leaves the bits 0-51 and bits 56-62 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
+ * for the offset.
+ * |			  offset			|01100|type |00|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
  */
 
-#define __SWP_OFFSET_MASK (~0UL >> 11)
+#define __SWP_OFFSET_MASK	((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT	12
+#define __SWP_TYPE_MASK		((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT	2
 
 static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 {
 	pte_t pte;
-	offset &= __SWP_OFFSET_MASK;
-	pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
-		((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
+
+	pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT;
+	pte_val(pte) |= (offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT;
+	pte_val(pte) |= (type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT;
 	return pte;
 }
 
-#define __swp_type(entry)	(((entry).val >> 2) & 0x1f)
-#define __swp_offset(entry)	(((entry).val >> 11) | (((entry).val >> 7) & 1))
-#define __swp_entry(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) })
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+	return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
 
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index afa2bd750ffc..8cd8e7b288c5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
 	0xffe6fffbfcfdfc40UL,
-	0x205c800000000000UL,
+	0x005c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 210ffede0153..e617e74b7be2 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -14,20 +14,23 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
 
 	/*
 	 * Convert encoding		  pte bits	   pmd bits
-	 *				.IR...wrdytp	dy..R...I...wr
-	 * empty			.10...000000 -> 00..0...1...00
-	 * prot-none, clean, old	.11...000001 -> 00..1...1...00
-	 * prot-none, clean, young	.11...000101 -> 01..1...1...00
-	 * prot-none, dirty, old	.10...001001 -> 10..1...1...00
-	 * prot-none, dirty, young	.10...001101 -> 11..1...1...00
-	 * read-only, clean, old	.11...010001 -> 00..1...1...01
-	 * read-only, clean, young	.01...010101 -> 01..1...0...01
-	 * read-only, dirty, old	.11...011001 -> 10..1...1...01
-	 * read-only, dirty, young	.01...011101 -> 11..1...0...01
-	 * read-write, clean, old	.11...110001 -> 00..0...1...11
-	 * read-write, clean, young	.01...110101 -> 01..0...0...11
-	 * read-write, dirty, old	.10...111001 -> 10..0...1...11
-	 * read-write, dirty, young	.00...111101 -> 11..0...0...11
+	 *				lIR.uswrdy.p	dy..R...I...wr
+	 * empty			010.000000.0 -> 00..0...1...00
+	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
+	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
+	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
+	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
+	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
+	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
+	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
+	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
+	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
+	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
+	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
+	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
 	 */
 	if (pte_present(pte)) {
 		pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
@@ -48,20 +51,23 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 
 	/*
 	 * Convert encoding		   pmd bits	    pte bits
-	 *				dy..R...I...wr	  .IR...wrdytp
-	 * empty			00..0...1...00 -> .10...001100
-	 * prot-none, clean, old	00..0...1...00 -> .10...000001
-	 * prot-none, clean, young	01..0...1...00 -> .10...000101
-	 * prot-none, dirty, old	10..0...1...00 -> .10...001001
-	 * prot-none, dirty, young	11..0...1...00 -> .10...001101
-	 * read-only, clean, old	00..1...1...01 -> .11...010001
-	 * read-only, clean, young	01..1...1...01 -> .11...010101
-	 * read-only, dirty, old	10..1...1...01 -> .11...011001
-	 * read-only, dirty, young	11..1...1...01 -> .11...011101
-	 * read-write, clean, old	00..0...1...11 -> .10...110001
-	 * read-write, clean, young	01..0...1...11 -> .10...110101
-	 * read-write, dirty, old	10..0...1...11 -> .10...111001
-	 * read-write, dirty, young	11..0...1...11 -> .10...111101
+	 *				dy..R...I...wr	  lIR.uswrdy.p
+	 * empty			00..0...1...00 -> 010.000000.0
+	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
+	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
+	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
+	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
+	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
+	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
+	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
+	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
+	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
+	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
+	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
+	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
 	 */
 	if (pmd_present(pmd)) {
 		pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
@@ -70,8 +76,8 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
-		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 33f589459113..b33f66110ca9 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -18,6 +18,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
+#include <linux/sysctl.h>
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
@@ -920,6 +921,40 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
 }
 EXPORT_SYMBOL(get_guest_storage_key);
 
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+	{
+		.procname	= "allocate_pgste",
+		.data		= &page_table_allocate_pgste,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= proc_dointvec,
+		.extra1		= &page_table_allocate_pgste_min,
+		.extra2		= &page_table_allocate_pgste_max,
+	},
+	{ }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+	{
+		.procname	= "vm",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= page_table_sysctl,
+	},
+	{ }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)
@@ -963,7 +998,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	struct page *uninitialized_var(page);
 	unsigned int mask, bit;
 
-	if (mm_has_pgste(mm))
+	if (mm_alloc_pgste(mm))
 		return page_table_alloc_pgste(mm);
 	/* Allocate fragments of a 4K page as 1K/2K page table */
 	spin_lock_bh(&mm->context.list_lock);
@@ -1165,116 +1200,25 @@ static inline void thp_split_mm(struct mm_struct *mm)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
-				struct mm_struct *mm, pud_t *pud,
-				unsigned long addr, unsigned long end)
-{
-	unsigned long next, *table, *new;
-	struct page *page;
-	spinlock_t *ptl;
-	pmd_t *pmd;
-
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-again:
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		table = (unsigned long *) pmd_deref(*pmd);
-		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-		if (page_table_with_pgste(page))
-			continue;
-		/* Allocate new page table with pgstes */
-		new = page_table_alloc_pgste(mm);
-		if (!new)
-			return -ENOMEM;
-
-		ptl = pmd_lock(mm, pmd);
-		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
-			/* Nuke pmd entry pointing to the "short" page table */
-			pmdp_flush_lazy(mm, addr, pmd);
-			pmd_clear(pmd);
-			/* Copy ptes from old table to new table */
-			memcpy(new, table, PAGE_SIZE/2);
-			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-			/* Establish new table */
-			pmd_populate(mm, pmd, (pte_t *) new);
-			/* Free old table with rcu, there might be a walker! */
-			page_table_free_rcu(tlb, table, addr);
-			new = NULL;
-		}
-		spin_unlock(ptl);
-		if (new) {
-			page_table_free_pgste(new);
-			goto again;
-		}
-	} while (pmd++, addr = next, addr != end);
-
-	return addr;
-}
-
-static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
-				   struct mm_struct *mm, pgd_t *pgd,
-				   unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	pud_t *pud;
-
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
-		if (unlikely(IS_ERR_VALUE(next)))
-			return next;
-	} while (pud++, addr = next, addr != end);
-
-	return addr;
-}
-
-static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
-					unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	pgd_t *pgd;
-
-	pgd = pgd_offset(mm, addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
-		if (unlikely(IS_ERR_VALUE(next)))
-			return next;
-	} while (pgd++, addr = next, addr != end);
-
-	return 0;
-}
-
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
 int s390_enable_sie(void)
 {
-	struct task_struct *tsk = current;
-	struct mm_struct *mm = tsk->mm;
-	struct mmu_gather tlb;
+	struct mm_struct *mm = current->mm;
 
 	/* Do we have pgstes? if yes, we are done */
-	if (mm_has_pgste(tsk->mm))
+	if (mm_has_pgste(mm))
 		return 0;
-
+	/* Fail if the page tables are 2K */
+	if (!mm_alloc_pgste(mm))
+		return -EINVAL;
 	down_write(&mm->mmap_sem);
+	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	/* Reallocate the page tables with pgstes */
-	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
-	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
-		mm->context.has_pgste = 1;
-	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
 	up_write(&mm->mmap_sem);
-	return mm->context.has_pgste ? 0 : -ENOMEM;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index 669df51a82e3..324599bfad14 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -17,6 +17,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/io.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
@@ -243,10 +244,10 @@ static struct platform_device sh_mmcif_device = {
 };
 
 /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI_RX,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI_RX,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED,
 };
 
 static struct resource sdhi_resources[] = {
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index d4b01d4cc102..cbd2a9f02a91 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -18,6 +18,7 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/sh_flctl.h>
+#include <linux/mfd/tmio.h>
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/regulator/fixed.h>
@@ -447,8 +448,8 @@ static struct resource sdhi0_cn3_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sdhi0_cn3_data = {
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi0_cn3_data = {
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi0_cn3_device = {
@@ -474,8 +475,8 @@ static struct resource sdhi1_cn7_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sdhi1_cn7_data = {
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi1_cn7_data = {
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi1_cn7_device = {
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 0d3049244cd3..d531791f06ff 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -601,12 +601,12 @@ static struct platform_device sdhi0_power = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sdhi0_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi0_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.capabilities	= MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
 			  MMC_CAP_NEEDS_POLL,
-	.tmio_flags	= TMIO_MMC_USE_GPIO_CD,
+	.flags		= TMIO_MMC_USE_GPIO_CD,
 	.cd_gpio	= GPIO_PTY7,
 };
 
@@ -635,12 +635,12 @@ static struct platform_device sdhi0_device = {
 
 #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
 /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI1_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI1_RX,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi1_info = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI1_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI1_RX,
+	.capabilities	= MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
 			  MMC_CAP_NEEDS_POLL,
-	.tmio_flags	= TMIO_MMC_USE_GPIO_CD,
+	.flags		= TMIO_MMC_USE_GPIO_CD,
 	.cd_gpio	= GPIO_PTW7,
 };
 
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 1df4398f8375..7d997cec09c5 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -373,11 +373,11 @@ static struct resource kfr2r09_sh_sdhi0_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_flags	= TMIO_MMC_WRPROTECT_DISABLE,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.flags		= TMIO_MMC_WRPROTECT_DISABLE,
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device kfr2r09_sh_sdhi0_device = {
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 8b73194ed2ce..29b7c0dcfc51 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -15,6 +15,7 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mtd/nand.h>
 #include <linux/i2c.h>
 #include <linux/regulator/fixed.h>
@@ -408,10 +409,10 @@ static struct resource sdhi_cn9_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi_data = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi_data = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi_cn9_device = {
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 1162bc6945a3..4f6635a075f2 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mtd/physmap.h>
 #include <linux/delay.h>
 #include <linux/regulator/fixed.h>
@@ -468,10 +469,10 @@ static struct resource sdhi0_cn7_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi0_cn7_device = {
@@ -497,10 +498,10 @@ static struct resource sdhi1_cn8_resources[] = {
 	},
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi1_data = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI1_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI1_RX,
-	.tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi1_data = {
+	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI1_TX,
+	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI1_RX,
+	.capabilities	= MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi1_cn8_device = {
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 6873f006f7d0..d366675e4bf8 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -774,7 +774,7 @@ static void __init zone_sizes_init(void)
 		 * though, there'll be no lowmem, so we just alloc_bootmem
 		 * the memmap.  There will be no percpu memory either.
 		 */
-		if (i != 0 && cpumask_test_cpu(i, &isolnodes)) {
+		if (i != 0 && node_isset(i, isolnodes)) {
 			node_memmap_pfn[i] =
 				alloc_bootmem_pfn(0, memmap_size, 0);
 			BUG_ON(node_percpu[i] != 0);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6049d587599e..226d5696e1d1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86_64
 ### Arch settings
 config X86
 	def_bool y
+	select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
 	select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ef17683484e9..48304b89b601 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1109,6 +1109,8 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	if (!cmdline_ptr)
 		goto fail;
 	hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
+	/* Fill in upper bits of command line address, NOP on 32 bit  */
+	boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
 
 	hdr->ramdisk_image = 0;
 	hdr->ramdisk_size = 0;
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index a4771dcd1fcf..1f20b35d8573 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -79,7 +79,7 @@ NUM_BLKS    = %rdx
 c           = %rcx
 d           = %r8
 e           = %rdx
-y3          = %rdi
+y3          = %rsi
 
 TBL   = %rbp
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a821b1cd4fa7..72bf2680f819 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -427,6 +427,13 @@ sysretl_from_sys_call:
 	 * cs and ss are loaded from MSRs.
 	 * (Note: 32bit->32bit SYSRET is different: since r11
 	 * does not exist, it merely sets eflags.IF=1).
+	 *
+	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+	 * descriptor is not reinitialized.  This means that we must
+	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
+	 * exit the kernel, and re-enter using an interrupt vector.  (All
+	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+	 * from happening by reloading SS in __switch_to.
 	 */
 	USERGS_SYSRET32
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 7ee9b94d9921..3d6606fb97d0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -265,6 +265,7 @@
 #define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index e42f758a0fbd..055ea9941dd5 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -50,7 +50,7 @@ extern const struct hypervisor_x86 *x86_hyper;
 /* Recognized hypervisors */
 extern const struct hypervisor_x86 x86_hyper_vmware;
 extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
-extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_xen;
 extern const struct hypervisor_x86 x86_hyper_kvm;
 
 extern void init_hypervisor(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 25b1cc07d496..d6b078e9fa28 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,7 +95,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
 	struct pvclock_vcpu_time_info pvti;
-	u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index cf87de3fc390..64b611782ef0 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -169,7 +169,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	tmp.head &= ~TICKET_SLOWPATH_FLAG;
-	return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
+	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
 #define arch_spin_is_contended	arch_spin_is_contended
 
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 358dcd338915..c44a5d53e464 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -269,4 +269,9 @@ static inline bool xen_arch_need_swiotlb(struct device *dev,
 	return false;
 }
 
+static inline unsigned long xen_get_swiotlb_free_pages(unsigned int order)
+{
+	return __get_free_pages(__GFP_NOWARN, order);
+}
+
 #endif /* _ASM_X86_XEN_PAGE_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 803b684676ff..dbe76a14c3c9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
 {
 	return _acpi_map_lsapic(handle, physid, pcpu);
 }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index fd470ebf924e..e4cf63301ff4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
 		if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
 			set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+	/* AMD CPUs don't reset SS attributes on SYSRET */
+	set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 36ce402a3fa5..d820d8eae96b 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -27,8 +27,8 @@
 
 static const __initconst struct hypervisor_x86 * const hypervisors[] =
 {
-#ifdef CONFIG_XEN_PVHVM
-	&x86_hyper_xen_hvm,
+#ifdef CONFIG_XEN
+	&x86_hyper_xen,
 #endif
 	&x86_hyper_vmware,
 	&x86_hyper_ms_hyperv,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 219d3fb423a1..3998131d1a68 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
@@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids
 	[ C(OP_READ) ] = {
 		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
 		[ C(RESULT_ACCESS) ] = 0x01b7,
-		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-		[ C(RESULT_MISS)   ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0,
 	},
 	[ C(OP_WRITE) ] = {
 		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
@@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids
  [ C(ITLB) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
+		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = -1,
@@ -2533,34 +2532,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
 	return x86_event_sysfs_show(page, config, event);
 }
 
-static __initconst const struct x86_pmu core_pmu = {
-	.name			= "core",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= core_pmu_enable_all,
-	.enable			= core_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_pmu_hw_config,
-	.schedule_events	= x86_schedule_events,
-	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
-	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-	.event_map		= intel_pmu_event_map,
-	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
-	.apic			= 1,
-	/*
-	 * Intel PMCs cannot be accessed sanely above 32 bit width,
-	 * so we install an artificial 1<<31 period regardless of
-	 * the generic event period:
-	 */
-	.max_period		= (1ULL << 31) - 1,
-	.get_event_constraints	= intel_get_event_constraints,
-	.put_event_constraints	= intel_put_event_constraints,
-	.event_constraints	= intel_core_event_constraints,
-	.guest_get_msrs		= core_guest_get_msrs,
-	.format_attrs		= intel_arch_formats_attr,
-	.events_sysfs_show	= intel_event_sysfs_show,
-};
-
 struct intel_shared_regs *allocate_shared_regs(int cpu)
 {
 	struct intel_shared_regs *regs;
@@ -2743,6 +2714,44 @@ static struct attribute *intel_arch3_formats_attr[] = {
 	NULL,
 };
 
+static __initconst const struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= core_pmu_enable_all,
+	.enable			= core_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32-bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL<<31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+	.guest_get_msrs		= core_guest_get_msrs,
+	.format_attrs		= intel_arch_formats_attr,
+	.events_sysfs_show	= intel_event_sysfs_show,
+
+	/*
+	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs
+	 * together with PMU version 1 and thus be using core_pmu with
+	 * shared_regs. We need following callbacks here to allocate
+	 * it properly.
+	 */
+	.cpu_prepare		= intel_pmu_cpu_prepare,
+	.cpu_starting		= intel_pmu_cpu_starting,
+	.cpu_dying		= intel_pmu_cpu_dying,
+};
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 999289b94025..358c54ad20d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)
 		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
+	case 61: /* Broadwell */
 		rapl_cntr_mask = RAPL_IDX_HSW;
 		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
 		break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 3001015b755c..4562e9e22c60 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -1,6 +1,13 @@
 /* Nehalem/SandBridge/Haswell uncore support */
 #include "perf_event_intel_uncore.h"
 
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC	0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
+
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
 #define SNB_UNC_CTL_UMASK_MASK			0x0000ff00
@@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ },
 };
 
@@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
 	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	{  /* end marker */ }
 };
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7b238494b31..02c2eff7478d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,15 @@ system_call_fastpath:
 	 * rflags from r11 (but RF and VM bits are forced to 0),
 	 * cs and ss are loaded from MSRs.
 	 * Restoration of rflags re-enables interrupts.
+	 *
+	 * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+	 * descriptor is not reinitialized.  This means that we should
+	 * avoid SYSRET with SS == NULL, which could happen if we schedule,
+	 * exit the kernel, and re-enter using an interrupt vector.  (All
+	 * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+	 * from happening by reloading SS in __switch_to.  (Actually
+	 * detecting the failure in 64-bit userspace is tricky but can be
+	 * done.)
 	 */
 	USERGS_SYSRET64
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8213da62b1b7..6e338e3b1dc0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
 };
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -156,11 +156,13 @@ void flush_thread(void)
 		/* FPU state will be reallocated lazily at the first use. */
 		drop_fpu(tsk);
 		free_thread_xstate(tsk);
-	} else if (!used_math()) {
-		/* kthread execs. TODO: cleanup this horror. */
-		if (WARN_ON(init_fpu(tsk)))
-			force_sig(SIGKILL, tsk);
-		user_fpu_begin();
+	} else {
+		if (!tsk_used_math(tsk)) {
+			/* kthread execs. TODO: cleanup this horror. */
+			if (WARN_ON(init_fpu(tsk)))
+				force_sig(SIGKILL, tsk);
+			user_fpu_begin();
+		}
 		restore_init_xstate();
 	}
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4baaa972f52a..ddfdbf74f174 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+		/*
+		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+		 * does not update the cached descriptor.  As a result, if we
+		 * do SYSRET while SS is NULL, we'll end up in user mode with
+		 * SS apparently equal to __USER_DS but actually unusable.
+		 *
+		 * The straightforward workaround would be to fix it up just
+		 * before SYSRET, but that would slow down the system call
+		 * fast paths.  Instead, we ensure that SS is never NULL in
+		 * system call context.  We do this by replacing NULL SS
+		 * selectors at every context switch.  SYSCALL sets up a valid
+		 * SS, so the only way to get NULL is to re-enter the kernel
+		 * from CPL 3 through an interrupt.  Since that can't happen
+		 * in the same task as a running syscall, we are guaranteed to
+		 * context switch between every interrupt vector entry and a
+		 * subsequent SYSRET.
+		 *
+		 * We read SS first because SS reads are much faster than
+		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
+		 * it previously had a different non-NULL value.
+		 */
+		unsigned short ss_sel;
+		savesegment(ss, ss_sel);
+		if (ss_sel != __KERNEL_DS)
+			loadsegment(ss, __KERNEL_DS);
+	}
+
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index e5ecd20e72dd..2f355d229a58 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
-	if (!pvclock_vdso_info) {
-		BUG();
-		return NULL;
-	}
-
-	return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
-	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
 #ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
-			        void *v)
-{
-	struct task_migration_notifier *mn = v;
-	struct pvclock_vsyscall_time_info *pvti;
-
-	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
-	/* this is NULL when pvclock vsyscall is not initialized */
-	if (unlikely(pvti == NULL))
-		return NOTIFY_DONE;
-
-	pvti->migrate_count++;
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
-	.notifier_call = pvclock_task_migrate,
-};
-
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
 	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
-	pvclock_vdso_info = i;
-
 	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
 		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 			     __pa(i) + (idx*PAGE_SIZE),
 			     PAGE_KERNEL_VVAR);
 	}
 
-
-	register_task_migration_notifier(&pvclock_migrate);
-
 	return 0;
 }
 #endif
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
index 6eb5c20ee373..d090ecf08809 100644
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev)
 		if (r)
 			return r;
 
-		inode = path.dentry->d_inode;
+		inode = d_backing_inode(path.dentry);
 
 		r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
 		path_put(&path);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d67206a7b99a..629af0f1c5c4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	unsigned long bitmap = 1;
 	struct kvm_lapic **dst;
 	int i;
-	bool ret = false;
-	bool x2apic_ipi = src && apic_x2apic_mode(src);
+	bool ret, x2apic_ipi;
 
 	*r = -1;
 
@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	if (irq->shorthand)
 		return false;
 
+	x2apic_ipi = src && apic_x2apic_mode(src);
 	if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
 		return false;
 
+	ret = true;
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
-	if (!map)
+	if (!map) {
+		ret = false;
 		goto out;
-
-	ret = true;
+	}
 
 	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
 		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 146f295ee322..d43867c33bc4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 		pfn = spte_to_pfn(*sptep);
 
 		/*
-		 * Only EPT supported for now; otherwise, one would need to
-		 * find out efficiently whether the guest page tables are
-		 * also using huge pages.
+		 * We cannot do huge page mapping for indirect shadow pages,
+		 * which are found on the last rmap (level = 1) when not using
+		 * tdp; such shadow pages are synced with the page table in
+		 * the guest, and the guest page table is using 4K page size
+		 * mapping if the indirect sp has level = 1.
 		 */
 		if (sp->role.direct &&
 			!kvm_is_reserved_pfn(pfn) &&
@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	bool flush = false;
 	unsigned long *rmapp;
 	unsigned long last_index, index;
-	gfn_t gfn_start, gfn_end;
 
 	spin_lock(&kvm->mmu_lock);
 
-	gfn_start = memslot->base_gfn;
-	gfn_end = memslot->base_gfn + memslot->npages - 1;
-
-	if (gfn_start >= gfn_end)
-		goto out;
-
 	rmapp = memslot->arch.rmap[0];
-	last_index = gfn_to_index(gfn_end, memslot->base_gfn,
-					PT_PAGE_TABLE_LEVEL);
+	last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
+				memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
 
 	for (index = 0; index <= last_index; ++index, ++rmapp) {
 		if (*rmapp)
@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 
-out:
 	spin_unlock(&kvm->mmu_lock);
 }
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f5e8dce8046c..f7b61687bd79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+	/*
+	 * Pass through host's Machine Check Enable value to hw_cr4, which
+	 * is in force while we are in guest mode.  Do not let guests control
+	 * this bit, even if host CR4.MCE == 0.
+	 */
+	unsigned long hw_cr4 =
+		(cr4_read_shadow() & X86_CR4_MCE) |
+		(cr4 & ~X86_CR4_MCE) |
+		(to_vmx(vcpu)->rmode.vm86_active ?
+		 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	if (cr4 & X86_CR4_VMXE) {
 		/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e1a81267f3f6..c73efcd03e29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1669,12 +1669,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		&guest_hv_clock, sizeof(guest_hv_clock))))
 		return 0;
 
-	/*
-	 * The interface expects us to write an even number signaling that the
-	 * update is finished. Since the guest won't see the intermediate
-	 * state, we just increase by 2 at the end.
+	/* This VCPU is paused, but it's legal for a guest to read another
+	 * VCPU's kvmclock, so we really have to follow the specification where
+	 * it says that version is odd if data is being modified, and even after
+	 * it is consistent.
+	 *
+	 * Version field updates must be kept separate.  This is because
+	 * kvm_write_guest_cached might use a "rep movs" instruction, and
+	 * writes within a string instruction are weakly ordered.  So there
+	 * are three writes overall.
+	 *
+	 * As a small optimization, only write the version field in the first
+	 * and third write.  The vcpu->pv_time cache is still valid, because the
+	 * version field is the first in the struct.
 	 */
-	vcpu->hv_clock.version = guest_hv_clock.version + 2;
+	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+	vcpu->hv_clock.version = guest_hv_clock.version + 1;
+	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+				&vcpu->hv_clock,
+				sizeof(vcpu->hv_clock.version));
+
+	smp_wmb();
 
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1695,6 +1711,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
 				sizeof(vcpu->hv_clock));
+
+	smp_wmb();
+
+	vcpu->hv_clock.version++;
+	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+				&vcpu->hv_clock,
+				sizeof(vcpu->hv_clock.version));
 	return 0;
 }
 
@@ -5799,7 +5822,6 @@ int kvm_arch_init(void *opaque)
 	kvm_set_mmio_spte_mask();
 
 	kvm_x86_ops = ops;
-	kvm_init_msr_list();
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7253,7 +7275,14 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_hardware_setup(void)
 {
-	return kvm_x86_ops->hardware_setup();
+	int r;
+
+	r = kvm_x86_ops->hardware_setup();
+	if (r != 0)
+		return r;
+
+	kvm_init_msr_list();
+	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1f33b3d1fd68..0a42327a59d7 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
 	clac();
 
 	/* If the destination is a kernel buffer, we always clear the end */
-	if ((unsigned long)to >= TASK_SIZE_MAX)
+	if (!__addr_ok(to))
 		memset(to, 0, len);
 	return len;
 }
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 5ead4d6cf3a7..70e7444c6835 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -351,18 +351,20 @@ int arch_ioremap_pmd_supported(void)
  */
 void *xlate_dev_mem_ptr(phys_addr_t phys)
 {
-	void *addr;
-	unsigned long start = phys & PAGE_MASK;
+	unsigned long start  = phys &  PAGE_MASK;
+	unsigned long offset = phys & ~PAGE_MASK;
+	unsigned long vaddr;
 
 	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
 	if (page_is_ram(start >> PAGE_SHIFT))
 		return __va(phys);
 
-	addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
-	if (addr)
-		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+	vaddr = (unsigned long)ioremap_cache(start, PAGE_SIZE);
+	/* Only add the offset on success and return NULL if the ioremap() failed: */
+	if (vaddr)
+		vaddr += offset;
 
-	return addr;
+	return (void *)vaddr;
 }
 
 void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 987514396c1e..99f76103c6b7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 				if (is_ereg(dst_reg))
 					EMIT1(0x41);
 				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
+
+				/* emit 'movzwl eax, ax' */
+				if (is_ereg(dst_reg))
+					EMIT3(0x45, 0x0F, 0xB7);
+				else
+					EMIT2(0x0F, 0xB7);
+				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
 				break;
 			case 32:
 				/* emit 'bswap eax' to swap lower 4 bytes */
@@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			break;
 
 		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			switch (imm32) {
+			case 16:
+				/* emit 'movzwl eax, ax' to zero extend 16-bit
+				 * into 64 bit
+				 */
+				if (is_ereg(dst_reg))
+					EMIT3(0x45, 0x0F, 0xB7);
+				else
+					EMIT2(0x0F, 0xB7);
+				EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
+				break;
+			case 32:
+				/* emit 'mov eax, eax' to clear upper 32-bits */
+				if (is_ereg(dst_reg))
+					EMIT1(0x45);
+				EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
+				break;
+			case 64:
+				/* nop */
+				break;
+			}
 			break;
 
 			/* ST: *(u8*)(dst_reg + off) = imm */
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index e4695985f9de..d93963340c3c 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -325,6 +325,26 @@ static void release_pci_root_info(struct pci_host_bridge *bridge)
 	kfree(info);
 }
 
+/*
+ * An IO port or MMIO resource assigned to a PCI host bridge may be
+ * consumed by the host bridge itself or available to its child
+ * bus/devices. The ACPI specification defines a bit (Producer/Consumer)
+ * to tell whether the resource is consumed by the host bridge itself,
+ * but firmware hasn't used that bit consistently, so we can't rely on it.
+ *
+ * On x86 and IA64 platforms, all IO port and MMIO resources are assumed
+ * to be available to child bus/devices except one special case:
+ *     IO port [0xCF8-0xCFF] is consumed by the host bridge itself
+ *     to access PCI configuration space.
+ *
+ * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF].
+ */
+static bool resource_is_pcicfg_ioport(struct resource *res)
+{
+	return (res->flags & IORESOURCE_IO) &&
+		res->start == 0xCF8 && res->end == 0xCFF;
+}
+
 static void probe_pci_root_info(struct pci_root_info *info,
 				struct acpi_device *device,
 				int busnum, int domain,
@@ -346,8 +366,8 @@ static void probe_pci_root_info(struct pci_root_info *info,
 			"no IO and memory resources present in _CRS\n");
 	else
 		resource_list_for_each_entry_safe(entry, tmp, list) {
-			if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
-			    (entry->res->flags & IORESOURCE_DISABLED))
+			if ((entry->res->flags & IORESOURCE_DISABLED) ||
+			    resource_is_pcicfg_ioport(entry->res))
 				resource_list_destroy_entry(entry);
 			else
 				entry->res->name = info->name;
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 275a3a8b78af..e97032069f88 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
 hostprogs-y			+= vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 40d2473836c9..9793322751e0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,15 +82,18 @@ static notrace cycle_t vread_pvclock(int *mode)
 	cycle_t ret;
 	u64 last;
 	u32 version;
-	u32 migrate_count;
 	u8 flags;
 	unsigned cpu, cpu1;
 
 
 	/*
-	 * When looping to get a consistent (time-info, tsc) pair, we
-	 * also need to deal with the possibility we can switch vcpus,
-	 * so make sure we always re-fetch time-info for the current vcpu.
+	 * Note: hypervisor must guarantee that:
+	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
+	 * 2. that per-CPU pvclock time info is updated if the
+	 *    underlying CPU changes.
+	 * 3. that version is increased whenever underlying CPU
+	 *    changes.
+	 *
 	 */
 	do {
 		cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -99,27 +102,20 @@ static notrace cycle_t vread_pvclock(int *mode)
 		 * __getcpu() calls (Gleb).
 		 */
 
-		/* Make sure migrate_count will change if we leave the VCPU. */
-		do {
-			pvti = get_pvti(cpu);
-			migrate_count = pvti->migrate_count;
-
-			cpu1 = cpu;
-			cpu = __getcpu() & VGETCPU_CPU_MASK;
-		} while (unlikely(cpu != cpu1));
+		pvti = get_pvti(cpu);
 
 		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
 		/*
 		 * Test we're still on the cpu as well as the version.
-		 * - We must read TSC of pvti's VCPU.
-		 * - KVM doesn't follow the versioning protocol, so data could
-		 *   change before version if we left the VCPU.
+		 * We could have been migrated just after the first
+		 * vgetcpu but before fetching the version, so we
+		 * wouldn't notice a version change.
 		 */
-		smp_rmb();
-	} while (unlikely((pvti->pvti.version & 1) ||
-			  pvti->pvti.version != version ||
-			  pvti->migrate_count != migrate_count));
+		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+	} while (unlikely(cpu != cpu1 ||
+			  (pvti->pvti.version & 1) ||
+			  pvti->pvti.version != version));
 
 	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
 		*mode = VCLOCK_NONE;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 94578efd3067..46957ead3060 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = {
 
 static void __init xen_hvm_guest_init(void)
 {
+	if (xen_pv_domain())
+		return;
+
 	init_hvm_pv_info();
 
 	xen_hvm_init_shared_info();
@@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void)
 	xen_hvm_init_time_ops();
 	xen_hvm_init_mmu_ops();
 }
+#endif
 
 static bool xen_nopv = false;
 static __init int xen_parse_nopv(char *arg)
@@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg)
 }
 early_param("xen_nopv", xen_parse_nopv);
 
-static uint32_t __init xen_hvm_platform(void)
+static uint32_t __init xen_platform(void)
 {
 	if (xen_nopv)
 		return 0;
 
-	if (xen_pv_domain())
-		return 0;
-
 	return xen_cpuid_base();
 }
 
@@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void)
 }
 EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
 
-const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
-	.name			= "Xen HVM",
-	.detect			= xen_hvm_platform,
+static void xen_set_cpu_features(struct cpuinfo_x86 *c)
+{
+	if (xen_pv_domain())
+		clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+}
+
+const struct hypervisor_x86 x86_hyper_xen = {
+	.name			= "Xen",
+	.detect			= xen_platform,
+#ifdef CONFIG_XEN_PVHVM
 	.init_platform		= xen_hvm_guest_init,
+#endif
 	.x2apic_available	= xen_x2apic_para_available,
+	.set_cpu_features       = xen_set_cpu_features,
 };
-EXPORT_SYMBOL(x86_hyper_xen_hvm);
-#endif
+EXPORT_SYMBOL(x86_hyper_xen);
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9497698645a..53b4c0811f4f 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data)
 	tick_resume_local();
 }
 
+static void xen_vcpu_notify_suspend(void *data)
+{
+	tick_suspend_local();
+}
+
 void xen_arch_resume(void)
 {
 	on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
+
+void xen_arch_suspend(void)
+{
+	on_each_cpu(xen_vcpu_notify_suspend, NULL, 1);
+}