29 files changed, 312 insertions, 167 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9a677cd5997f..44697817ccc6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-#define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
 #define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
 #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index a035b1e5dfa7..8a2aecfe9b02 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -185,7 +185,6 @@ typedef struct compat_siginfo {
 } compat_siginfo_t;
 
 #define COMPAT_OFF_T_MAX	0x7fffffff
-#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
 /*
  * A pointer passed in from user mode. This should not
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
 static inline void wait_for_subcore_guest_exit(void) { }
 static inline void wait_for_tb_resync(void) { }
 #endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
 #endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a409177be8bd..f0461618bf7b 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING		0x1D8
@@ -330,6 +331,17 @@
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2
 /* >= 0 values are CPU number */
 
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK	0x18
 #define PROC_TABLE_DEREG	0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
 	}
 }
 
+struct h_cpu_char_result {
+	u64 character;
+	u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 7f74c282710f..fad0e6ff460f 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -21,11 +21,6 @@
 #include <asm/opal.h>
 
 /*
- * For static allocation of some of the structures.
- */
-#define IMC_MAX_PMUS			32
-
-/*
  * Compatibility macros for IMC devices
  */
 #define IMC_DTB_COMPAT			"ibm,opal-in-memory-counters"
@@ -125,4 +120,5 @@ enum {
 extern int init_imc_pmu(struct device_node *parent,
 				struct imc_pmu *pmu_ptr, int pmu_id);
 extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
 #endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 7f01b22fa6cb..55eddf50d149 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
 
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+	if (rc == H_SUCCESS) {
+		p->character = retbuf[0];
+		p->behaviour = retbuf[1];
+	}
+
+	return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..14e41b843952 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -432,8 +432,9 @@
 #define SPRN_LPID	0x13F	/* Logical Partition Identifier */
 #endif
 #define   LPID_RSVD	0x3ff		/* Reserved LPID for partn switching */
-#define	SPRN_HMER	0x150	/* Hardware m? error recovery */
-#define	SPRN_HMEER	0x151	/* Hardware m? enable error recovery */
+#define	SPRN_HMER	0x150	/* Hypervisor maintenance exception reg */
+#define   HMER_DEBUG_TRIG	(1ul << (63 - 17)) /* Debug trigger */
+#define	SPRN_HMEER	0x151	/* Hyp maintenance exception enable reg */
 #define SPRN_PCR	0x152	/* Processor compatibility register */
 #define   PCR_VEC_DIS	(1ul << (63-0))	/* Vec. disable (bit NA since POWER8) */
 #define   PCR_VSX_DIS	(1ul << (63-1))	/* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
 #define _ASM_POWERPC_XIVE_REGS_H
 
 /*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI	0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
+#define XIVE_ESB_GET		0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P		0x2
+#define XIVE_ESB_VAL_Q		0x1
+
+/*
  * Thread Management (aka "TM") registers
  */
 
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..b619a5585cd6 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_EOI_FW	0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
+/* Special flag set by KVM for excalation interrupts */
+#define XIVE_IRQ_NO_EOI		0x80
+
 #define XIVE_INVALID_CHIP_ID	-1
 
 /* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
 	atomic_t		pending_count;
 };
 
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI	0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI	0x000 /* Load */
-#define XIVE_ESB_GET		0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00	0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01	0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10	0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11	0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P		0x2
-#define XIVE_ESB_VAL_Q		0x1
-
 /* Global enable flags for the XIVE support */
 extern bool __xive_enabled;
 
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 602e0fde19b4..8bdc2f96c5d6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -735,8 +735,8 @@ static __init void cpufeatures_cpu_quirks(void)
 	 */
 	if ((version & 0xffffff00) == 0x004e0100)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
-	else if ((version & 0xffffefff) == 0x004e0200)
-		cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1;
+	else if ((version & 0xffffefff) == 0x004e0201)
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 }
 
 static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 742e4658c5dc..d2fecaec4fec 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
 	return handled;
 }
 
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+	DTRIG_UNKNOWN,
+	DTRIG_VECTOR_CI,	/* need to emulate vector CI load instr */
+	DTRIG_SUSPEND_ESCAPE,	/* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
 {
-	__this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-	/* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
-	if (pvr_version_is(PVR_POWER9)) {
-		unsigned long hmer = mfspr(SPRN_HMER);
-
-		/* Do we have the debug bit set */
-		if (hmer & PPC_BIT(17)) {
-			hmer &= ~PPC_BIT(17);
-			mtspr(SPRN_HMER, hmer);
-
-			/*
-			 * Now to avoid problems with soft-disable we
-			 * only do the emulation if we are coming from
-			 * user space
-			 */
-			if (user_mode(regs))
-				local_paca->hmi_p9_special_emu = 1;
-
-			/*
-			 * Don't bother going to OPAL if that's the
-			 * only relevant bit.
-			 */
-			if (!(hmer & mfspr(SPRN_HMEER)))
-				return local_paca->hmi_p9_special_emu;
+	int pvr;
+	struct device_node *cpun;
+	struct property *prop = NULL;
+	const char *str;
+
+	/* First look in the device tree */
+	preempt_disable();
+	cpun = of_get_cpu_node(smp_processor_id(), NULL);
+	if (cpun) {
+		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+					    prop, str) {
+			if (strcmp(str, "bit17-vector-ci-load") == 0)
+				hmer_debug_trig_function = DTRIG_VECTOR_CI;
+			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
 		}
+		of_node_put(cpun);
+	}
+	preempt_enable();
+
+	/* If we found the property, don't look at PVR */
+	if (prop)
+		goto out;
+
+	pvr = mfspr(SPRN_PVR);
+	/* Check for POWER9 Nimbus (scale-out) */
+	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+		/* DD2.2 and later */
+		if ((pvr & 0xfff) >= 0x202)
+			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+		/* DD2.0 and DD2.1 - used for vector CI load emulation */
+		else if ((pvr & 0xfff) >= 0x200)
+			hmer_debug_trig_function = DTRIG_VECTOR_CI;
+	}
+
+ out:
+	switch (hmer_debug_trig_function) {
+	case DTRIG_VECTOR_CI:
+		pr_debug("HMI debug trigger used for vector CI load\n");
+		break;
+	case DTRIG_SUSPEND_ESCAPE:
+		pr_debug("HMI debug trigger used for TM suspend escape\n");
+		break;
+	default:
+		break;
 	}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+	return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ *  0 means no further handling is required
+ *  1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+	unsigned long hmer = mfspr(SPRN_HMER);
+	long ret = 0;
+
+	/* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+	if (!((hmer & HMER_DEBUG_TRIG)
+	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
+		return -1;
+		
+	hmer &= ~HMER_DEBUG_TRIG;
+	/* HMER is a write-AND register */
+	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+	switch (hmer_debug_trig_function) {
+	case DTRIG_VECTOR_CI:
+		/*
+		 * Now to avoid problems with soft-disable we
+		 * only do the emulation if we are coming from
+		 * host user space
+		 */
+		if (regs && user_mode(regs))
+			ret = local_paca->hmi_p9_special_emu = 1;
+
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * See if any other HMI causes remain to be handled
+	 */
+	if (hmer & mfspr(SPRN_HMEER))
+		return -1;
+
+	return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{	
+	int ret;
+
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	ret = hmi_handle_debugtrig(regs);
+	if (ret >= 0)
+		return ret;
 
 	wait_for_subcore_guest_exit();
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 8ac0bd2bddb0..3280953a82cf 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -623,7 +623,9 @@ BEGIN_FTR_SECTION
 	 * NOTE, we rely on r0 being 0 from above.
 	 */
 	mtspr	SPRN_IAMR,r0
+BEGIN_FTR_SECTION_NESTED(42)
 	mtspr	SPRN_AMOR,r0
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	/* save regs for local vars on new stack.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bfdd783e3916..5acb5a176dbe 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1569,16 +1569,22 @@ void arch_release_task_struct(struct task_struct *t)
  */
 int set_thread_tidr(struct task_struct *t)
 {
+	int rc;
+
 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		return -EINVAL;
 
 	if (t != current)
 		return -EINVAL;
 
-	t->thread.tidr = assign_thread_tidr();
-	if (t->thread.tidr < 0)
-		return t->thread.tidr;
+	if (t->thread.tidr)
+		return 0;
+
+	rc = assign_thread_tidr();
+	if (rc < 0)
+		return rc;
 
+	t->thread.tidr = rc;
 	mtspr(SPRN_TIDR, t->thread.tidr);
 
 	return 0;
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 16d16583cf11..9ffd73296f64 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -94,40 +94,13 @@
  */
 static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
 {
-	compat_sigset_t	cset;
-
-	switch (_NSIG_WORDS) {
-	case 4: cset.sig[6] = set->sig[3] & 0xffffffffull;
-		cset.sig[7] = set->sig[3] >> 32;
-	case 3: cset.sig[4] = set->sig[2] & 0xffffffffull;
-		cset.sig[5] = set->sig[2] >> 32;
-	case 2: cset.sig[2] = set->sig[1] & 0xffffffffull;
-		cset.sig[3] = set->sig[1] >> 32;
-	case 1: cset.sig[0] = set->sig[0] & 0xffffffffull;
-		cset.sig[1] = set->sig[0] >> 32;
-	}
-	return copy_to_user(uset, &cset, sizeof(*uset));
+	return put_compat_sigset(uset, set, sizeof(*uset));
 }
 
 static inline int get_sigset_t(sigset_t *set,
 			       const compat_sigset_t __user *uset)
 {
-	compat_sigset_t s32;
-
-	if (copy_from_user(&s32, uset, sizeof(*uset)))
-		return -EFAULT;
-
-	/*
-	 * Swap the 2 words of the 64-bit sigset_t (they are stored
-	 * in the "wrong" endian in 32-bit user storage).
-	 */
-	switch (_NSIG_WORDS) {
-	case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
-	case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
-	case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
-	case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
-	}
-	return 0;
+	return get_compat_sigset(set, uset);
 }
 
 #define to_user_ptr(p)		ptr_to_compat(p)
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e3c5f75d137c..8cdd852aedd1 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -188,7 +188,7 @@ static void tau_timeout(void * info)
 	local_irq_restore(flags);
 }
 
-static void tau_timeout_smp(unsigned long unused)
+static void tau_timeout_smp(struct timer_list *unused)
 {
 
 	/* schedule ourselves to be run again */
@@ -230,7 +230,7 @@ int __init TAU_init(void)
 
 
 	/* first, set up the window shrinking timer */
-	setup_timer(&tau_timer, tau_timeout_smp, 0UL);
+	timer_setup(&tau_timer, tau_timeout_smp, 0);
 	tau_timer.expires = jiffies + shrink_timer;
 	add_timer(&tau_timer);
 
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index c356f9a40b24..c296343d0dcc 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void)
  *   secondary threads to proceed.
  * - All secondary threads will eventually call opal hmi handler on
  *   their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
  */
 
 long kvmppc_realmode_hmi_handler(void)
 {
-	int ptid = local_paca->kvm_hstate.ptid;
 	bool resync_req;
 
-	/* This is only called on primary thread. */
-	BUG_ON(ptid != 0);
 	__this_cpu_inc(irq_stat.hmi_exceptions);
 
+	if (hmi_handle_debugtrig(NULL) >= 0)
+		return 1;
+
 	/*
 	 * By now primary thread has already completed guest->host
 	 * partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 30ece4cebaf5..7daf21be33d0 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	bne	27f
 	bl	kvmppc_realmode_hmi_handler
 	nop
+	cmpdi	r3, 0
 	li	r12, BOOK3S_INTERRUPT_HMI
 	/*
-	 * At this point kvmppc_realmode_hmi_handler would have resync-ed
-	 * the TB. Hence it is not required to subtract guest timebase
-	 * offset from timebase. So, skip it.
+	 * At this point kvmppc_realmode_hmi_handler may have resync-ed
+	 * the TB, and if it has, we must not subtract the guest timebase
+	 * offset from the timebase. So, skip it.
 	 *
 	 * Also, do not call kvmppc_subcore_exit_guest() because it has
 	 * been invoked as part of kvmppc_realmode_hmi_handler().
 	 */
-	b	30f
+	beq	30f
 
 27:
 	/* Subtract timebase offset from timebase */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 071b87ee682f..83b485810aea 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -599,9 +599,9 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu)
 	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
 }
 
-void kvmppc_watchdog_func(unsigned long data)
+void kvmppc_watchdog_func(struct timer_list *t)
 {
-	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer);
 	u32 tsr, new_tsr;
 	int final;
 
@@ -1412,8 +1412,7 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* setup watchdog timer once */
 	spin_lock_init(&vcpu->arch.wdt_lock);
-	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
-		    (unsigned long)vcpu);
+	timer_setup(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, 0);
 
 	/*
 	 * Clear DBSR.MRR to avoid guest debug interrupt as
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7c9e45f54186..77eb25abc601 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1403,7 +1403,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	int r;
-	sigset_t sigsaved;
 
 	if (vcpu->mmio_needed) {
 		vcpu->mmio_needed = 0;
@@ -1444,16 +1443,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 #endif
 	}
 
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+	kvm_sigset_activate(vcpu);
 
 	if (run->immediate_exit)
 		r = -EINTR;
 	else
 		r = kvmppc_vcpu_run(run, vcpu);
 
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	kvm_sigset_deactivate(vcpu);
 
 	return r;
 }
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c9de03e0c1f1..d469224c4ada 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,6 +21,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
+#include <asm/setup.h>
 
 static int __patch_instruction(unsigned int *addr, unsigned int instr)
 {
@@ -146,11 +147,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
 	 * During early early boot patch_instruction is called
 	 * when text_poke_area is not ready, but we still need
 	 * to allow patching. We just do the plain old patching
-	 * We use slab_is_available and per cpu read * via this_cpu_read
-	 * of text_poke_area. Per-CPU areas might not be up early
-	 * this can create problems with just using this_cpu_read()
 	 */
-	if (!slab_is_available() || !this_cpu_read(text_poke_area))
+	if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
 		return __patch_instruction(addr, instr);
 
 	local_irq_save(flags);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3848af167df9..640cf566e986 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -47,7 +47,8 @@
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
+						int apsize, int ssize)
 {
 	unsigned long va;
 	unsigned int penc;
@@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 			     : "memory");
 		break;
 	}
-	trace_tlbie(0, 0, va, 0, 0, 0, 0);
+	return va;
+}
+
+static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+{
+	unsigned long rb;
+
+	rb = ___tlbie(vpn, psize, apsize, ssize);
+	trace_tlbie(0, 0, rb, 0, 0, 0, 0);
 }
 
 static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -652,7 +661,7 @@ static void native_hpte_clear(void)
 		if (hpte_v & HPTE_V_VALID) {
 			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
 			hptep->v = 0;
-			__tlbie(vpn, psize, apsize, ssize);
+			___tlbie(vpn, psize, apsize, ssize);
 		}
 	}
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 564fff06f5c1..23ec2c5e3b78 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -122,7 +122,8 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	return !slice_area_is_free(mm, start, end - start);
 }
 
-static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned long i;
 
@@ -133,15 +134,16 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret)
 		if (!slice_low_has_vma(mm, i))
 			ret->low_slices |= 1u << i;
 
-	if (mm->context.slb_addr_limit <= SLICE_LOW_TOP)
+	if (high_limit <= SLICE_LOW_TOP)
 		return;
 
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++)
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
 		if (!slice_high_has_vma(mm, i))
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret)
+static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
+				unsigned long high_limit)
 {
 	unsigned char *hpsizes;
 	int index, mask_index;
@@ -156,8 +158,11 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 		if (((lpsizes >> (i * 4)) & 0xf) == psize)
 			ret->low_slices |= 1u << i;
 
+	if (high_limit <= SLICE_LOW_TOP)
+		return;
+
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
 		mask_index = i & 0x1;
 		index = i >> 1;
 		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
@@ -169,6 +174,10 @@ static int slice_check_fit(struct mm_struct *mm,
 			   struct slice_mask mask, struct slice_mask available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
+	/*
+	 * Make sure we just do bit compare only to the max
+	 * addr limit and not the full bit map size.
+	 */
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	bitmap_and(result, mask.high_slices,
@@ -472,7 +481,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	slice_mask_for_size(mm, psize, &good_mask);
+	slice_mask_for_size(mm, psize, &good_mask, high_limit);
 	slice_print_mask(" good_mask", good_mask);
 
 	/*
@@ -497,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If we support combo pages, we can allow 64k pages in 4k slices */
 	if (psize == MMU_PAGE_64K) {
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		if (fixed)
 			slice_or_mask(&good_mask, &compat_mask);
 	}
@@ -530,11 +539,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 			return newaddr;
 		}
 	}
-
-	/* We don't fit in the good mask, check what other slices are
+	/*
+	 * We don't fit in the good mask, check what other slices are
 	 * empty and thus can be converted
 	 */
-	slice_mask_for_free(mm, &potential_mask);
+	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
 	slice_print_mask(" potential", potential_mask);
 
@@ -744,17 +753,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 {
 	struct slice_mask mask, available;
 	unsigned int psize = mm->context.user_psize;
+	unsigned long high_limit = mm->context.slb_addr_limit;
 
 	if (radix_enabled())
 		return 0;
 
 	slice_range_to_mask(addr, len, &mask);
-	slice_mask_for_size(mm, psize, &available);
+	slice_mask_for_size(mm, psize, &available, high_limit);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
 		struct slice_mask compat_mask;
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask);
+		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
 		slice_or_mask(&available, &compat_mask);
 	}
 #endif
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index 264b6ab11978..b90a21bc2f3f 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -451,7 +451,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
  * This routine will alternate loading the virtual counters for
  * virtual CPUs
  */
-static void cell_virtual_cntr(unsigned long data)
+static void cell_virtual_cntr(struct timer_list *unused)
 {
 	int i, prev_hdw_thread, next_hdw_thread;
 	u32 cpu;
@@ -555,7 +555,7 @@ static void cell_virtual_cntr(unsigned long data)
 
 static void start_virt_cntrs(void)
 {
-	setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL);
+	timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0);
 	timer_virt_cntr.expires = jiffies + HZ / 10;
 	add_timer(&timer_virt_cntr);
 }
@@ -587,7 +587,7 @@ static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
  * periodically based on kernel timer to switch which SPU is
  * being monitored in a round robbin fashion.
  */
-static void spu_evnt_swap(unsigned long data)
+static void spu_evnt_swap(struct timer_list *unused)
 {
 	int node;
 	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
@@ -677,7 +677,7 @@ static void spu_evnt_swap(unsigned long data)
 
 static void start_spu_event_swap(void)
 {
-	setup_timer(&timer_spu_event_swap, spu_evnt_swap, 0UL);
+	timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0);
 	timer_spu_event_swap.expires = jiffies + HZ / 25;
 	add_timer(&timer_spu_event_swap);
 }
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 36344117c680..0ead3cd73caa 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -26,7 +26,7 @@
  */
 static DEFINE_MUTEX(nest_init_lock);
 static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
-static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static struct imc_pmu **per_nest_pmu_arr;
 static cpumask_t nest_imc_cpumask;
 struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
@@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
 static void nest_change_cpu_context(int old_cpu, int new_cpu)
 {
 	struct imc_pmu **pn = per_nest_pmu_arr;
-	int i;
 
 	if (old_cpu < 0 || new_cpu < 0)
 		return;
 
-	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+	while (*pn) {
 		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+		pn++;
+	}
 }
 
 static int ppc_nest_imc_cpu_offline(unsigned int cpu)
@@ -467,7 +468,7 @@ static int nest_imc_event_init(struct perf_event *event)
 	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
 	 * Get the base memory addresss for this cpu.
 	 */
-	chip_id = topology_physical_package_id(event->cpu);
+	chip_id = cpu_to_chip_id(event->cpu);
 	pcni = pmu->mem_info;
 	do {
 		if (pcni->id == chip_id) {
@@ -524,19 +525,19 @@ static int nest_imc_event_init(struct perf_event *event)
  */
 static int core_imc_mem_init(int cpu, int size)
 {
-	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	int nid, rc = 0, core_id = (cpu / threads_per_core);
 	struct imc_mem_info *mem_info;
 
 	/*
 	 * alloc_pages_node() will allocate memory for core in the
 	 * local node only.
 	 */
-	phys_id = topology_physical_package_id(cpu);
+	nid = cpu_to_node(cpu);
 	mem_info = &core_imc_pmu->mem_info[core_id];
 	mem_info->id = core_id;
 
 	/* We need only vbase for core counters */
-	mem_info->vbase = page_address(alloc_pages_node(phys_id,
+	mem_info->vbase = page_address(alloc_pages_node(nid,
 					  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 					  __GFP_NOWARN, get_order(size)));
 	if (!mem_info->vbase)
@@ -797,14 +798,14 @@ static int core_imc_event_init(struct perf_event *event)
 static int thread_imc_mem_alloc(int cpu_id, int size)
 {
 	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
-	int phys_id = topology_physical_package_id(cpu_id);
+	int nid = cpu_to_node(cpu_id);
 
 	if (!local_mem) {
 		/*
 		 * This case could happen only once at start, since we dont
 		 * free the memory in cpu offline path.
 		 */
-		local_mem = page_address(alloc_pages_node(phys_id,
+		local_mem = page_address(alloc_pages_node(nid,
 				  GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
 				  __GFP_NOWARN, get_order(size)));
 		if (!local_mem)
@@ -1194,6 +1195,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	kfree(pmu_ptr);
+	kfree(per_nest_pmu_arr);
 	return;
 }
 
@@ -1218,6 +1220,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 			return -ENOMEM;
 
 		/* Needed for hotplug/migration */
+		if (!per_nest_pmu_arr) {
+			per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+						sizeof(struct imc_pmu *),
+						GFP_KERNEL);
+			if (!per_nest_pmu_arr)
+				return -ENOMEM;
+		}
 		per_nest_pmu_arr[pmu_index] = pmu_ptr;
 		break;
 	case IMC_DOMAIN_CORE:
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 1fbb5da17dd2..9033c8194eda 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -992,13 +992,13 @@ static void spu_calc_load(void)
 	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
 }
 
-static void spusched_wake(unsigned long data)
+static void spusched_wake(struct timer_list *unused)
 {
 	mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
 	wake_up_process(spusched_task);
 }
 
-static void spuloadavg_wake(unsigned long data)
+static void spuloadavg_wake(struct timer_list *unused)
 {
 	mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
 	spu_calc_load();
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
 		LOAD_INT(c), LOAD_FRAC(c),
 		count_active_contexts(),
 		atomic_read(&nr_spu_contexts),
-		task_active_pid_ns(current)->last_pid);
+		idr_get_cursor(&task_active_pid_ns(current)->idr));
 	return 0;
 }
 
@@ -1124,8 +1124,8 @@ int __init spu_sched_init(void)
 	}
 	spin_lock_init(&spu_prio->runq_lock);
 
-	setup_timer(&spusched_timer, spusched_wake, 0);
-	setup_timer(&spuloadavg_timer, spuloadavg_wake, 0);
+	timer_setup(&spusched_timer, spusched_wake, 0);
+	timer_setup(&spuloadavg_timer, spuloadavg_wake, 0);
 
 	spusched_task = kthread_run(spusched_thread, NULL, "spusched");
 	if (IS_ERR(spusched_task)) {
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 39a1d4225e0f..3408f315ef48 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -361,9 +361,9 @@ static irqreturn_t kw_i2c_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void kw_i2c_timeout(unsigned long data)
+static void kw_i2c_timeout(struct timer_list *t)
 {
-	struct pmac_i2c_host_kw *host = (struct pmac_i2c_host_kw *)data;
+	struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer);
 	unsigned long flags;
 
 	spin_lock_irqsave(&host->lock, flags);
@@ -513,7 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	mutex_init(&host->mutex);
 	init_completion(&host->complete);
 	spin_lock_init(&host->lock);
-	setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host);
+	timer_setup(&host->timeout_timer, kw_i2c_timeout, 0);
 
 	psteps = of_get_property(np, "AAPL,address-step", NULL);
 	steps = psteps ? (*psteps) : 0x10;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 21f6531fae20..465ea105b771 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void)
 	put_online_cpus();
 }
 
+int get_max_nest_dev(void)
+{
+	struct device_node *node;
+	u32 pmu_units = 0, type;
+
+	for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(node, "type", &type))
+			continue;
+
+		if (type == IMC_TYPE_CHIP)
+			pmu_units++;
+	}
+
+	return pmu_units;
+}
+
 static int opal_imc_counters_probe(struct platform_device *pdev)
 {
 	struct device_node *imc_dev = pdev->dev.of_node;
@@ -191,8 +207,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 			break;
 		}
 
-		if (!imc_pmu_create(imc_dev, pmu_count, domain))
-			pmu_count++;
+		if (!imc_pmu_create(imc_dev, pmu_count, domain)) {
+			if (domain == IMC_DOMAIN_NEST)
+				pmu_count++;
+		}
 	}
 
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index c488621dbec3..aebbe95c9230 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -135,6 +135,7 @@ int chip_to_vas_id(int chipid)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(chip_to_vas_id);
 
 static int vas_probe(struct platform_device *pdev)
 {
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a3b8d7d1316e..2547b6021e6a 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
 	 * EOI the source if it hasn't been disabled and hasn't
 	 * been passed-through to a KVM guest
 	 */
-	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+	    !(xd->flags & XIVE_IRQ_NO_EOI))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
 	/*