author     Peter Zijlstra <peterz@infradead.org>        2017-09-20 19:00:18 +0200
committer  Thomas Gleixner <tglx@linutronix.de>         2017-09-25 22:11:43 +0200
commit     724a86881d03ee5794148e65142e24ed3621be66
tree       c0e54518b981d521c526e53f88013c300a22ec42 /kernel/cpu.c
parent     4dddfb5faa6118564b0c54a163353d13882299d8
smp/hotplug: Callback vs state-machine consistency
While the generic callback functions have an 'int' return and thus
appear to be allowed to return error, this is not true for all states.
Specifically, what used to be STARTING/DYING are run with IRQs
disabled from critical parts of CPU bringup/teardown and are not
allowed to fail. Add WARNs to enforce this rule.
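
For illustration only, a minimal sketch of a callback pair living in that
section, as seen from a driver: the mydrv_* names and the
CPUHP_AP_MYDRV_STARTING state are hypothetical (STARTING-section states need
their own entry in enum cpuhp_state), but the constraint is the one described
above: these run on the hotplugged CPU with IRQs disabled and must return 0.

```c
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

/*
 * Hypothetical STARTING callback: runs on the incoming CPU, with IRQs
 * disabled, in the middle of bringup.  It must not sleep, allocate or
 * fail; a non-zero return only trips the WARN_ON_ONCE() added by this
 * patch.
 */
static int mydrv_starting_cpu(unsigned int cpu)
{
	/* program strictly per-CPU hardware/software state */
	return 0;
}

/* Hypothetical DYING counterpart: same rules, on the outgoing CPU. */
static int mydrv_dying_cpu(unsigned int cpu)
{
	/* quiesce per-CPU state; failure is not an option here */
	return 0;
}

static int __init mydrv_init(void)
{
	/* CPUHP_AP_MYDRV_STARTING is a placeholder enum cpuhp_state entry. */
	return cpuhp_setup_state(CPUHP_AP_MYDRV_STARTING, "mydrv:starting",
				 mydrv_starting_cpu, mydrv_dying_cpu);
}
```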
But since some callbacks are indeed allowed to fail, we can end up in
the situation where a state-machine rollback encounters a failure. In
that case we're stuck: we can't go forward and we can't go back. Also
add a WARN for that case.
AFAICT this is a fundamental 'problem' with no real obvious solution.
We want the 'prepare' callbacks to allow failure on either up or down.
Typically on prepare-up this would be things like -ENOMEM from
resource allocations, and the typical usage in prepare-down would be
something like -EBUSY to avoid CPUs being taken away.
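
To make that concrete, here is a hedged sketch of such prepare callbacks;
the mydrv_* identifiers are made up, and registering at CPUHP_BP_PREPARE_DYN
assumes a dynamically allocated PREPARE-section slot. These run on a control
CPU in a sleepable context, so returning an error is legitimate and simply
makes the state machine roll back.

```c
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/percpu.h>
#include <linux/slab.h>

static DEFINE_PER_CPU(void *, mydrv_buf);
static DEFINE_PER_CPU(bool, mydrv_busy);	/* hypothetical "still in use" flag */

/* Prepare-up: may sleep and allocate; -ENOMEM aborts and rolls back the bringup. */
static int mydrv_prepare_cpu(unsigned int cpu)
{
	void *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;
	per_cpu(mydrv_buf, cpu) = buf;
	return 0;
}

/*
 * Prepare-down counterpart: returning -EBUSY fails the hotunplug and makes
 * the state machine roll back, so the CPU is not taken away.
 */
static int mydrv_dead_cpu(unsigned int cpu)
{
	if (per_cpu(mydrv_busy, cpu))
		return -EBUSY;
	kfree(per_cpu(mydrv_buf, cpu));
	per_cpu(mydrv_buf, cpu) = NULL;
	return 0;
}

static int __init mydrv_init(void)
{
	int ret;

	/* DYN states return the allocated state number (> 0) on success. */
	ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "mydrv:prepare",
				mydrv_prepare_cpu, mydrv_dead_cpu);
	return ret < 0 ? ret : 0;
}
```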
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bigeasy@linutronix.de
Cc: efault@gmx.de
Cc: rostedt@goodmis.org
Cc: max.byungchul.park@gmail.com
Link: https://lkml.kernel.org/r/20170920170546.819539119@infradead.org
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--  kernel/cpu.c | 26
1 file changed, 22 insertions, 4 deletions
```diff
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 1139063de5af..d6f1b8c36400 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -202,7 +202,14 @@ err:
 	hlist_for_each(node, &step->list) {
 		if (!cnt--)
 			break;
-		cbm(cpu, node);
+
+		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+		ret = cbm(cpu, node);
+		trace_cpuhp_exit(cpu, st->state, state, ret);
+		/*
+		 * Rollback must not fail,
+		 */
+		WARN_ON_ONCE(ret);
 	}
 	return ret;
 }
@@ -659,6 +666,7 @@ static int take_cpu_down(void *_param)
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
 	int err, cpu = smp_processor_id();
+	int ret;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
@@ -672,8 +680,13 @@ static int take_cpu_down(void *_param)
 	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
 	st->state--;
 	/* Invoke the former CPU_DYING callbacks */
-	for (; st->state > target; st->state--)
-		cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+	for (; st->state > target; st->state--) {
+		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+		/*
+		 * DYING must not fail!
+		 */
+		WARN_ON_ONCE(ret);
+	}
 
 	/* Give up timekeeping duties */
 	tick_handover_do_timer();
@@ -876,11 +889,16 @@ void notify_cpu_starting(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+	int ret;
 
 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	while (st->state < target) {
 		st->state++;
-		cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+		/*
+		 * STARTING must not fail!
+		 */
+		WARN_ON_ONCE(ret);
 	}
 }
 
```