From 2e672ab2d491713541963afca3a5967ccc2376e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 16:54:49 -0700 Subject: rcu: Avoid ->dynticks_nmi_nesting store tearing NMIs can nest, and store tearing could in theory happen on carries from one byte to the next. This commit therefore adds the WRITE_ONCE() macros preventing this. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c0ca2ccf0c..c5d960f86cf8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1103,7 +1103,8 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } - rdtp->dynticks_nmi_nesting += incby; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); barrier(); } @@ -1135,12 +1136,13 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - rdtp->dynticks_nmi_nesting -= 2; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - rdtp->dynticks_nmi_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } -- cgit v1.2.3 From a0eb22bf64a755bb162b421120b9fbe7d012b85f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 19:45:10 -0700 Subject: rcu: Reduce dyntick-idle state space Both extended-quiescent-state entry and exit first update the nesting counter and then adjust the dyntick-idle state. This means that there are four states: (1) Both nesting and dyntick idle indicate idle, (2) Nesting indicates idle but dyntick idle does not, (3) Nesting indicates non-idle and dyntick idle does not, and (4) Both nesting and dyntick idle indicate non-idle. This commit simplifies the state space by eliminating #3, reversing the order of updates on exit from extended quiescent state. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c5d960f86cf8..49f661bb8ffe 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -928,21 +928,21 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. 
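As background for the store-tearing hazard addressed by the WRITE_ONCE() patch above: a compiler is free to split a plain word-sized store into several narrower stores, and an NMI arriving between them would observe a half-updated counter. The stand-alone sketch below is illustrative only, not part of the patch; the variable name is invented, and WRITE_ONCE() is reduced to its essential volatile cast.

/*
 * Minimal model of the kernel's WRITE_ONCE(): a volatile access that
 * the compiler must perform as written, so a naturally aligned
 * word-sized store is emitted as a single store instruction.
 */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

static long nmi_nesting;        /* stands in for ->dynticks_nmi_nesting */

static void nmi_nesting_add(long incby)
{
        /*
         * A plain "nmi_nesting += incby" may legally be compiled into
         * several narrower stores; WRITE_ONCE() forbids that
         * transformation, so nested NMIs never see a torn value.
         */
        WRITE_ONCE(nmi_nesting, nmi_nesting + incby);
}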
*/ -static void rcu_eqs_exit_common(long long oldval, int user) +static void rcu_eqs_exit_common(long long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), - oldval, rdtp->dynticks_nesting); + rdtp->dynticks_nesting, newval); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -967,8 +967,8 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { __this_cpu_inc(disable_rcu_irq_enter); + rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_eqs_exit_common(oldval, user); __this_cpu_dec(disable_rcu_irq_enter); } } @@ -1037,7 +1037,7 @@ void rcu_user_exit(void) void rcu_irq_enter(void) { struct rcu_dynticks *rdtp; - long long oldval; + long long newval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1046,14 +1046,13 @@ void rcu_irq_enter(void) if (rdtp->dynticks_nmi_nesting) return; - oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (oldval) - trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); + newval = rdtp->dynticks_nesting + 1; + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); + if (rdtp->dynticks_nesting) + trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); else - rcu_eqs_exit_common(oldval, true); + rcu_eqs_exit_common(newval, true); + rdtp->dynticks_nesting++; } /* -- cgit v1.2.3 From fd581a91ac16187625ec509414d08d37827472c4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 21:56:20 -0700 Subject: rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation This is a code-motion-only commit that prepares to define rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 150 +++++++++++++++++++++++++++--------------------------- 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 49f661bb8ffe..419f3c38e1b6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -866,6 +866,44 @@ void rcu_user_enter(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) 
+ */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state. + */ + if (rdtp->dynticks_nmi_nesting != 1) { + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); + return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ + rcu_dynticks_eqs_enter(); +} + /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle * @@ -1012,6 +1050,43 @@ void rcu_user_exit(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and + * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know + * that the CPU is active. This implementation permits nested NMIs, as + * long as the nesting level does not overflow an int. (You will probably + * run out of stack space first.) + * + * If you add or remove a call to rcu_nmi_enter(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int incby = 2; + + /* Complain about underflow. */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); + + /* + * If idle from RCU viewpoint, atomically increment ->dynticks + * to mark non-idle and increment ->dynticks_nmi_nesting by one. + * Otherwise, increment ->dynticks_nmi_nesting by two. This means + * if ->dynticks_nmi_nesting is equal to one, we are guaranteed + * to be in the outermost NMI handler that interrupted an RCU-idle + * period (observation due to Andy Lutomirski). + */ + if (rcu_dynticks_curr_cpu_in_eqs()) { + rcu_dynticks_eqs_exit(); + incby = 1; + } + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); + barrier(); +} + /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle * @@ -1070,81 +1145,6 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } -/** - * rcu_nmi_enter - inform RCU of entry to NMI context - * - * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and - * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know - * that the CPU is active. This implementation permits nested NMIs, as - * long as the nesting level does not overflow an int. (You will probably - * run out of stack space first.) - * - * If you add or remove a call to rcu_nmi_enter(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_nmi_enter(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; - - /* Complain about underflow. */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); - - /* - * If idle from RCU viewpoint, atomically increment ->dynticks - * to mark non-idle and increment ->dynticks_nmi_nesting by one. - * Otherwise, increment ->dynticks_nmi_nesting by two. This means - * if ->dynticks_nmi_nesting is equal to one, we are guaranteed - * to be in the outermost NMI handler that interrupted an RCU-idle - * period (observation due to Andy Lutomirski). - */ - if (rcu_dynticks_curr_cpu_in_eqs()) { - rcu_dynticks_eqs_exit(); - incby = 1; - } - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. 
*/ - rdtp->dynticks_nmi_nesting + incby); - barrier(); -} - -/** - * rcu_nmi_exit - inform RCU of exit from NMI context - * - * If we are returning from the outermost NMI handler that interrupted an - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting - * to let the RCU grace-period handling know that the CPU is back to - * being RCU-idle. - * - * If you add or remove a call to rcu_nmi_exit(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_nmi_exit(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - - /* - * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. - * (We are exiting an NMI handler, so RCU better be paying attention - * to us!) - */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); - WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); - - /* - * If the nesting level is not 1, the CPU wasn't RCU-idle, so - * leave it in non-RCU-idle state. - */ - if (rdtp->dynticks_nmi_nesting != 1) { - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ - rdtp->dynticks_nmi_nesting - 2); - return; - } - - /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ - rcu_dynticks_eqs_enter(); -} - /** * rcu_is_watching - see if RCU thinks that the current CPU is idle * -- cgit v1.2.3 From 6136d6e48a0138f6be5bb3427dbeb0ba07a546a4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 08:28:04 -0700 Subject: rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit In preparation for merging dyntick-idle irq handling into the NMI algorithm, clamp ->dynticks_nmi_nesting value to allow for interrupts that enter but never leave and vice versa. It is important that the clamping happen outside of the extended quiescent state. Otherwise, there will be short windows where irqs and NMIs fail to convince RCU to start watching. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 ++ kernel/rcu/tree.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 59c471de342a..f4a411964c41 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -56,6 +56,8 @@ #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) +#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) + /* * Grace-period counter management. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 419f3c38e1b6..142cdd4a50c9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -818,6 +818,7 @@ static void rcu_eqs_enter(bool user) struct rcu_dynticks *rdtp; rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -1008,6 +1009,7 @@ static void rcu_eqs_exit(bool user) rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; __this_cpu_dec(disable_rcu_irq_enter); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } -- cgit v1.2.3 From 58721f5da4bcd5187566f4159a4fc88f70bf74f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 10:42:22 -0700 Subject: rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}() RCU currently uses two different mechanisms for tracking irqs and NMIs. 
This is unnecessary complexity: Given that NMIs can nest and given that RCU's tracking handles such nesting, the NMI tracking mechanism can also be used to track irqs. This commit therefore defines rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit(). Unfortunately, callers must still distinguish between the irq and NMI functions because additional actions are taken when an irq interrupts idle or nohz_full usermode execution, and these actions cannot always be taken from NMI handlers. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 59 ++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 142cdd4a50c9..fde0e840563f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -266,6 +266,7 @@ void rcu_bh_qs(void) static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -914,8 +915,8 @@ void rcu_nmi_exit(void) * * This code assumes that the idle loop never does anything that might * result in unbalanced calls to irq_enter() and irq_exit(). If your - * architecture violates this assumption, RCU will give you what you - * deserve, good and hard. But very infrequently and irreproducibly. + * architecture's idle loop violates this assumption, RCU will give you what + * you deserve, good and hard. But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -926,23 +927,14 @@ void rcu_nmi_exit(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting < 1); - if (rdtp->dynticks_nesting <= 1) { - rcu_eqs_enter_common(true); - } else { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); - rdtp->dynticks_nesting--; - } + if (rdtp->dynticks_nmi_nesting == 1) + rcu_prepare_for_idle(); + rcu_nmi_exit(); + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_enter(); } /* @@ -1097,12 +1089,12 @@ void rcu_nmi_enter(void) * sections can occur. The caller must have disabled interrupts. * * Note that the Linux kernel is fully capable of entering an interrupt - * handler that it never exits, for example when doing upcalls to - * user mode! This code assumes that the idle loop never does upcalls to - * user mode. If your architecture does do upcalls from the idle loop (or - * does anything else that results in unbalanced calls to the irq_enter() - * and irq_exit() functions), RCU will give you what you deserve, good - * and hard. But very infrequently and irreproducibly. + * handler that it never exits, for example when doing upcalls to user mode! + * This code assumes that the idle loop never does upcalls to user mode. + * If your architecture's idle loop does do upcalls to user mode (or does + * anything else that results in unbalanced calls to the irq_enter() and + * irq_exit() functions), RCU will give you what you deserve, good and hard. + * But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. 
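As an aside on the counting rule being consolidated here, it may help to see why a ->dynticks_nmi_nesting value of exactly 1 identifies the outermost handler that interrupted an RCU-idle period. The following stand-alone model is a sketch only: user-space C with invented names, omitting the per-CPU state and memory ordering that the real patches supply.

#include <stdbool.h>
#include <stdio.h>

static long nmi_nesting;        /* models ->dynticks_nmi_nesting */
static bool in_eqs = true;      /* models rcu_dynticks_curr_cpu_in_eqs() */

static void model_nmi_enter(void)
{
        long incby = 2;

        if (in_eqs) {           /* interrupted an RCU-idle period */
                in_eqs = false;
                incby = 1;      /* outermost-from-idle handler sees 1 */
        }
        nmi_nesting += incby;
}

static void model_nmi_exit(void)
{
        if (nmi_nesting != 1) { /* not the outermost-from-idle handler */
                nmi_nesting -= 2;
                return;
        }
        in_eqs = true;          /* outermost handler: RCU-idle again */
        nmi_nesting = 0;
}

int main(void)
{
        model_nmi_enter();      /* from idle:  nesting == 1 */
        model_nmi_enter();      /* nested NMI: nesting == 3 */
        model_nmi_exit();       /* back to     nesting == 1 */
        model_nmi_exit();       /* nesting == 0, idle again */
        printf("%ld %d\n", nmi_nesting, in_eqs);  /* prints "0 1" */
        return 0;
}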
* @@ -1113,23 +1105,14 @@ void rcu_nmi_enter(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp; - long long newval; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... */ - if (rdtp->dynticks_nmi_nesting) - return; - - newval = rdtp->dynticks_nesting + 1; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); - if (rdtp->dynticks_nesting) - trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); - else - rcu_eqs_exit_common(newval, true); - rdtp->dynticks_nesting++; + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_exit(); + rcu_nmi_enter(); + if (rdtp->dynticks_nmi_nesting == 1) + rcu_cleanup_after_idle(); } /* -- cgit v1.2.3 From 51a1fd30f13090be7750fed86cf3728afaf4e394 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 14:43:40 -0700 Subject: rcu: Make ->dynticks_nesting be a simple counter Now that ->dynticks_nesting counts only process-level dyntick-idle entry and exit, there is no need for the elaborate segmented counter with its guard fields and overflow checking. This commit therefore makes ->dynticks_nesting be a simple counter. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 27 +-------------------------- kernel/rcu/tree.c | 40 ++++++++++++++++++++-------------------- kernel/rcu/tree.h | 1 - 3 files changed, 21 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f4a411964c41..afe0559d1867 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -30,32 +30,7 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -/* - * Process-level increment to ->dynticks_nesting field. This allows for - * architectures that use half-interrupts and half-exceptions from - * process context. - * - * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH - * that counts the number of process-based reasons why RCU cannot - * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE - * is the value used to increment or decrement this field. - * - * The rest of the bits could in principle be used to count interrupts, - * but this would mean that a negative-one value in the interrupt - * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. - * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK - * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. - * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon - * initial exit from idle. - */ -#define DYNTICK_TASK_NEST_WIDTH 7 -#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) -#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) -#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) -#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) -#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ - DYNTICK_TASK_FLAG) - +/* Offset to allow for unmatched rcu_irq_{enter,exit}(). 
*/ #define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fde0e840563f..d123474fe829 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -265,7 +265,7 @@ void rcu_bh_qs(void) #endif static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -813,6 +813,10 @@ static void rcu_eqs_enter_common(bool user) /* * Enter an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ static void rcu_eqs_enter(bool user) { @@ -821,11 +825,11 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting == 1) rcu_eqs_enter_common(user); else - rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; + rdtp->dynticks_nesting--; } /** @@ -836,10 +840,6 @@ static void rcu_eqs_enter(bool user) * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * We crowbar the ->dynticks_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - * * If you add or remove a call to rcu_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -984,6 +984,10 @@ static void rcu_eqs_exit_common(long long newval, int user) /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to + * allow for the possibility of usermode upcalls messing up our count of + * interrupt nesting level during the busy period that is just now starting. */ static void rcu_eqs_exit(bool user) { @@ -994,12 +998,12 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); - if (oldval & DYNTICK_TASK_NEST_MASK) { - rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + if (oldval) { + rdtp->dynticks_nesting++; } else { __this_cpu_inc(disable_rcu_irq_enter); - rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); - rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rcu_eqs_exit_common(1, user); + rdtp->dynticks_nesting = 1; __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } @@ -1011,11 +1015,6 @@ static void rcu_eqs_exit(bool user) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to - * allow for the possibility of usermode upcalls messing up our count - * of interrupt nesting level during the busy period that is just - * now starting. - * * If you add or remove a call to rcu_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. 
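A note on the DYNTICK_IRQ_NONIDLE offset defined above: because it is enormous relative to any plausible nesting depth, unmatched rcu_irq_exit() calls cannot drive ->dynticks_nmi_nesting to zero while the CPU is non-idle, and idle entry simply re-crowbars the counter. A quick stand-alone check of that arithmetic (user-space sketch under those assumptions):

#include <limits.h>
#include <stdio.h>

#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1)

int main(void)
{
        long nmi_nesting = DYNTICK_IRQ_NONIDLE; /* non-idle baseline */
        long i;

        /* Even a million unmatched irq exits leave the counter
         * positive, so RCU keeps watching this CPU: */
        for (i = 0; i < 1000000; i++)
                nmi_nesting -= 2;
        printf("still positive: %ld\n", nmi_nesting);

        /* Idle entry re-crowbars the counter to zero, erasing any
         * accumulated imbalance before the next busy period: */
        nmi_nesting = 0;
        printf("idle: %ld\n", nmi_nesting);
        return 0;
}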
*/ @@ -1219,7 +1218,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 && + __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1; } /* @@ -3709,7 +3709,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); rdp->cpu = cpu; rdp->rsp = rsp; @@ -3738,7 +3738,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rdp->dynticks->dynticks_nesting = 1; rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 46a5d1991450..dbd7e3753bed 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -39,7 +39,6 @@ */ struct rcu_dynticks { long long dynticks_nesting; /* Track irq/process nesting level. */ - /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ -- cgit v1.2.3 From 844ccdd7dce2c1a6ea9b437fcf8c3265b136e4a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 16:51:47 -0700 Subject: rcu: Eliminate rcu_irq_enter_disabled() Now that the irq path uses the rcu_nmi_{enter,exit}() algorithm, rcu_irq_enter() and rcu_irq_exit() may be used from any context. There is thus no need for rcu_irq_enter_disabled() and for the checks using it. This commit therefore eliminates rcu_irq_enter_disabled(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 22 ++-------------------- kernel/trace/trace.c | 11 ----------- 2 files changed, 2 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d123474fe829..444aa2b3f24d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -270,20 +270,6 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; -/* - * There's a few places, currently just in the tracing infrastructure, - * that uses rcu_irq_enter() to make sure RCU is watching. But there's - * a small location where that will not even work. In those cases - * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() - * can be called. - */ -static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); - -bool rcu_irq_enter_disabled(void) -{ - return this_cpu_read(disable_rcu_irq_enter); -} - /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. @@ -792,10 +778,8 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - __this_cpu_inc(disable_rcu_irq_enter); - rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ - rcu_dynticks_eqs_enter(); /* After this, tracing works again. 
*/ - __this_cpu_dec(disable_rcu_irq_enter); + rdtp->dynticks_nesting = 0; + rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); /* @@ -1001,10 +985,8 @@ static void rcu_eqs_exit(bool user) if (oldval) { rdtp->dynticks_nesting++; } else { - __this_cpu_inc(disable_rcu_irq_enter); rcu_eqs_exit_common(1, user); rdtp->dynticks_nesting = 1; - __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..dbce1be3bab8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2682,17 +2682,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, if (unlikely(in_nmi())) return; - /* - * It is possible that a function is being traced in a - * location that RCU is not watching. A call to - * rcu_irq_enter() will make sure that it is, but there's - * a few internal rcu functions that could be traced - * where that wont work either. In those cases, we just - * do nothing. - */ - if (unlikely(rcu_irq_enter_disabled())) - return; - rcu_irq_enter_irqson(); __ftrace_trace_stack(buffer, flags, skip, pc, NULL); rcu_irq_exit_irqson(); -- cgit v1.2.3 From bd2b879a1ca55486fdb9dcac691bfd3dd79c83d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 12:29:01 -0700 Subject: rcu: Add tracing to irq/NMI dyntick-idle transitions Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 444aa2b3f24d..d069ba2d8412 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -880,12 +880,15 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting - 2); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -1057,6 +1060,9 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } + trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), + rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting + incby); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 84585aa8b6ad24e5bdfba9db4a320a6aeed192ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 15:55:16 -0700 Subject: rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long Because the ->dynticks_nesting field now only contains the process-based nesting level instead of a value encoding both the process nesting level and the irq "nesting" level, we no longer need a long long, even on 32-bit systems. This commit therefore changes both the ->dynticks_nesting and ->dynticks_nmi_nesting fields to long. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 +- kernel/rcu/tree.c | 6 +++--- kernel/rcu/tree.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index afe0559d1867..6334f2c1abd0 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -31,7 +31,7 @@ #endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow for unmatched rcu_irq_{enter,exit}(). 
*/ -#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d069ba2d8412..92de3bacda07 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -946,7 +946,7 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_eqs_exit_common(long long newval, int user) +static void rcu_eqs_exit_common(long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) @@ -979,7 +979,7 @@ static void rcu_eqs_exit_common(long long newval, int user) static void rcu_eqs_exit(bool user) { struct rcu_dynticks *rdtp; - long long oldval; + long oldval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1043,7 +1043,7 @@ void rcu_user_exit(void) void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; + long incby = 2; /* Complain about underflow. */ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index dbd7e3753bed..6488a3b0e729 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -38,8 +38,8 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - long long dynticks_nesting; /* Track irq/process nesting level. */ - int dynticks_nmi_nesting; /* Track NMI nesting level. */ + long dynticks_nesting; /* Track process nesting level. */ + long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..e94e754464cd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1687,7 +1687,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], -- cgit v1.2.3 From dec98900eae1e22467182e58688abe5fae98bd5f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 16:24:29 -0700 Subject: rcu: Add ->dynticks field to rcu_dyntick trace event Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 92de3bacda07..5febb76809f6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -761,13 +761,13 @@ static void rcu_eqs_enter_common(bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -880,15 +880,14 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting - 2); + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -953,14 +952,13 @@ static void rcu_eqs_exit_common(long newval, int user) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on exit: not idle task"), - rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -1062,7 +1060,7 @@ void rcu_nmi_enter(void) } trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting + incby); + rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 914955e18ca09fc404d7fc3614bb04c96a03692c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 13:50:57 -0700 Subject: rcu: Stop duplicating lockdep checks in RCU's idle-entry code The three RCU_LOCKDEP_WARN() calls in rcu_eqs_enter_common() are redundant with other lockdep checks, so this commit removes them. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5febb76809f6..80cada11f544 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -781,17 +781,6 @@ static void rcu_eqs_enter_common(bool user) rdtp->dynticks_nesting = 0; rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); - - /* - * It is illegal to enter an extended quiescent state while - * in an RCU read-side critical section. - */ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); } /* -- cgit v1.2.3 From 2342172fd6c148506456862d795c7f155baf6797 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 15:03:10 -0700 Subject: rcu: Avoid ->dynticks_nesting store tearing Although ->dynticks_nesting is updated only by process level, it is accessed from hardirq to check for interrupt-from-idle quiescent states. Store tearing is thus possible, so this commit applies WRITE_ONCE() to ->dynticks_nesting stores. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 80cada11f544..b2ded4d436c6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -778,7 +778,7 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - rdtp->dynticks_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); } @@ -976,7 +976,7 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting++; } else { rcu_eqs_exit_common(1, user); - rdtp->dynticks_nesting = 1; + WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } @@ -3713,7 +3713,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = 1; + rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ -- cgit v1.2.3 From 215bba9f59e35e64b9936da62632b2fa3ede647c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:37:03 -0700 Subject: rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter() There is now only one call to rcu_eqs_enter_common() and there is no other reason to keep it separate. This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2ded4d436c6..5c8a5796c71f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -749,16 +749,27 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * rcu_eqs_enter_common - current CPU is entering an extended quiescent state + * Enter an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. 
* - * Enter idle, doing appropriate accounting. The caller must have - * disabled interrupts. + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ -static void rcu_eqs_enter_common(bool user) +static void rcu_eqs_enter(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + struct rcu_dynticks *rdtp; + + rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting != 1) { + rdtp->dynticks_nesting--; + return; + } lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); @@ -783,28 +794,6 @@ static void rcu_eqs_enter_common(bool user) rcu_dynticks_task_enter(); } -/* - * Enter an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - * - * We crowbar the ->dynticks_nmi_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - */ -static void rcu_eqs_enter(bool user) -{ - struct rcu_dynticks *rdtp; - - rdtp = this_cpu_ptr(&rcu_dynticks); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (rdtp->dynticks_nesting == 1) - rcu_eqs_enter_common(user); - else - rdtp->dynticks_nesting--; -} - /** * rcu_idle_enter - inform RCU that current CPU is entering idle * -- cgit v1.2.3 From 9dd238e28640d5514bbd0ff2d425f32409981d85 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:56:26 -0700 Subject: rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit() There is now only one call to rcu_eqs_exit_common() and there is no other reason to keep it separate. This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5c8a5796c71f..46a8e06bf03e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -916,34 +916,6 @@ void rcu_irq_exit_irqson(void) local_irq_restore(flags); } -/* - * rcu_eqs_exit_common - current CPU moving away from extended quiescent state - * - * If the new value of the ->dynticks_nesting counter was previously zero, - * we really have exited idle, and must do the appropriate accounting. - * The caller must have disabled interrupts. - */ -static void rcu_eqs_exit_common(long newval, int user) -{ - RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) - - rcu_dynticks_task_exit(); - rcu_dynticks_eqs_exit(); - rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! 
*/ - } -} - /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. @@ -963,11 +935,25 @@ static void rcu_eqs_exit(bool user) WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval) { rdtp->dynticks_nesting++; - } else { - rcu_eqs_exit_common(1, user); - WRITE_ONCE(rdtp->dynticks_nesting, 1); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); + return; + } + rcu_dynticks_task_exit(); + rcu_dynticks_eqs_exit(); + rcu_cleanup_after_idle(); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); + + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + rcu_ftrace_dump(DUMP_ORIG); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } + WRITE_ONCE(rdtp->dynticks_nesting, 1); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } /** -- cgit v1.2.3 From e68bbb266dcfed201d8d54a2828ef820d747f083 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 19:55:31 -0700 Subject: rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code The code that checks for non-idle non-nohz_idle-usermode tasks invoking rcu_eqs_enter() and rcu_eqs_exit() prints a considerable quantity of helpful information. However, these checks fire rarely, so the extra complexity is no longer worth it. This commit therefore replaces this debug code with simple WARN_ON_ONCE() statements. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 46a8e06bf03e..4d374d2bc925 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -773,17 +773,7 @@ static void rcu_eqs_enter(bool user) lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); do_nocb_deferred_wakeup(rdp); @@ -941,17 +931,7 @@ static void rcu_eqs_exit(bool user) rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! 
*/ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } -- cgit v1.2.3 From d633198088bd9e358566c470ed182994403acc7a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Oct 2017 13:52:30 -0700 Subject: srcu: Prohibit call_srcu() use under raw spinlocks Invoking queue_delayed_work() while holding a raw spinlock is forbidden in -rt kernels, which is exactly what __call_srcu() does, indirectly via srcu_funnel_gp_start(). This commit therefore downgrades Tree SRCU's locking from raw to non-raw spinlocks, which works because call_srcu() is not ever called while holding a raw spinlock. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutree.c | 109 +++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 6d5880089ff6..d5cea81378cc 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); static void process_srcu(struct work_struct *work); +/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ +#define spin_lock_rcu_node(p) \ +do { \ + spin_lock(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) + +#define spin_lock_irq_rcu_node(p) \ +do { \ + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_irq_rcu_node(p) \ + spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) + +#define spin_lock_irqsave_rcu_node(p, flags) \ +do { \ + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_irqrestore_rcu_node(p, flags) \ + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ + /* * Initialize SRCU combining tree. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and @@ -77,7 +104,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) /* Each pass through this loop initializes one srcu_node structure. */ rcu_for_each_node_breadth_first(sp, snp) { - raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -111,7 +138,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp_first = sp->level[level]; for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; @@ -170,7 +197,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, /* Don't re-initialize a lock while it is held. 
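One aside on the wrapper macros introduced in this SRCU patch: the smp_mb__after_unlock_lock() placed after each acquisition upgrades the preceding UNLOCK plus this LOCK into a full memory barrier, which Tree SRCU's grace-period machinery relies on. A minimal sketch of the same pattern written out by hand follows; the structure and function names are invented for illustration and are not part of the patch.

/* Invented example of the lock+smp_mb__after_unlock_lock() pattern. */
struct demo_node {
        spinlock_t lock;
        unsigned long gp_seq;
};

static void demo_record_gp(struct demo_node *np, unsigned long seq)
{
        spin_lock_irq(&np->lock);
        smp_mb__after_unlock_lock();    /* prior unlock + this lock now
                                         * provide full ordering */
        np->gp_seq = seq;               /* globally ordered against
                                         * earlier critical sections */
        spin_unlock_irq(&np->lock);
}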
*/ debug_check_no_locks_freed((void *)sp, sizeof(*sp)); lockdep_init_map(&sp->dep_map, name, key, 0); - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -187,7 +214,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); */ int init_srcu_struct(struct srcu_struct *sp) { - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -210,13 +237,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); return; } init_srcu_struct_fields(sp, true); - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -513,7 +540,7 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_lock(&sp->srcu_cb_mutex); /* End the current grace period. */ - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); idx = rcu_seq_state(sp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); cbdelay = srcu_get_delay(sp); @@ -522,7 +549,7 @@ static void srcu_gp_end(struct srcu_struct *sp) gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) sp->srcu_gp_seq_needed_exp = gpseq; - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ @@ -530,7 +557,7 @@ static void srcu_gp_end(struct srcu_struct *sp) idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); rcu_for_each_node_breadth_first(sp, snp) { - raw_spin_lock_irq_rcu_node(snp); + spin_lock_irq_rcu_node(snp); cbs = false; if (snp >= sp->level[rcu_num_lvls - 1]) cbs = snp->srcu_have_cbs[idx] == gpseq; @@ -540,7 +567,7 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_gp_seq_needed_exp = gpseq; mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - raw_spin_unlock_irq_rcu_node(snp); + spin_unlock_irq_rcu_node(snp); if (cbs) srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); @@ -548,11 +575,11 @@ static void srcu_gp_end(struct srcu_struct *sp) if (!(gpseq & counter_wrap_check)) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_irqsave_rcu_node(sdp, flags); + spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); + spin_unlock_irqrestore_rcu_node(sdp, flags); } } @@ -560,17 +587,17 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_unlock(&sp->srcu_cb_mutex); /* Start a new grace period if needed. */ - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { srcu_gp_start(sp); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); /* Throttle expedited grace periods: Should be rare! */ srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff ? 
0 : SRCU_INTERVAL); } else { - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); } } @@ -590,18 +617,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, if (rcu_seq_done(&sp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; - raw_spin_lock_irqsave_rcu_node(snp, flags); + spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); } - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) sp->srcu_gp_seq_needed_exp = s; - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -623,12 +650,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, for (; snp != NULL; snp = snp->srcu_parent) { if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ - raw_spin_lock_irqsave_rcu_node(snp, flags); + spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { snp_seq = snp->srcu_have_cbs[idx]; if (snp == sdp->mynode && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == sdp->mynode && snp_seq != s) { srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL @@ -644,11 +671,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, snp->srcu_data_have_cbs[idx] |= sdp->grpmask; if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) snp->srcu_gp_seq_needed_exp = s; - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -667,7 +694,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, queue_delayed_work(system_power_efficient_wq, &sp->work, srcu_get_delay(sp)); } - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; local_irq_save(flags); sdp = this_cpu_ptr(sp->sda); - raw_spin_lock_rcu_node(sdp); + spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); @@ -844,7 +871,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); + spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) srcu_funnel_gp_start(sp, sdp, s, do_norm); else if (needexp) @@ -900,7 +927,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /* * Make sure that later code is ordered after the SRCU grace - * period. This pairs with the raw_spin_lock_irq_rcu_node() + * period. This pairs with the spin_lock_irq_rcu_node() * in srcu_invoke_callbacks(). 
Unlike Tree RCU, this is needed * because the current CPU might have been totally uninvolved with * (and thus unordered against) that grace period. @@ -1024,7 +1051,7 @@ void srcu_barrier(struct srcu_struct *sp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); atomic_inc(&sp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1033,7 +1060,7 @@ void srcu_barrier(struct srcu_struct *sp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&sp->srcu_barrier_cpu_cnt); } - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ @@ -1082,17 +1109,17 @@ static void srcu_advance_state(struct srcu_struct *sp) */ idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(sp); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&sp->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1141,19 +1168,19 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp = container_of(work, struct srcu_data, work.work); sp = sdp->sp; rcu_cblist_init(&ready_cbs); - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } /* We are on the job! Extract and invoke ready callbacks. */ sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1166,13 +1193,13 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. */ - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, rcu_seq_snap(&sp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); if (more) srcu_schedule_cbs_sdp(sdp, 0); } @@ -1185,7 +1212,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { bool pushgp = true; - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1195,7 +1222,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) /* Outstanding request and no GP. Start one. 
*/ srcu_gp_start(sp); } - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); if (pushgp) queue_delayed_work(system_power_efficient_wq, &sp->work, delay); -- cgit v1.2.3 From dac95906003fec1b4801115830cc14ec61c74960 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 11:23:10 -0700 Subject: torture: Suppress CPU stall warnings during shutdown ftrace dump The torture_shutdown() function directly invokes ftrace_dump(), which can result in RCU CPU stall warnings when the ftrace buffer is large, which it usually is. This commit therefore invokes rcu_ftrace_dump() in place of ftrace_dump(), suppressing RCU CPU stall warnings during this time. Signed-off-by: Paul E. McKenney --- kernel/torture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index 637e172835d8..52781e838541 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -47,6 +47,7 @@ #include #include #include +#include "rcu/rcu.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); @@ -500,7 +501,7 @@ static int torture_shutdown(void *arg) torture_shutdown_hook(); else VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping."); - ftrace_dump(DUMP_ALL); + rcu_ftrace_dump(DUMP_ALL); kernel_power_off(); /* Shut down the system. */ return 0; } -- cgit v1.2.3 From 5c6338b4877038d28148fcfe1e7f862970ebaad1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:08:53 -0700 Subject: uprobes: Remove now-redundant smp_read_barrier_depends() Now that READ_ONCE() implies smp_read_barrier_depends(), the get_xol_area() and get_trampoline_vaddr() functions no longer need their smp_read_barrier_depends() calls, which this commit removes. While we are here, convert the corresponding smp_wmb() to an smp_store_release(). Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin --- kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 267f6ef91d97..ce6848e46e94 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1167,8 +1167,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) } ret = 0; - smp_wmb(); /* pairs with get_xol_area() */ - mm->uprobes_state.xol_area = area; + /* pairs with get_xol_area() */ + smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: up_write(&mm->mmap_sem); @@ -1230,8 +1230,8 @@ static struct xol_area *get_xol_area(void) if (!mm->uprobes_state.xol_area) __create_xol_area(0); - area = mm->uprobes_state.xol_area; - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } @@ -1528,8 +1528,8 @@ static unsigned long get_trampoline_vaddr(void) struct xol_area *area; unsigned long trampoline_vaddr = -1; - area = current->mm->uprobes_state.xol_area; - smp_read_barrier_depends(); + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; -- cgit v1.2.3
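The xol_area conversion above is an instance of the general pointer-publication pattern: fully initialize the object, publish the pointer with smp_store_release(), and let readers pick it up with READ_ONCE(), whose implied dependency ordering is now sufficient even on Alpha. A minimal sketch of that pattern, using illustrative names (my_area, my_install, my_lookup) rather than the actual uprobes code:

struct my_area {
	unsigned long vaddr;
};

static struct my_area *shared_area;

static void my_install(struct my_area *area)
{
	area->vaddr = 0x1000;			/* Initialize everything first... */
	smp_store_release(&shared_area, area);	/* ...then publish the pointer. */
}

static unsigned long my_lookup(void)
{
	/* Pairs with the smp_store_release() in my_install(). */
	struct my_area *area = READ_ONCE(shared_area);

	return area ? area->vaddr : 0;
}

A reader that dereferences the pointer is ordered after the initialization by the address dependency that READ_ONCE() now honors, so no read-side barrier is needed.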
McKenney" Date: Mon, 9 Oct 2017 11:22:50 -0700 Subject: locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() Queued spinlocks are not used by DEC Alpha, and furthermore operations such as READ_ONCE() and release/relaxed RMW atomics are being changed to imply smp_read_barrier_depends(). This commit therefore removes the now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(), and adjusts the comments accordingly. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar --- kernel/locking/qspinlock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 294294c71ba4..38ece035039e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev = xchg(lock, node) */ @@ -409,13 +409,11 @@ queue: if (old & _Q_TAIL_MASK) { prev = decode_tail(old); /* - * The above xchg_tail() is also a load of @lock which generates, - * through decode_tail(), a pointer. - * - * The address dependency matches the RELEASE of xchg_tail() - * such that the access to @prev must happen after. + * The above xchg_tail() is also a load of @lock which + * generates, through decode_tail(), a pointer. The address + * dependency matches the RELEASE of xchg_tail() such that + * the subsequent access to @prev happens after. */ - smp_read_barrier_depends(); WRITE_ONCE(prev->next, node); -- cgit v1.2.3 From 243d1a7977ae0814aa1ccb8bb87f8a4e0822ca8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:30:11 -0700 Subject: tracepoint: Remove smp_read_barrier_depends() from comment The comment in tracepoint_add_func() mentions smp_read_barrier_depends(), whose use should be quite restricted. This commit updates the comment to instead mention the smp_store_release() and rcu_dereference_sched() that the current code actually uses. Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Acked-by: Steven Rostedt (VMware) Acked-by: Mathieu Desnoyers --- kernel/tracepoint.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 685c50ae6300..671b13457387 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -212,11 +212,10 @@ static int tracepoint_add_func(struct tracepoint *tp, } /* - * rcu_assign_pointer has a smp_wmb() which makes sure that the new - * probe callbacks array is consistent before setting a pointer to it. - * This array is referenced by __DO_TRACE from - * include/linux/tracepoints.h. A matching smp_read_barrier_depends() - * is used. + * rcu_assign_pointer has as smp_store_release() which makes sure + * that the new probe callbacks array is consistent before setting + * a pointer to it. This array is referenced by __DO_TRACE from + * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); if (!static_key_enabled(&tp->key)) -- cgit v1.2.3 From efd88b02bb9e6b8b73a20ea611e5d07ed6d4af34 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 19 Oct 2017 14:52:41 -0700 Subject: rcu: Add comment giving debug strategy for double call_rcu() The following statement has for some reason proven non-intuitive: WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); This commit therefore adds a comment that states that this warning usually triggers in response to a double call_rcu(), which is sort of like a double free. The comment also suggests building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y to track down the double call_rcu(). Reported-by: David Howells Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c0ca2ccf0c..1bdc0481aaf1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2789,6 +2789,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->n_force_qs_snap = rsp->n_force_qs; } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; + + /* + * The following usually indicates a double call_rcu(). To track + * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. + */ WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); local_irq_restore(flags); -- cgit v1.2.3 From 84b12b752f41cd3d25d75692c2145d816e42926c Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 17 Nov 2017 21:40:15 +0600 Subject: rcu: Remove have_rcu_nocb_mask from tree_plugin.h Currently have_rcu_nocb_mask is used to avoid double allocation of rcu_nocb_mask during boot up. Due to different representation of cpumask_var_t on different kernel config CPUMASK=y(or n) it was okay. But now we have a helper cpumask_available(), which can be utilized to check whether rcu_nocb_mask has been allocated or not without using a variable. Removing the variable also reduces vmlinux size. Unpatched version: text data bss dec hex filename 13050393 7852470 14543408 35446271 21cddff vmlinux Patched version: text data bss dec hex filename 13050390 7852438 14543408 35446236 21cdddc vmlinux Signed-off-by: Rakib Mullick Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..13a8e08f1998 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -61,7 +61,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ -static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ @@ -1752,7 +1751,6 @@ static void increment_cpu_stall_ticks(void) static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - have_rcu_nocb_mask = true; cpulist_parse(str, rcu_nocb_mask); return 1; } @@ -1801,7 +1799,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) /* Is the specified CPU a no-CBs CPU? 
From 84b12b752f41cd3d25d75692c2145d816e42926c Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 17 Nov 2017 21:40:15 +0600 Subject: rcu: Remove have_rcu_nocb_mask from tree_plugin.h Currently have_rcu_nocb_mask is used to avoid double allocation of rcu_nocb_mask during boot up. Because cpumask_var_t is represented differently depending on the kernel configuration (CONFIG_CPUMASK_OFFSTACK=y or n), this separate flag was a reasonable approach. But we now have the helper cpumask_available(), which can be used to check whether rcu_nocb_mask has been allocated without any extra variable. Removing the variable also reduces vmlinux size. Unpatched version: text data bss dec hex filename 13050393 7852470 14543408 35446271 21cddff vmlinux Patched version: text data bss dec hex filename 13050390 7852438 14543408 35446236 21cdddc vmlinux Signed-off-by: Rakib Mullick Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..13a8e08f1998 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -61,7 +61,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ -static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ @@ -1752,7 +1751,6 @@ static void increment_cpu_stall_ticks(void) static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - have_rcu_nocb_mask = true; cpulist_parse(str, rcu_nocb_mask); return 1; } @@ -1801,7 +1799,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) /* Is the specified CPU a no-CBs CPU? */ bool rcu_is_nocb_cpu(int cpu) { - if (have_rcu_nocb_mask) + if (cpumask_available(rcu_nocb_mask)) return cpumask_test_cpu(cpu, rcu_nocb_mask); return false; } @@ -2295,14 +2293,13 @@ void __init rcu_init_nohz(void) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ - if (!have_rcu_nocb_mask && need_rcu_nocb_mask) { + if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) { if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); return; } - have_rcu_nocb_mask = true; } - if (!have_rcu_nocb_mask) + if (!cpumask_available(rcu_nocb_mask)) return; #if defined(CONFIG_NO_HZ_FULL) @@ -2428,7 +2425,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ struct rcu_data *rdp_prev = NULL; - if (!have_rcu_nocb_mask) + if (!cpumask_available(rcu_nocb_mask)) return; if (ls == -1) { ls = int_sqrt(nr_cpu_ids); -- cgit v1.2.3 From cc1321c96f855525fbd847fec130f000daa1bb1f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Oct 2017 11:05:03 -0700 Subject: torture: Reduce #ifdefs for preempt_schedule() This commit adds a torture_preempt_schedule() that is nothingness in !PREEMPT builds and is preempt_schedule() otherwise. Then torture_preempt_schedule() is used to eliminate several ugly #ifdefs, both in rcutorture and in locktorture. Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 24 ++++++------------------ kernel/rcu/rcutorture.c | 4 +--- 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f24582d4dad3..617cea2520b3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -130,10 +130,8 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_lock_busted_write_unlock(void) @@ -179,10 +177,8 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) @@ -352,10 +348,8 @@ static void torture_mutex_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 5); else mdelay(longdelay_ms / 5); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_mutex_unlock(void) __releases(torture_mutex) @@ -507,10 +501,8 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted.
*/ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rtmutex_unlock(void) __releases(torture_rtmutex) @@ -547,10 +539,8 @@ static void torture_rwsem_write_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 10); else mdelay(longdelay_ms / 10); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rwsem_up_write(void) __releases(torture_rwsem) @@ -574,10 +564,8 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rwsem_up_read(void) __releases(torture_rwsem) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 74f6b0146b98..e7d3cce84214 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -315,11 +315,9 @@ static void rcu_read_delay(struct torture_random_state *rrsp) } if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!preempt_count() && !(torture_random(rrsp) % (nrealreaders * 20000))) - preempt_schedule(); /* No QS if preempt_disable() in effect */ -#endif + torture_preempt_schedule(); /* QS only if preemptible. */ } static void rcu_torture_read_unlock(int idx) __releases(RCU) -- cgit v1.2.3
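The diffstat above is limited to kernel/, so the definition of torture_preempt_schedule() itself does not appear here; presumably it lands in include/linux/torture.h as something like the following sketch (the exact placement and comments are assumptions, not part of the patch shown):

#ifdef CONFIG_PREEMPT
#define torture_preempt_schedule()	preempt_schedule()
#else	/* !CONFIG_PREEMPT */
#define torture_preempt_schedule()	/* The "nothingness" mentioned above. */
#endif

Because the macro expands to nothing in !PREEMPT builds, each call site compiles away entirely, which is what lets the #ifdef blocks above collapse into single lines.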
From e8302739aa2204d52dacf9e9619cb6e755fa997a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Oct 2017 11:23:42 -0700 Subject: rcutorture: Preempt RCU-preempt readers more vigorously This commit attempts to make a very rare rcutorture failure happen more often by increasing the fraction of RCU-preempt read-side critical sections that are preempted. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index e7d3cce84214..1074ecc3f72f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -316,7 +316,7 @@ static void rcu_read_delay(struct torture_random_state *rrsp) if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); if (!preempt_count() && - !(torture_random(rrsp) % (nrealreaders * 20000))) + !(torture_random(rrsp) % (nrealreaders * 500))) torture_preempt_schedule(); /* QS only if preemptible. */ } -- cgit v1.2.3 From f2f762608f45353b0b8c37507824f95bb716c3d5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 15 May 2017 02:07:22 -0700 Subject: locking/locktorture: Fix rwsem reader_delay We should account for nreader threads, not writers, in this callback. This could even trigger a divide-by-zero if the user explicitly disables writers. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 617cea2520b3..a307a79e6b0b 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -560,7 +560,7 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealreaders_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); -- cgit v1.2.3 From 2ce77d16db4240dd2e422fc0a5c26d3e2ec03446 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 15 May 2017 02:07:23 -0700 Subject: locking/locktorture: Fix num reader/writer corner cases Things can explode for locktorture if the user specifies nwriters_stress=0 together with nreaders_stress=0. Fix this by not assuming we always want to torture writer threads. Reported-by: Jeremy Linton Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney Reviewed-by: Jeremy Linton Tested-by: Jeremy Linton --- kernel/locking/locktorture.c | 76 +++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index a307a79e6b0b..2a1fc2a58910 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -703,8 +703,7 @@ static void __torture_print_stats(char *page, { bool fail = 0; int i, n_stress; - long max = 0; - long min = statp[0].n_lock_acquired; + long max = 0, min = statp ? statp[0].n_lock_acquired : 0; long long sum = 0; n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; @@ -811,7 +810,7 @@ static void lock_torture_cleanup(void) * such, only perform the underlying torture-specific cleanups, * and avoid anything related to locktorture. */ - if (!cxt.lwsa) + if (!cxt.lwsa && !cxt.lrsa) goto end; if (writer_tasks) { @@ -886,6 +885,13 @@ static int __init lock_torture_init(void) firsterr = -EINVAL; goto unwind; } + + if (nwriters_stress == 0 && nreaders_stress == 0) { + pr_alert("lock-torture: must run at least one locking thread\n"); + firsterr = -EINVAL; + goto unwind; + } + if (cxt.cur_ops->init) cxt.cur_ops->init(); @@ -909,17 +915,19 @@ static int __init lock_torture_init(void) #endif /* Initialize the statistics so that each run gets its own numbers.
*/ + if (nwriters_stress) { + lock_is_write_held = 0; + cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); + if (cxt.lwsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } - lock_is_write_held = 0; - cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); - if (cxt.lwsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < cxt.nrealwriters_stress; i++) { - cxt.lwsa[i].n_lock_fail = 0; - cxt.lwsa[i].n_lock_acquired = 0; + for (i = 0; i < cxt.nrealwriters_stress; i++) { + cxt.lwsa[i].n_lock_fail = 0; + cxt.lwsa[i].n_lock_acquired = 0; + } } if (cxt.cur_ops->readlock) { @@ -936,19 +944,21 @@ static int __init lock_torture_init(void) cxt.nrealreaders_stress = cxt.nrealwriters_stress; } - lock_is_read_held = 0; - cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); - if (cxt.lrsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); - firsterr = -ENOMEM; - kfree(cxt.lwsa); - cxt.lwsa = NULL; - goto unwind; - } - - for (i = 0; i < cxt.nrealreaders_stress; i++) { - cxt.lrsa[i].n_lock_fail = 0; - cxt.lrsa[i].n_lock_acquired = 0; + if (nreaders_stress) { + lock_is_read_held = 0; + cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); + if (cxt.lrsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); + firsterr = -ENOMEM; + kfree(cxt.lwsa); + cxt.lwsa = NULL; + goto unwind; + } + + for (i = 0; i < cxt.nrealreaders_stress; i++) { + cxt.lrsa[i].n_lock_fail = 0; + cxt.lrsa[i].n_lock_acquired = 0; + } } } @@ -978,12 +988,14 @@ static int __init lock_torture_init(void) goto unwind; } - writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), - GFP_KERNEL); - if (writer_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); - firsterr = -ENOMEM; - goto unwind; + if (nwriters_stress) { + writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), + GFP_KERNEL); + if (writer_tasks == NULL) { + VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } } if (cxt.cur_ops->readlock) { -- cgit v1.2.3 From 4ced3314fd3a73dabac4e8a41747883eff36c3e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 15:01:02 -0800 Subject: torture: Make stutter less vulnerable to compilers and races The stutter_wait() function repeatedly fetched stutter_pause_test, and should really just fetch it once on each pass. The races should be harmless, but why have the races? Also, the whole point of the value "2" for stutter_pause_test is to get everyone to start at very nearly the same time, but the value "2" was the first jiffy of the stutter rather than the last jiffy of the stutter. This commit rearranges the code to be more sensible. Signed-off-by: Paul E. 
McKenney --- kernel/torture.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index 52781e838541..3bcbd4fbfe18 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -573,18 +573,21 @@ static int stutter; */ void stutter_wait(const char *title) { + int spt; + cond_resched_rcu_qs(); - while (READ_ONCE(stutter_pause_test) || - (torture_runnable && !READ_ONCE(*torture_runnable))) { - if (stutter_pause_test) - if (READ_ONCE(stutter_pause_test) == 1) - schedule_timeout_interruptible(1); - else - while (READ_ONCE(stutter_pause_test)) - cond_resched(); - else + spt = READ_ONCE(stutter_pause_test); + while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) { + if (spt == 1) { + schedule_timeout_interruptible(1); + } else if (spt == 2) { + while (READ_ONCE(stutter_pause_test)) + cond_resched(); + } else { schedule_timeout_interruptible(round_jiffies_relative(HZ)); + } torture_shutdown_absorb(title); + spt = READ_ONCE(stutter_pause_test); } } EXPORT_SYMBOL_GPL(stutter_wait); @@ -597,17 +600,15 @@ static int torture_stutter(void *arg) { VERBOSE_TOROUT_STRING("torture_stutter task started"); do { - if (!torture_must_stop()) { - if (stutter > 1) { - schedule_timeout_interruptible(stutter - 1); - WRITE_ONCE(stutter_pause_test, 2); - } - schedule_timeout_interruptible(1); + if (!torture_must_stop() && stutter > 1) { WRITE_ONCE(stutter_pause_test, 1); + schedule_timeout_interruptible(stutter - 1); + WRITE_ONCE(stutter_pause_test, 2); + schedule_timeout_interruptible(1); } + WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) schedule_timeout_interruptible(stutter); - WRITE_ONCE(stutter_pause_test, 0); torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); torture_kthread_stopping("torture_stutter"); -- cgit v1.2.3 From a2f2577d96ad060b65eb909dd39b57d676754119 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 20:19:17 -0800 Subject: torture: Eliminate torture_runnable and perf_runnable The purpose of torture_runnable is to allow rcutorture and locktorture to be started and stopped via sysfs when they are built into the kernel (as in not compiled as loadable modules). However, the 0444 permissions for both instances of torture_runnable prevent this use case from ever being put into practice. Given that there have been no complaints about this deficiency, it is reasonable to conclude that no one actually makes use of this sysfs capability. The perf_runnable module parameter for rcuperf is in the same situation. This commit therefore removes both torture_runnable instances as well as perf_runnable. Reported-by: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 6 +----- kernel/rcu/rcuperf.c | 6 +----- kernel/rcu/rcutorture.c | 6 +----- kernel/torture.c | 6 ++---- 4 files changed, 5 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 2a1fc2a58910..6850ffd69125 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -77,10 +77,6 @@ struct lock_stress_stats { long n_lock_acquired; }; -int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init"); - /* Forward reference. 
*/ static void lock_torture_cleanup(void); @@ -866,7 +862,7 @@ static int __init lock_torture_init(void) &percpu_rwsem_lock_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 1f87a02c3399..d1ebdf9868bb 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -106,10 +106,6 @@ static int rcu_perf_writer_state; #define MAX_MEAS 10000 #define MIN_MEAS 100 -static int perf_runnable = IS_ENABLED(MODULE); -module_param(perf_runnable, int, 0444); -MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot"); - /* * Operations vector for selecting different types of tests. */ @@ -646,7 +642,7 @@ rcu_perf_init(void) &tasks_ops, }; - if (!torture_init_begin(perf_type, verbose, &perf_runnable)) + if (!torture_init_begin(perf_type, verbose)) return -EBUSY; /* Process args and tell the world that the perf'er is on the job. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 1074ecc3f72f..308e6fdbced8 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -187,10 +187,6 @@ static const char *rcu_torture_writer_state_getname(void) return rcu_torture_writer_state_names[i]; } -static int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot"); - #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) #define rcu_can_boost() 1 #else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ @@ -1729,7 +1725,7 @@ rcu_torture_init(void) &sched_ops, &tasks_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ diff --git a/kernel/torture.c b/kernel/torture.c index 3bcbd4fbfe18..572576ad9f58 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -61,7 +61,6 @@ static bool verbose; #define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */ static int fullstop = FULLSTOP_RMMOD; static DEFINE_MUTEX(fullstop_mutex); -static int *torture_runnable; #ifdef CONFIG_HOTPLUG_CPU @@ -577,7 +576,7 @@ void stutter_wait(const char *title) cond_resched_rcu_qs(); spt = READ_ONCE(stutter_pause_test); - while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) { + while (spt) { if (spt == 1) { schedule_timeout_interruptible(1); } else if (spt == 2) { @@ -649,7 +648,7 @@ static void torture_stutter_cleanup(void) * The runnable parameter points to a flag that controls whether or not * the test is currently runnable. If there is no such flag, pass in NULL. */ -bool torture_init_begin(char *ttype, bool v, int *runnable) +bool torture_init_begin(char *ttype, bool v) { mutex_lock(&fullstop_mutex); if (torture_type != NULL) { @@ -661,7 +660,6 @@ bool torture_init_begin(char *ttype, bool v, int *runnable) } torture_type = ttype; verbose = v; - torture_runnable = runnable; fullstop = FULLSTOP_DONTSTOP; return true; } -- cgit v1.2.3 From 29d3939084583b26a5487be64b9523e61468f1be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 22:07:59 -0800 Subject: torture: Save a line in stutter_wait(): while -> for Signed-off-by: Paul E. 
McKenney --- kernel/torture.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/torture.c b/kernel/torture.c index 572576ad9f58..37b94012a3f8 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -576,7 +576,7 @@ void stutter_wait(const char *title) cond_resched_rcu_qs(); spt = READ_ONCE(stutter_pause_test); - while (spt) { + for (; spt; spt = READ_ONCE(stutter_pause_test)) { if (spt == 1) { schedule_timeout_interruptible(1); } else if (spt == 2) { @@ -586,7 +586,6 @@ void stutter_wait(const char *title) schedule_timeout_interruptible(round_jiffies_relative(HZ)); } torture_shutdown_absorb(title); - spt = READ_ONCE(stutter_pause_test); } } EXPORT_SYMBOL_GPL(stutter_wait); -- cgit v1.2.3
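Taken together, the stutter-related commits above leave stutter_pause_test implementing a small three-state protocol between torture_stutter() and the test threads in stutter_wait(): 0 means run, 1 means sleep in one-jiffy chunks, and 2 marks the final jiffy of the stutter, during which waiters spin with cond_resched() so that they all resume at very nearly the same time. A condensed sketch of the resulting protocol, boiled down from the patches above rather than a drop-in replacement:

/* Producer side, one stutter period (from torture_stutter()). */
WRITE_ONCE(stutter_pause_test, 1);	/* Sleep phase. */
schedule_timeout_interruptible(stutter - 1);
WRITE_ONCE(stutter_pause_test, 2);	/* Spin phase: line everyone up. */
schedule_timeout_interruptible(1);
WRITE_ONCE(stutter_pause_test, 0);	/* Go. */

/* Consumer side (from stutter_wait()), loading the flag once per pass. */
int spt;

for (spt = READ_ONCE(stutter_pause_test); spt;
     spt = READ_ONCE(stutter_pause_test)) {
	if (spt == 1)
		schedule_timeout_interruptible(1);
	else
		cond_resched();	/* spt == 2: spin until the flag clears. */
}

Loading the flag into spt exactly once per pass keeps the compiler from re-reading stutter_pause_test at inconsistent points within a pass, which is the race that the "less vulnerable to compilers" commit closes; the WRITE_ONCE()/READ_ONCE() pairing likewise prevents store and load tearing.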