From 34ed62461ae4970695974afb9a60ac3df0086830 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 7 Jan 2013 13:37:42 -0800
Subject: rcu: Remove restrictions on no-CBs CPUs

Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore
at least one no-CBs CPU must remain online at any given time.  These
restrictions are problematic in some situations, such as cases where
all CPUs must run a real-time workload that needs to be insulated from
OS jitter and latencies due to RCU callback invocation.  This commit
therefore provides no-CBs CPUs a (very crude and energy-inefficient)
way to start and to wait for grace periods independently of the normal
RCU callback mechanisms.  This approach allows any or all of the CPUs
to be designated as no-CBs CPUs, and allows any proper subset of the
CPUs (whether no-CBs CPUs or not) to be offlined.

This commit also provides a fix for a locking bug spotted by Xie
ChanglongX .

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5b8ad827fd86..6ad0716e65dc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -310,6 +310,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	if (rcu_gp_in_progress(rsp))
 		return 0;  /* No, a grace period is already in progress. */
+	if (rcu_nocb_needs_gp(rdp))
+		return 1;  /* Yes, a no-CBs CPU needs one. */
 	if (!rdp->nxttail[RCU_NEXT_TAIL])
 		return 0;  /* No, this is a no-CBs (or offline) CPU. */
 	if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@ -1035,10 +1037,11 @@ static void init_callback_list(struct rcu_data *rdp)
 {
 	int i;
 
+	if (init_nocb_callback_list(rdp))
+		return;
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
-	init_nocb_callback_list(rdp);
 }
 
 /*
@@ -2909,7 +2912,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 	struct rcu_state *rsp;
-	int ret = NOTIFY_OK;
 
 	trace_rcu_utilization("Start CPU hotplug");
 	switch (action) {
@@ -2923,10 +2925,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		rcu_boost_kthread_setaffinity(rnp, -1);
 		break;
 	case CPU_DOWN_PREPARE:
-		if (nocb_cpu_expendable(cpu))
-			rcu_boost_kthread_setaffinity(rnp, cpu);
-		else
-			ret = NOTIFY_BAD;
+		rcu_boost_kthread_setaffinity(rnp, cpu);
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
@@ -2950,7 +2949,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		break;
 	}
 	trace_rcu_utilization("End CPU hotplug");
-	return ret;
+	return NOTIFY_OK;
 }
 
 /*
@@ -3170,7 +3169,6 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
-	rcu_init_nocb();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
--
cgit v1.2.3
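The init_callback_list() change above leaves the segmented callback list
untouched on no-CBs CPUs.  The list itself is a single linked list plus an
array of tail pointers, one per segment.  The short user-space sketch below
pictures that structure; the struct and function names are assumptions made
for illustration, not the kernel's own code.

/*
 * Minimal user-space sketch of a segmented callback list: one linked
 * list, plus an array of tail pointers, one per segment.  Resetting
 * the list makes every tail pointer refer to the list head, exactly
 * as init_callback_list() does with rdp->nxttail[] above.
 */
#include <stdio.h>
#include <stddef.h>

#define NSEG 4			/* stand-in for RCU_NEXT_SIZE */

struct cb {
	struct cb *next;
	int id;
};

struct cb_list {
	struct cb *head;
	struct cb **tail[NSEG];	/* tail[i]: where segment i would append */
};

static void init_cb_list(struct cb_list *l)
{
	int i;

	l->head = NULL;
	for (i = 0; i < NSEG; i++)
		l->tail[i] = &l->head;	/* every segment is empty */
}

/* Append to the last segment, as call_rcu() does for newly posted callbacks. */
static void enqueue(struct cb_list *l, struct cb *c)
{
	c->next = NULL;
	*l->tail[NSEG - 1] = c;
	l->tail[NSEG - 1] = &c->next;
}

int main(void)
{
	struct cb_list l;
	struct cb a = { .id = 1 }, b = { .id = 2 };
	struct cb *p;

	init_cb_list(&l);
	enqueue(&l, &a);
	enqueue(&l, &b);
	for (p = l.head; p; p = p->next)
		printf("callback %d\n", p->id);
	return 0;
}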
From b5b393601dbce0bce3f0717f29e6c8d1cf0295da Mon Sep 17 00:00:00 2001
From: Jiang Fang
Date: Sat, 2 Feb 2013 14:13:42 -0800
Subject: rcu: Fix spacing problem

Signed-off-by: Jiang Fang
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5b8ad827fd86..157539a975df 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -3171,7 +3171,7 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
 	rcu_init_nocb();
-	open_softirq(RCU_SOFTIRQ,  rcu_process_callbacks);
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
--
cgit v1.2.3


From b0f740360efec6e6471547c0548f250bc045a233 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 4 Feb 2013 12:14:24 -0800
Subject: rcu: Avoid invoking RCU core on offline CPUs

Offline CPUs transition through the scheduler to the idle loop one
last time before being shut down.  This can result in RCU raising
softirq on this CPU, which is at best useless given that the CPU's
callbacks will be offloaded at CPU_DEAD time.  This commit therefore
avoids raising softirq on offline CPUs.

Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 157539a975df..b2fc234ba1b9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2169,7 +2169,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 
 static void invoke_rcu_core(void)
 {
-	raise_softirq(RCU_SOFTIRQ);
+	if (cpu_online(smp_processor_id()))
+		raise_softirq(RCU_SOFTIRQ);
 }
 
 /*
--
cgit v1.2.3


From 0bdf5984ad647ba5d1c09ed66a75e5bf609456ba Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat"
Date: Fri, 8 Feb 2013 16:11:29 -0800
Subject: rcu: Remove comment referring to __stop_machine()

Although it used to be that CPU_DYING notifiers executed on the
outgoing CPU with interrupts disabled and with all other CPUs spinning,
this is no longer the case.  This commit therefore removes this
obsolete comment.

Signed-off-by: Srivatsa S. Bhat
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index b2fc234ba1b9..71df6f9f5ce6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2931,11 +2931,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		/*
-		 * The whole machine is "stopped" except this CPU, so we can
-		 * touch any data without introducing corruption. We send the
-		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
-		 */
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
 		rcu_cleanup_after_idle(cpu);
--
cgit v1.2.3
From 81e59494a56cb14f559886c345c4a93fb576bbab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sun, 10 Mar 2013 15:44:52 -0700
Subject: rcu: Tone down debugging during boot-up and shutdown.

In some situations, randomly delaying RCU grace-period initialization
can cause more trouble than help.  This commit therefore restricts this
type of RCU self-torture to runtime, giving it a rest during boot and
shutdown.

Reported-by: Sasha Levin
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 71df6f9f5ce6..0e522504ae37 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1319,7 +1319,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
 			    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-		if ((random32() % (rcu_num_nodes * 8)) == 0)
+		if ((random32() % (rcu_num_nodes * 8)) == 0 &&
+		    system_state == SYSTEM_RUNNING)
 			schedule_timeout_uninterruptible(2);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
 		cond_resched();
--
cgit v1.2.3


From dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sun, 10 Feb 2013 20:48:58 -0800
Subject: rcu: Introduce proper blocking to no-CBs kthreads GP waits

Currently, the no-CBs kthreads do repeated timed waits for grace
periods to elapse.  This is crude and energy inefficient, so this
commit allows no-CBs kthreads to specify exactly which grace period
they are waiting for and also allows them to block for the entire
duration until the desired grace period completes.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6ad0716e65dc..433f426c848f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -310,7 +310,7 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
 	if (rcu_gp_in_progress(rsp))
 		return 0;  /* No, a grace period is already in progress. */
-	if (rcu_nocb_needs_gp(rdp))
+	if (rcu_nocb_needs_gp(rsp))
 		return 1;  /* Yes, a no-CBs CPU needs one. */
 	if (!rdp->nxttail[RCU_NEXT_TAIL])
 		return 0;  /* No, this is a no-CBs (or offline) CPU. */
@@ -1364,6 +1364,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 static void rcu_gp_cleanup(struct rcu_state *rsp)
 {
 	unsigned long gp_duration;
+	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
@@ -1394,11 +1395,13 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
 		rnp->completed = rsp->gpnum;
+		nocb += rcu_nocb_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
+	rcu_nocb_gp_set(rnp, nocb);
 	rsp->completed = rsp->gpnum; /* Declare grace period done. */
 	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
@@ -3084,6 +3087,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 		}
 		rnp->level = i;
 		INIT_LIST_HEAD(&rnp->blkd_tasks);
+		rcu_init_one_nocb(rnp);
 	}
 }
 
--
cgit v1.2.3
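The CONFIG_PROVE_RCU_DELAY change above gates a randomized self-test delay
on system_state so that it only fires at runtime.  A rough user-space
analogue of that gating is sketched below; the names, probabilities, and
delay length are assumptions made for the example, not the kernel's.

/*
 * Illustrative user-space sketch of a low-probability debug delay that
 * is suppressed unless the "system" is in its normal running state.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

enum system_state { SYSTEM_BOOTING, SYSTEM_RUNNING, SYSTEM_SHUTDOWN };

static enum system_state system_state = SYSTEM_BOOTING;
static int num_nodes = 4;	/* stand-in for rcu_num_nodes */

static void maybe_inject_delay(void)
{
	/* Delay roughly once per (num_nodes * 8) calls, and only at runtime. */
	if ((rand() % (num_nodes * 8)) == 0 && system_state == SYSTEM_RUNNING)
		usleep(2000);
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++)
		maybe_inject_delay();	/* never delays: still "booting" */
	system_state = SYSTEM_RUNNING;
	for (i = 0; i < 100; i++)
		maybe_inject_delay();	/* now delays occasionally */
	puts("done");
	return 0;
}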
From a488985851cf2facd2227bd982cc2c251df56268 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 3 Dec 2012 08:16:28 -0800
Subject: rcu: Distinguish "rcuo" kthreads by RCU flavor

Currently, the per-no-CBs-CPU kthreads are named "rcuo" followed by the
CPU number, for example, "rcuo".  This is problematic given that there
are either two or three RCU flavors, each of which gets a per-CPU
kthread with exactly the same name.  This commit therefore introduces
a one-letter abbreviation for each RCU flavor, namely 'b' for RCU-bh,
'p' for RCU-preempt, and 's' for RCU-sched.  This abbreviation is used
to distinguish the "rcuo" kthreads, for example, for CPU 0 we would
have "rcuob/0", "rcuop/0", and "rcuos/0".

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
Tested-by: Dietmar Eggemann
---
 kernel/rcutree.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 433f426c848f..074cb2d974bf 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -64,7 +64,7 @@
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
-#define RCU_STATE_INITIALIZER(sname, cr) { \
+#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
 	.level = { &sname##_state.node[0] }, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
 	.name = #sname, \
+	.abbr = sabbr, \
 }
 
 struct rcu_state rcu_sched_state =
-	RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+	RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 
-struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 static struct rcu_state *rcu_state;
--
cgit v1.2.3
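The naming scheme described in this commit ("rcuo", a one-letter flavor
abbreviation, a slash, then the CPU number) can be illustrated with the
small user-space sketch below.  The struct and helper are assumptions for
the example; they are not the kernel's kthread-creation code.

/*
 * Build per-CPU kthread names from a flavor abbreviation, mirroring
 * the "rcuob/0", "rcuop/0", "rcuos/0" scheme described above.
 */
#include <stdio.h>

struct flavor {
	const char *name;
	char abbr;
};

static void print_kthread_name(const struct flavor *f, int cpu)
{
	char comm[16];	/* kernel task names are limited to 16 bytes */

	snprintf(comm, sizeof(comm), "rcuo%c/%d", f->abbr, cpu);
	printf("%-12s -> %s\n", f->name, comm);
}

int main(void)
{
	const struct flavor flavors[] = {
		{ "rcu_bh",      'b' },
		{ "rcu_preempt", 'p' },
		{ "rcu_sched",   's' },
	};
	int i;

	for (i = 0; i < 3; i++)
		print_kthread_name(&flavors[i], 0);	/* rcuob/0, rcuop/0, rcuos/0 */
	return 0;
}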
From b11cc5760a9c48c870ad286e8a6d8fdb998fa58d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 17 Dec 2012 14:21:14 -0800
Subject: rcu: Accelerate RCU callbacks at grace-period end

Now that callback acceleration is idempotent, it is safe to accelerate
callbacks during grace-period cleanup on any CPUs that the kthread
happens to be running on.  This commit therefore propagates the
completion of the grace period to the per-CPU data structures, and
also adds an rcu_advance_cbs() just before the cpu_needs_another_gp()
check in order to reduce false-positive grace periods.

Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 074cb2d974bf..2015bce749f9 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1396,6 +1396,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
 		rnp->completed = rsp->gpnum;
+		rdp = this_cpu_ptr(rsp->rda);
+		if (rnp == rdp->mynode)
+			__rcu_process_gp_end(rsp, rnp, rdp);
 		nocb += rcu_nocb_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
@@ -1408,6 +1411,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
 	rsp->fqs_state = RCU_GP_IDLE;
 	rdp = this_cpu_ptr(rsp->rda);
+	rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
 	if (cpu_needs_another_gp(rsp, rdp))
 		rsp->gp_flags = 1;
 	raw_spin_unlock_irq(&rnp->lock);
@@ -1497,6 +1501,15 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	/*
+	 * If there is no grace period in progress right now, any
+	 * callbacks we have up to this point will be satisfied by the
+	 * next grace period.  Also, advancing the callbacks reduces the
+	 * probability of false positives from cpu_needs_another_gp()
+	 * resulting in pointless grace periods.  So, advance callbacks!
+	 */
+	rcu_advance_cbs(rsp, rnp, rdp);
+
 	if (!rsp->gp_kthread ||
 	    !cpu_needs_another_gp(rsp, rdp)) {
 		/*
@@ -1509,14 +1522,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		return;
 	}
 
-	/*
-	 * Because there is no grace period in progress right now,
-	 * any callbacks we have up to this point will be satisfied
-	 * by the next grace period.  So this is a good place to
-	 * assign a grace period number to recently posted callbacks.
-	 */
-	rcu_accelerate_cbs(rsp, rnp, rdp);
-
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
 
--
cgit v1.2.3


From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 28 Dec 2012 11:30:36 -0800
Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks

Because RCU callbacks are now associated with the number of the grace
period that they must wait for, CPUs can now take advance callbacks
corresponding to grace periods that ended while a given CPU was in
dyntick-idle mode.  This eliminates the need to try forcing the RCU
state machine while entering idle, thus reducing the CPU intensiveness
of RCU_FAST_NO_HZ, which should increase its energy efficiency.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2015bce749f9..7b1d7769872a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
 {
+	bool al = true;
+	bool hc = false;
+	struct rcu_data *rdp;
 	struct rcu_state *rsp;
 
-	/* RCU callbacks either ready or pending? */
-	for_each_rcu_flavor(rsp)
-		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-			return 1;
-	return 0;
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (rdp->qlen != rdp->qlen_lazy)
+			al = false;
+		if (rdp->nxtlist)
+			hc = true;
+	}
+	if (all_lazy)
+		*all_lazy = al;
+	return hc;
 }
 
 /*
@@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-	rcu_prepare_for_idle_init(cpu);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/* Add CPU to rcu_node bitmasks. */
@@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 */
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
-		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
--
cgit v1.2.3
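The reworked rcu_cpu_has_callbacks() above reports both whether a CPU has
any callbacks queued and whether every queued callback is lazy (that is,
it only frees memory and can wait longer).  A stand-alone model of that
logic, with assumed types and names rather than the kernel's, is sketched
below.

/*
 * User-space model of the has-callbacks / all-lazy determination: scan
 * each flavor's queue counts, remember whether anything is queued and
 * whether any callback is non-lazy.
 */
#include <stdbool.h>
#include <stdio.h>

struct flavor_cbs {
	long qlen;	/* total queued callbacks */
	long qlen_lazy;	/* how many of them are lazy */
};

static bool cpu_has_callbacks(const struct flavor_cbs *flavors, int n,
			      bool *all_lazy)
{
	bool al = true;		/* all callbacks lazy so far */
	bool hc = false;	/* has callbacks */
	int i;

	for (i = 0; i < n; i++) {
		if (flavors[i].qlen != flavors[i].qlen_lazy)
			al = false;	/* at least one non-lazy callback */
		if (flavors[i].qlen)
			hc = true;
	}
	if (all_lazy)
		*all_lazy = al;
	return hc;
}

int main(void)
{
	struct flavor_cbs f[3] = { { 2, 2 }, { 0, 0 }, { 1, 1 } };
	bool lazy;
	bool has = cpu_has_callbacks(f, 3, &lazy);

	printf("has_cbs=%d all_lazy=%d\n", (int)has, (int)lazy);
	return 0;
}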
From b92db6cb7efcbd41e469e1d757c47da4865f7622 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 29 Dec 2012 14:52:41 -0800
Subject: rcu: Rearrange locking in rcu_start_gp()

If CPUs are to give prior notice of needed grace periods, it will be
necessary to invoke rcu_start_gp() without dropping the root rcu_node
structure's ->lock.  This commit takes a first step in this direction
by moving the release of this lock to the end of rcu_start_gp().

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7b1d7769872a..2c6a9314c7f7 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1521,16 +1521,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
-
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
-	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
 
 	/* Ensure that CPU is aware of completion of last grace period. */
-	rcu_process_gp_end(rsp, rdp);
-	local_irq_restore(flags);
+	__rcu_process_gp_end(rsp, rdp->mynode, rdp);
 
 	/* Wake up rcu_gp_kthread() to start the grace period. */
 	wake_up(&rsp->gp_wq);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
--
cgit v1.2.3
From b8462084a2a88a6a0489f9bb7d8b1bb95bc455ab Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 29 Dec 2012 22:04:18 -0800
Subject: rcu: Push lock release to rcu_start_gp()'s callers

If CPUs are to give prior notice of needed grace periods, it will be
necessary to invoke rcu_start_gp() without dropping the root rcu_node
structure's ->lock.  This commit takes a second step in this direction
by moving the release of this lock to rcu_start_gp()'s callers.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2c6a9314c7f7..0d532950baa3 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1487,16 +1487,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock, which is released before return.  Hard irqs must
- * be disabled.
+ * the root node's ->lock and hard irqs must be disabled.
  *
  * Note that it is legal for a dying CPU (which is marked as offline) to
  * invoke this function.  This can happen when the dying CPU reports its
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
-	__releases(rcu_get_root(rsp)->lock)
+rcu_start_gp(struct rcu_state *rsp)
 {
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_node *rnp = rcu_get_root(rsp);
@@ -1510,15 +1508,13 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	 */
 	rcu_advance_cbs(rsp, rnp, rdp);
 
-	if (!rsp->gp_kthread ||
-	    !cpu_needs_another_gp(rsp, rdp)) {
+	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
 		 * task, this CPU does not need another grace period,
 		 * or a grace period is already in progress.
 		 * Either way, don't start a new grace period.
 		 */
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
@@ -1528,15 +1524,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
 	/* Wake up rcu_gp_kthread() to start the grace period. */
 	wake_up(&rsp->gp_wq);
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
  * period and letting rcu_start_gp() start up the next grace period
- * if one is needed.  Note that the caller must hold rnp->lock, as
- * required by rcu_start_gp(), which will release it.
+ * if one is needed.  Note that the caller must hold rnp->lock, which
+ * is released before return.
  */
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
@@ -2134,7 +2129,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	local_irq_save(flags);
 	if (cpu_needs_another_gp(rsp, rdp)) {
 		raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-		rcu_start_gp(rsp, flags);  /* releases above lock */
+		rcu_start_gp(rsp);
+		raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
 	} else {
 		local_irq_restore(flags);
 	}
@@ -2214,11 +2210,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 
 		/* Start a new grace period if one not already started. */
 		if (!rcu_gp_in_progress(rsp)) {
-			unsigned long nestflag;
 			struct rcu_node *rnp_root = rcu_get_root(rsp);
 
-			raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-			rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+			raw_spin_lock(&rnp_root->lock);
+			rcu_start_gp(rsp);
+			raw_spin_unlock(&rnp_root->lock);
 		} else {
 			/* Give the grace period a kick. */
 			rdp->blimit = LONG_MAX;
--
cgit v1.2.3
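Taken together, this commit and the previous one change the locking
convention around rcu_start_gp(): the callee no longer drops the root
rcu_node ->lock, so each caller acquires and releases it around the call.
The minimal pthread sketch below illustrates that convention only; the
names and the simplified body are assumptions, not kernel code.

/*
 * Callee requires the lock to be held and does not release it; the
 * caller takes and drops the lock around the call, mirroring
 * __rcu_process_callbacks() after this change.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t root_lock = PTHREAD_MUTEX_INITIALIZER;
static int gp_flags;

/* Caller must hold root_lock; the lock is NOT released here. */
static void start_gp_locked(void)
{
	gp_flags = 1;	/* record that a new grace period is requested */
}

static void process_callbacks(void)
{
	pthread_mutex_lock(&root_lock);
	start_gp_locked();
	pthread_mutex_unlock(&root_lock);
}

int main(void)
{
	process_callbacks();
	printf("gp_flags=%d\n", gp_flags);
	return 0;
}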
From 0446be489795d8bb994125a916ef03211f539e54 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sun, 30 Dec 2012 15:21:01 -0800
Subject: rcu: Abstract rcu_start_future_gp() from rcu_nocb_wait_gp()

CPUs going idle will need to record the need for a future grace
period, but won't actually need to block waiting on it.  This commit
therefore splits rcu_start_future_gp(), which does the recording, from
rcu_nocb_wait_gp(), which now invokes rcu_start_future_gp() to do the
recording, after which rcu_nocb_wait_gp() does the waiting.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 4 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0d532950baa3..f4b23f16677a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -224,6 +224,7 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+static void rcu_start_gp(struct rcu_state *rsp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1074,6 +1075,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
 	return rnp->completed + 2;
 }
 
+/*
+ * Trace-event helper function for rcu_start_future_gp() and
+ * rcu_nocb_wait_gp().
+ */
+static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+				unsigned long c, char *s)
+{
+	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+				      rnp->completed, c, rnp->level,
+				      rnp->grplo, rnp->grphi, s);
+}
+
+/*
+ * Start some future grace period, as needed to handle newly arrived
+ * callbacks.  The required future grace periods are recorded in each
+ * rcu_node structure's ->need_future_gp field.
+ *
+ * The caller must hold the specified rcu_node structure's ->lock.
+ */
+static unsigned long __maybe_unused
+rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+{
+	unsigned long c;
+	int i;
+	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+	/*
+	 * Pick up grace-period number for new callbacks.  If this
+	 * grace period is already marked as needed, return to the caller.
+	 */
+	c = rcu_cbs_completed(rdp->rsp, rnp);
+	trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+	if (rnp->need_future_gp[c & 0x1]) {
+		trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+		return c;
+	}
+
+	/*
+	 * If either this rcu_node structure or the root rcu_node structure
+	 * believe that a grace period is in progress, then we must wait
+	 * for the one following, which is in "c".  Because our request
+	 * will be noticed at the end of the current grace period, we don't
+	 * need to explicitly start one.
+	 */
+	if (rnp->gpnum != rnp->completed ||
+	    ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+		rnp->need_future_gp[c & 0x1]++;
+		trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+		return c;
+	}
+
+	/*
+	 * There might be no grace period in progress.  If we don't already
+	 * hold it, acquire the root rcu_node structure's lock in order to
+	 * start one (if needed).
+	 */
+	if (rnp != rnp_root)
+		raw_spin_lock(&rnp_root->lock);
+
+	/*
+	 * Get a new grace-period number.  If there really is no grace
+	 * period in progress, it will be smaller than the one we obtained
+	 * earlier.  Adjust callbacks as needed.  Note that even no-CBs
+	 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+	 */
+	c = rcu_cbs_completed(rdp->rsp, rnp_root);
+	for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+		if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+			rdp->nxtcompleted[i] = c;
+
+	/*
+	 * If the needed for the required grace period is already
+	 * recorded, trace and leave.
+	 */
+	if (rnp_root->need_future_gp[c & 0x1]) {
+		trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+		goto unlock_out;
+	}
+
+	/* Record the need for the future grace period. */
+	rnp_root->need_future_gp[c & 0x1]++;
+
+	/* If a grace period is not already in progress, start one. */
+	if (rnp_root->gpnum != rnp_root->completed) {
+		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+	} else {
+		trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+		rcu_start_gp(rdp->rsp);
+	}
+unlock_out:
+	if (rnp != rnp_root)
+		raw_spin_unlock(&rnp_root->lock);
+	return c;
+}
+
+/*
+ * Clean up any old requests for the just-ended grace period.  Also return
+ * whether any additional grace periods have been requested.  Also invoke
+ * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+ * waiting for this grace period to complete.
+ */
+static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int c = rnp->completed;
+	int needmore;
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+
+	rcu_nocb_gp_cleanup(rsp, rnp);
+	rnp->need_future_gp[c & 0x1] = 0;
+	needmore = rnp->need_future_gp[(c + 1) & 0x1];
+	trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+	return needmore;
+}
+
 /*
  * If there is room, assign a ->completed number to any callbacks on
  * this CPU that have not already been assigned.  Also accelerate any
@@ -1312,9 +1427,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
 		rdp = this_cpu_ptr(rsp->rda);
 		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
-		rnp->gpnum = rsp->gpnum;
+		ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
 		WARN_ON_ONCE(rnp->completed != rsp->completed);
-		rnp->completed = rsp->completed;
+		ACCESS_ONCE(rnp->completed) = rsp->completed;
 		if (rnp == rdp->mynode)
 			rcu_start_gp_per_cpu(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
@@ -1395,11 +1510,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	 */
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
-		rnp->completed = rsp->gpnum;
+		ACCESS_ONCE(rnp->completed) = rsp->gpnum;
 		rdp = this_cpu_ptr(rsp->rda);
 		if (rnp == rdp->mynode)
 			__rcu_process_gp_end(rsp, rnp, rdp);
-		nocb += rcu_nocb_gp_cleanup(rsp, rnp);
+		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
 	}
--
cgit v1.2.3
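The ->need_future_gp[] bookkeeping introduced above files each request
under the low-order bit of the grace-period number, because at most two
grace periods can be of interest at once: the one in progress and the one
after it.  The user-space model below sketches that two-slot scheme; all
names and the simplified numbering are assumptions for the example, not
the kernel's.

/*
 * Two-slot future-grace-period bookkeeping: requests are recorded under
 * gp_number & 0x1, cleared when that grace period completes, and the
 * cleanup step reports whether the other slot still has requests.
 */
#include <stdio.h>

static unsigned long completed;		/* last completed grace period */
static int need_future_gp[2];		/* requests for future grace periods */

/* Request the grace period after the current state; return its number. */
static unsigned long request_future_gp(void)
{
	unsigned long c = completed + 1;	/* next grace period to wait for */

	need_future_gp[c & 0x1]++;
	return c;
}

/* End the current grace period and report whether more are needed. */
static int finish_gp(void)
{
	unsigned long c = ++completed;

	need_future_gp[c & 0x1] = 0;		/* these waiters are satisfied */
	return need_future_gp[(c + 1) & 0x1];	/* requests for the next one? */
}

int main(void)
{
	unsigned long c = request_future_gp();
	int more;

	printf("waiting for GP %lu\n", c);
	more = finish_gp();
	printf("GP %lu done, more needed? %d\n", completed, more);
	return 0;
}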
From 910ee45db2f4837c8440e770474758493ab94bf7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Mon, 31 Dec 2012 02:24:21 -0800
Subject: rcu: Make rcu_accelerate_cbs() note need for future grace periods

Now that rcu_start_future_gp() has been abstracted from
rcu_nocb_wait_gp(), rcu_accelerate_cbs() can invoke rcu_start_future_gp()
so as to register the need for any future grace periods needed by a
CPU about to enter dyntick-idle mode.  This commit makes this change.
Note that some refactoring of rcu_start_gp() is carried out to avoid
recursion and subsequent self-deadlocks.

Signed-off-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
---
 kernel/rcutree.c | 50 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 18 deletions(-)

(limited to 'kernel/rcutree.c')

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f4b23f16677a..9cb91e4885af 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -224,7 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
-static void rcu_start_gp(struct rcu_state *rsp);
+static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(int cpu);
@@ -1162,7 +1163,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 		trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
 	} else {
 		trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
-		rcu_start_gp(rdp->rsp);
+		rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
 	}
 unlock_out:
 	if (rnp != rnp_root)
@@ -1248,6 +1249,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 		rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
 		rdp->nxtcompleted[i] = c;
 	}
+	/* Record any needed additional grace periods. */
+	rcu_start_future_gp(rnp, rdp);
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@ -1609,20 +1612,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
  * quiescent state.
  */
 static void
-rcu_start_gp(struct rcu_state *rsp)
+rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+		      struct rcu_data *rdp)
 {
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-	struct rcu_node *rnp = rcu_get_root(rsp);
-
-	/*
-	 * If there is no grace period in progress right now, any
-	 * callbacks we have up to this point will be satisfied by the
-	 * next grace period.  Also, advancing the callbacks reduces the
-	 * probability of false positives from cpu_needs_another_gp()
-	 * resulting in pointless grace periods.  So, advance callbacks!
-	 */
-	rcu_advance_cbs(rsp, rnp, rdp);
-
 	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
@@ -1634,13 +1626,35 @@ rcu_start_gp(struct rcu_state *rsp)
 	}
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 
-	/* Ensure that CPU is aware of completion of last grace period. */
-	__rcu_process_gp_end(rsp, rdp->mynode, rdp);
-
 	/* Wake up rcu_gp_kthread() to start the grace period. */
 	wake_up(&rsp->gp_wq);
 }
 
+/*
+ * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+ * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+ * is invoked indirectly from rcu_advance_cbs(), which would result in
+ * endless recursion -- or would do so if it wasn't for the self-deadlock
+ * that is encountered beforehand.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp)
+{
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	/*
+	 * If there is no grace period in progress right now, any
+	 * callbacks we have up to this point will be satisfied by the
+	 * next grace period.  Also, advancing the callbacks reduces the
+	 * probability of false positives from cpu_needs_another_gp()
+	 * resulting in pointless grace periods.  So, advance callbacks
+	 * then start the grace period!
+	 */
+	rcu_advance_cbs(rsp, rnp, rdp);
+	rcu_start_gp_advanced(rsp, rnp, rdp);
+}
+
 /*
  * Report a full set of quiescent states to the specified rcu_state
  * data structure.  This involves cleaning up after the prior grace
--
cgit v1.2.3
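The recursion-avoiding split described in this commit has a simple shape:
rcu_start_gp_advanced() only requests the grace period, while rcu_start_gp()
first advances callbacks and then calls it.  The tiny user-space sketch
below shows why the split matters when advancing callbacks can itself
request a grace period; the names and trivial bodies are assumptions for
illustration, not kernel code.

/*
 * "advanced" helper only requests a grace period; the wrapper advances
 * callbacks first.  Because advance_cbs() calls the helper rather than
 * the wrapper, there is no recursion.
 */
#include <stdio.h>

static int gp_flags;

/* Request a new grace period; safe to call from callback processing. */
static void start_gp_advanced(void)
{
	gp_flags = 1;
}

/* Advance callbacks; may itself need to request a grace period. */
static void advance_cbs(void)
{
	start_gp_advanced();	/* calling start_gp() here would recurse */
}

/* Wrapper: advance the calling CPU's callbacks, then request the GP. */
static void start_gp(void)
{
	advance_cbs();
	start_gp_advanced();
}

int main(void)
{
	start_gp();
	printf("gp_flags=%d\n", gp_flags);
	return 0;
}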