From 34ed62461ae4970695974afb9a60ac3df0086830 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Jan 2013 13:37:42 -0800 Subject: rcu: Remove restrictions on no-CBs CPUs Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore at least one non-no-CBs CPU must remain online at any given time. These restrictions are problematic in some situations, such as cases where all CPUs must run a real-time workload that needs to be insulated from OS jitter and latencies due to RCU callback invocation. This commit therefore provides no-CBs CPUs a (very crude and energy-inefficient) way to start and to wait for grace periods independently of the normal RCU callback mechanisms. This approach allows any or all of the CPUs to be designated as no-CBs CPUs, and allows any proper subset of the CPUs (whether no-CBs CPUs or not) to be offlined. This commit also provides a fix for a locking bug spotted by Xie ChanglongX. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 156 ++++++++++++++---------------------------------- 1 file changed, 46 insertions(+), 110 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c1cc7e17ff9d..44f958a88b21 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -86,10 +86,6 @@ static void __init rcu_bootup_announce_oddness(void) printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_NOCB_CPU if (have_rcu_nocb_mask) { - if (cpumask_test_cpu(0, rcu_nocb_mask)) { - cpumask_clear_cpu(0, rcu_nocb_mask); - pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n"); - } cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); if (rcu_nocb_poll) @@ -2165,6 +2161,14 @@ static int __init parse_rcu_nocb_poll(char *arg) } early_param("rcu_nocb_poll", parse_rcu_nocb_poll); +/* + * Does this CPU needs a 
grace period due to offloaded callbacks? + */ +static int rcu_nocb_needs_gp(struct rcu_data *rdp) +{ + return rdp->nocb_needs_gp; +} + /* Is the specified CPU a no-CPUs CPU? */ static bool is_nocb_cpu(int cpu) { @@ -2265,95 +2269,39 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, } /* - * There must be at least one non-no-CBs CPU in operation at any given - * time, because no-CBs CPUs are not capable of initiating grace periods - * independently. This function therefore complains if the specified - * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to - * avoid offlining the last such CPU. (Recursion is a wonderful thing, - * but you have to have a base case!) + * If necessary, kick off a new grace period, and either way wait + * for a subsequent grace period to complete. */ -static bool nocb_cpu_expendable(int cpu) +static void rcu_nocb_wait_gp(struct rcu_data *rdp) { - cpumask_var_t non_nocb_cpus; - int ret; + unsigned long c; + unsigned long flags; + unsigned long j; + struct rcu_node *rnp = rdp->mynode; + + raw_spin_lock_irqsave(&rnp->lock, flags); + c = rnp->completed + 2; + rdp->nocb_needs_gp = true; + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* - * If there are no no-CB CPUs or if this CPU is not a no-CB CPU, - * then offlining this CPU is harmless. Let it happen. + * Wait for the grace period. Do so interruptibly to avoid messing + * up the load average. */ - if (!have_rcu_nocb_mask || is_nocb_cpu(cpu)) - return 1; - - /* If no memory, play it safe and keep the CPU around. */ - if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO)) - return 0; - cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask); - cpumask_clear_cpu(cpu, non_nocb_cpus); - ret = !cpumask_empty(non_nocb_cpus); - free_cpumask_var(non_nocb_cpus); - return ret; -} - -/* - * Helper structure for remote registry of RCU callbacks. - * This is needed for when a no-CBs CPU needs to start a grace period. 
- * If it just invokes call_rcu(), the resulting callback will be queued, - * which can result in deadlock. - */ -struct rcu_head_remote { - struct rcu_head *rhp; - call_rcu_func_t *crf; - void (*func)(struct rcu_head *rhp); -}; - -/* - * Register a callback as specified by the rcu_head_remote struct. - * This function is intended to be invoked via smp_call_function_single(). - */ -static void call_rcu_local(void *arg) -{ - struct rcu_head_remote *rhrp = - container_of(arg, struct rcu_head_remote, rhp); - - rhrp->crf(rhrp->rhp, rhrp->func); -} - -/* - * Set up an rcu_head_remote structure and the invoke call_rcu_local() - * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via - * smp_call_function_single(). - */ -static void invoke_crf_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp), - call_rcu_func_t crf) -{ - struct rcu_head_remote rhr; - - rhr.rhp = rhp; - rhr.crf = crf; - rhr.func = func; - smp_call_function_single(0, call_rcu_local, &rhr, 1); -} - -/* - * Helper functions to be passed to wait_rcu_gp(), each of which - * invokes invoke_crf_remote() to register a callback appropriately. - */ -static void __maybe_unused -call_rcu_preempt_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu); -} -static void call_rcu_bh_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu_bh); -} -static void call_rcu_sched_remote(struct rcu_head *rhp, - void (*func)(struct rcu_head *rhp)) -{ - invoke_crf_remote(rhp, func, call_rcu_sched); + for (;;) { + j = jiffies; + schedule_timeout_interruptible(2); + raw_spin_lock_irqsave(&rnp->lock, flags); + if (ULONG_CMP_GE(rnp->completed, c)) { + rdp->nocb_needs_gp = false; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + break; + } + if (j == jiffies) + flush_signals(current); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } + smp_mb(); /* Ensure that CB invocation happens after GP end. 
*/ } /* @@ -2390,7 +2338,7 @@ static int rcu_nocb_kthread(void *arg) cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); ACCESS_ONCE(rdp->nocb_p_count) += c; ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; - wait_rcu_gp(rdp->rsp->call_remote); + rcu_nocb_wait_gp(rdp); /* Each pass through the following loop invokes a callback. */ trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); @@ -2443,26 +2391,22 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) } /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ -static void init_nocb_callback_list(struct rcu_data *rdp) +static bool init_nocb_callback_list(struct rcu_data *rdp) { if (rcu_nocb_mask == NULL || !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) - return; + return false; rdp->nxttail[RCU_NEXT_TAIL] = NULL; + return true; } -/* Initialize the ->call_remote fields in the rcu_state structures. */ -static void __init rcu_init_nocb(void) +#else /* #ifdef CONFIG_RCU_NOCB_CPU */ + +static int rcu_nocb_needs_gp(struct rcu_data *rdp) { -#ifdef CONFIG_PREEMPT_RCU - rcu_preempt_state.call_remote = call_rcu_preempt_remote; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - rcu_bh_state.call_remote = call_rcu_bh_remote; - rcu_sched_state.call_remote = call_rcu_sched_remote; + return 0; } -#else /* #ifdef CONFIG_RCU_NOCB_CPU */ - static bool is_nocb_cpu(int cpu) { return false; @@ -2480,11 +2424,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, return 0; } -static bool nocb_cpu_expendable(int cpu) -{ - return 1; -} - static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } @@ -2493,12 +2432,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) { } -static void init_nocb_callback_list(struct rcu_data *rdp) -{ -} - -static void __init rcu_init_nocb(void) +static bool init_nocb_callback_list(struct rcu_data *rdp) { + return false; } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ -- cgit v1.2.3 From 6231069bdab575fce862ca786f1c0ba5e4e9ba3b Mon Sep 17 
00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 6 Mar 2013 13:37:09 -0800 Subject: rcu: Add softirq-stall indications to stall-warning messages If RCU's softirq handler is prevented from executing, an RCU CPU stall warning can result. Ways to prevent RCU's softirq handler from executing include: (1) CPU spinning with interrupts disabled, (2) infinite loop in some softirq handler, and (3) in -rt kernels, an infinite loop in a set of real-time threads running at priorities higher than that of RCU's softirq handler. Because this situation can be difficult to track down, this commit causes the count of RCU softirq handler invocations to be printed with RCU CPU stall warnings. This information does require some interpretation, as now documented in Documentation/RCU/stallwarn.txt. Reported-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Tested-by: Paul Gortmaker --- kernel/rcutree_plugin.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c1cc7e17ff9d..7fcd3bbf67da 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2070,10 +2070,11 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n", + printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n", cpu, ticks_value, ticks_title, atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, + rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), fast_no_hz); } @@ -2087,6 +2088,7 @@ static void print_cpu_stall_info_end(void) static void zero_cpu_stall_ticks(struct rcu_data *rdp) { rdp->ticks_this_gp = 0; + rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); } /* Increment ->ticks_this_gp for all flavors of RCU. 
*/ -- cgit v1.2.3 From 911af505ef407c2511106c224dd640f882f0f590 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2013 10:23:27 -0800 Subject: rcu: Provide compile-time control for no-CBs CPUs Currently, the only way to specify no-CBs CPUs is via the rcu_nocbs kernel command-line parameter. This is inconvenient in some cases, particularly for randconfig testing, so this commit adds a new set of kernel configuration parameters. CONFIG_RCU_NOCB_CPU_NONE (the default) retains the old behavior, CONFIG_RCU_NOCB_CPU_ZERO offloads callback processing from CPU 0 (along with any other CPUs specified by the rcu_nocbs boot-time parameter), and CONFIG_RCU_NOCB_CPU_ALL offloads callback processing from all CPUs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 44f958a88b21..3e33aefce0ea 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -85,6 +85,20 @@ static void __init rcu_bootup_announce_oddness(void) if (nr_cpu_ids != NR_CPUS) printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_NOCB_CPU +#ifndef CONFIG_RCU_NOCB_CPU_NONE + if (!have_rcu_nocb_mask) { + alloc_bootmem_cpumask_var(&rcu_nocb_mask); + have_rcu_nocb_mask = true; + } +#ifdef CONFIG_RCU_NOCB_CPU_ZERO + pr_info("\tExperimental no-CBs CPU 0\n"); + cpumask_set_cpu(0, rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ +#ifdef CONFIG_RCU_NOCB_CPU_ALL + pr_info("\tExperimental no-CBs for all CPUs\n"); + cpumask_setall(rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); -- cgit v1.2.3 From 
dae6e64d2bcfd4b06304ab864c7e3a4f6b5fedf4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 10 Feb 2013 20:48:58 -0800 Subject: rcu: Introduce proper blocking to no-CBs kthreads GP waits Currently, the no-CBs kthreads do repeated timed waits for grace periods to elapse. This is crude and energy inefficient, so this commit allows no-CBs kthreads to specify exactly which grace period they are waiting for and also allows them to block for the entire duration until the desired grace period completes. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 129 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 112 insertions(+), 17 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3e33aefce0ea..90a191452550 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2176,11 +2176,51 @@ static int __init parse_rcu_nocb_poll(char *arg) early_param("rcu_nocb_poll", parse_rcu_nocb_poll); /* - * Does this CPU needs a grace period due to offloaded callbacks? + * Do any no-CBs CPUs need another grace period? + * + * Interrupts must be disabled. If the caller does not hold the root + * rnp_node structure's ->lock, the results are advisory only. + */ +static int rcu_nocb_needs_gp(struct rcu_state *rsp) +{ + struct rcu_node *rnp = rcu_get_root(rsp); + + return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; +} + +/* + * Clean up this rcu_node structure's no-CBs state at the end of + * a grace period, and also return whether any no-CBs CPU associated + * with this rcu_node structure needs another grace period. 
+ */ +static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int c = rnp->completed; + int needmore; + + wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); + rnp->n_nocb_gp_requests[c & 0x1] = 0; + needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + return needmore; +} + +/* + * Set the root rcu_node structure's ->n_nocb_gp_requests field + * based on the sum of those of all rcu_node structures. This does + * double-count the root rcu_node structure's requests, but this + * is necessary to handle the possibility of a rcu_nocb_kthread() + * having awakened during the time that the rcu_node structures + * were being updated for the end of the previous grace period. */ -static int rcu_nocb_needs_gp(struct rcu_data *rdp) +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) +{ + rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq; +} + +static void rcu_init_one_nocb(struct rcu_node *rnp) { - return rdp->nocb_needs_gp; + init_waitqueue_head(&rnp->nocb_gp_wq[0]); + init_waitqueue_head(&rnp->nocb_gp_wq[1]); } /* Is the specified CPU a no-CPUs CPU? */ @@ -2289,31 +2329,73 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, static void rcu_nocb_wait_gp(struct rcu_data *rdp) { unsigned long c; + bool d; unsigned long flags; - unsigned long j; + unsigned long flags1; struct rcu_node *rnp = rdp->mynode; + struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); raw_spin_lock_irqsave(&rnp->lock, flags); c = rnp->completed + 2; - rdp->nocb_needs_gp = true; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* Count our request for a grace period. */ + rnp->n_nocb_gp_requests[c & 0x1]++; + + if (rnp->gpnum != rnp->completed) { + + /* + * This rcu_node structure believes that a grace period + * is in progress, so we are done. When this grace + * period ends, our request will be acted upon. 
+ */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + } else { + + /* + * Might not be a grace period, check root rcu_node + * structure to see if we must start one. + */ + if (rnp != rnp_root) + raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ + if (rnp_root->gpnum != rnp_root->completed) { + raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ + } else { + + /* + * No grace period, so we need to start one. + * The good news is that we can wait for exactly + * one grace period instead of part of the current + * grace period and all of the next grace period. + * Adjust counters accordingly and start the + * needed grace period. + */ + rnp->n_nocb_gp_requests[c & 0x1]--; + c = rnp_root->completed + 1; + rnp->n_nocb_gp_requests[c & 0x1]++; + rnp_root->n_nocb_gp_requests[c & 0x1]++; + local_save_flags(flags1); + rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ + } + + /* Clean up locking and irq state. */ + if (rnp != rnp_root) + raw_spin_unlock_irqrestore(&rnp->lock, flags); + else + local_irq_restore(flags); + } /* * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ for (;;) { - j = jiffies; - schedule_timeout_interruptible(2); - raw_spin_lock_irqsave(&rnp->lock, flags); - if (ULONG_CMP_GE(rnp->completed, c)) { - rdp->nocb_needs_gp = false; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + wait_event_interruptible( + rnp->nocb_gp_wq[c & 0x1], + (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); + if (likely(d)) break; - } - if (j == jiffies) - flush_signals(current); - raw_spin_unlock_irqrestore(&rnp->lock, flags); + flush_signals(current); } smp_mb(); /* Ensure that CB invocation happens after GP end. 
*/ } @@ -2416,11 +2498,24 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ -static int rcu_nocb_needs_gp(struct rcu_data *rdp) +static int rcu_nocb_needs_gp(struct rcu_state *rsp) +{ + return 0; +} + +static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { return 0; } +static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) +{ +} + +static void rcu_init_one_nocb(struct rcu_node *rnp) +{ +} + static bool is_nocb_cpu(int cpu) { return false; -- cgit v1.2.3 From 21e7a6087480451804124cee27c0a7d0a7de1564 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 9 Feb 2013 17:42:16 -0800 Subject: rcu: Add event tracing for no-CBs CPUs' callback registration Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 90a191452550..7225a5a14cef 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2285,6 +2285,13 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, if (!is_nocb_cpu(rdp->cpu)) return 0; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); + if (__is_kfree_rcu_offset((unsigned long)rhp->func)) + trace_rcu_kfree_callback(rdp->rsp->name, rhp, + (unsigned long)rhp->func, + rdp->qlen_lazy, rdp->qlen); + else + trace_rcu_callback(rdp->rsp->name, rhp, + rdp->qlen_lazy, rdp->qlen); return 1; } -- cgit v1.2.3 From 09c7b890622d72b5e004cc249bbe610e8b928ddf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 8 Feb 2013 15:55:02 -0800 Subject: rcu: Add event tracing for no-CBs CPUs' grace periods Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree_plugin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 7225a5a14cef..e32236e83dda 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2201,6 +2201,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2347,6 +2350,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. */ rnp->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2355,6 +2361,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2366,6 +2376,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. 
*/ } else { @@ -2381,6 +2396,11 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ } @@ -2396,6 +2416,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2403,7 +2426,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); + trace_rcu_nocb_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, rnp->grphi, + "ResumeWait"); } + trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } -- cgit v1.2.3 From a488985851cf2facd2227bd982cc2c251df56268 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Dec 2012 08:16:28 -0800 Subject: rcu: Distinguish "rcuo" kthreads by RCU flavor Currently, the per-no-CBs-CPU kthreads are named "rcuo" followed by the CPU number, for example, "rcuo0". This is problematic given that there are either two or three RCU flavors, each of which gets a per-CPU kthread with exactly the same name. This commit therefore introduces a one-letter abbreviation for each RCU flavor, namely 'b' for RCU-bh, 'p' for RCU-preempt, and 's' for RCU-sched. This abbreviation is used to distinguish the "rcuo" kthreads, for example, for CPU 0 we would have "rcuob/0", "rcuop/0", and "rcuos/0". Signed-off-by: Paul E. 
McKenney Signed-off-by: Paul E. McKenney Tested-by: Dietmar Eggemann --- kernel/rcutree_plugin.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e32236e83dda..c0164441ab92 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -111,7 +111,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = - RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); + RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -2517,7 +2517,8 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) return; for_each_cpu(cpu, rcu_nocb_mask) { rdp = per_cpu_ptr(rsp->rda, cpu); - t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu); + t = kthread_run(rcu_nocb_kthread, rdp, + "rcuo%c/%d", rsp->abbr, cpu); BUG_ON(IS_ERR(t)); ACCESS_ONCE(rdp->nocb_kthread) = t; } -- cgit v1.2.3 From 5e44ce35a6ec1a16522fa2099dda27aefd8a584e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Dec 2012 12:35:29 -0800 Subject: rcu: Export RCU_FAST_NO_HZ parameters to sysfs RCU_FAST_NO_HZ operation is controlled by four compile-time C-preprocessor macros, but some use cases benefit greatly from runtime adjustment, particularly when tuning devices. This commit therefore creates the corresponding sysfs entries. Reported-by: Robin Randhawa Signed-off-by: Paul E. 
McKenney --- kernel/rcutree_plugin.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c0164441ab92..28185ad18df3 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1617,6 +1617,15 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ +static int rcu_idle_flushes = RCU_IDLE_FLUSHES; +module_param(rcu_idle_flushes, int, 0644); +static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; +module_param(rcu_idle_opt_flushes, int, 0644); +static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; +module_param(rcu_idle_gp_delay, int, 0644); +static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; +module_param(rcu_idle_lazy_gp_delay, int, 0644); + extern int tick_nohz_enabled; /* @@ -1696,10 +1705,10 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) } /* Set up for the possibility that RCU will post a timer. 
*/ if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies, - RCU_IDLE_GP_DELAY) - jiffies; + *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY; + *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; } return 0; @@ -1805,11 +1814,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("User dyntick with callbacks"); rdtp->idle_gp_timer_expires = - round_up(jiffies + RCU_IDLE_GP_DELAY, - RCU_IDLE_GP_DELAY); + round_up(jiffies + rcu_idle_gp_delay, + rcu_idle_gp_delay); } else if (rcu_cpu_has_callbacks(cpu)) { rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); + round_jiffies(jiffies + rcu_idle_lazy_gp_delay); trace_rcu_prep_idle("User dyntick with lazy callbacks"); } else { return; @@ -1861,8 +1870,8 @@ static void rcu_prepare_for_idle(int cpu) /* Check and update the ->dyntick_drain sequencing. */ if (rdtp->dyntick_drain <= 0) { /* First time through, initialize the counter. */ - rdtp->dyntick_drain = RCU_IDLE_FLUSHES; - } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES && + rdtp->dyntick_drain = rcu_idle_flushes; + } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && !rcu_pending(cpu) && !local_softirq_pending()) { /* Can we go dyntick-idle despite still having callbacks? 
*/ @@ -1871,11 +1880,11 @@ static void rcu_prepare_for_idle(int cpu) if (rcu_cpu_has_nonlazy_callbacks(cpu)) { trace_rcu_prep_idle("Dyntick with callbacks"); rdtp->idle_gp_timer_expires = - round_up(jiffies + RCU_IDLE_GP_DELAY, - RCU_IDLE_GP_DELAY); + round_up(jiffies + rcu_idle_gp_delay, + rcu_idle_gp_delay); } else { rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY); + round_jiffies(jiffies + rcu_idle_lazy_gp_delay); trace_rcu_prep_idle("Dyntick with lazy callbacks"); } tp = &rdtp->idle_gp_timer; -- cgit v1.2.3 From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 374 +++++++++++++----------------------------------- 1 file changed, 99 insertions(+), 275 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu); -} - -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. 
- */ -static void rcu_prepare_for_idle_init(int cpu) -{ + return rcu_cpu_has_callbacks(cpu, NULL); } /* @@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) * * The following three proprocessor symbols control this state machine: * - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt - * to satisfy RCU. Beyond this point, it is better to incur a periodic - * scheduling-clock interrupt than to loop through the state machine - * at full power. - * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are - * optional if RCU does not need anything immediately from this - * CPU, even if this CPU still has RCU callbacks queued. The first - * times through the state machine are mandatory: we need to give - * the state machine a chance to communicate a quiescent state - * to the RCU core. * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This * is sized to be roughly one RCU grace period. Those energy-efficiency @@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. 
*/ -static int rcu_idle_flushes = RCU_IDLE_FLUSHES; -module_param(rcu_idle_flushes, int, 0644); -static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; -module_param(rcu_idle_opt_flushes, int, 0644); static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; @@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Does the specified flavor of RCU have non-lazy callbacks pending on - * the specified CPU? Both RCU flavor and CPU are specified by the - * rcu_data structure. - */ -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) -{ - return rdp->qlen != rdp->qlen_lazy; -} - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/* - * Are there non-lazy RCU-preempt callbacks? (There cannot be if there - * is no RCU-preempt in the kernel.) + * Try to advance callbacks for all flavors of RCU on the current CPU. + * Afterwards, if there are any callbacks ready for immediate invocation, + * return true. */ -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +static bool rcu_try_advance_all_cbs(void) { - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - return __rcu_cpu_has_nonlazy_callbacks(rdp); -} - -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + bool cbs_ready = false; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - return 0; -} + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + rnp = rdp->mynode; -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if (rdp->completed != rnp->completed && + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_process_gp_end(rsp, rdp); -/* - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? 
- */ -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) -{ - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + cbs_ready = true; + } + return cbs_ready; } /* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! + * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller to set the timeout based on whether or not there are non-lazy + * callbacks. * - * The delta_jiffies argument is used to store the time when RCU is - * going to need the CPU again if it still has callbacks. The reason - * for this is that rcu_prepare_for_idle() might need to post a timer, - * but if so, it will do so after tick_nohz_stop_sched_tick() has set - * the wakeup time for this CPU. This means that RCU's timer can be - * delayed until the wakeup time, which defeats the purpose of posting - * a timer. + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; + /* Snapshot to detect later posting of non-lazy callback. */ + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; + /* If no callbacks, RCU doesn't need the CPU. 
*/ - if (!rcu_cpu_has_callbacks(cpu)) { - *delta_jiffies = ULONG_MAX; + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + *dj = ULONG_MAX; return 0; } - if (rdtp->dyntick_holdoff == jiffies) { - /* RCU recently tried and failed, so don't try again. */ - *delta_jiffies = 1; + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation. */ + invoke_rcu_core(); return 1; } - /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; + rdtp->last_accelerate = jiffies; + + /* Request timer delay depending on laziness, and round. */ + if (rdtp->all_lazy) { + *dj = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } return 0; } /* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - -/* - * Timer handler used to force CPU to start pushing its remaining RCU - * callbacks in the case where it entered dyntick-idle mode with callbacks - * pending. The hander doesn't really need to do anything because the - * real work is done upon re-entry to idle, or by the next scheduling-clock - * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). 
- */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) -{ - int cpu = (int)cpu_in; - - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... */ -} - -/* - * Initialize the timer used to pull CPUs out of dyntick-idle mode. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dyntick_holdoff = jiffies - 1; - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); - rdtp->idle_gp_timer_expires = jiffies - 1; - rdtp->idle_first_pass = 1; -} - -/* - * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to ->idle_gp_timer, so cancel it. This will - * do nothing if this timer is not active, so just cancel it unconditionally. - */ -static void rcu_cleanup_after_idle(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - del_timer(&rdtp->idle_gp_timer); - trace_rcu_prep_idle("Cleanup after idle"); - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); -} - -/* - * Check to see if any RCU-related work can be done by the current CPU, - * and if so, schedule a softirq to get it done. This function is part - * of the RCU implementation; it is -not- an exported member of the RCU API. - * - * The idea is for the current CPU to clear out all work required by the - * RCU core for the current grace period, so that this CPU can be permitted - * to enter dyntick-idle mode. In some cases, it will need to be awakened - * at the end of the grace period by whatever CPU ends the grace period. - * This allows CPUs to go dyntick-idle more quickly, and to reduce the - * number of wakeups by a modest integer factor. 
- * - * Because it is not legal to invoke rcu_process_callbacks() with irqs - * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The ->dyntick_drain field controls the sequencing. + * Prepare a CPU for idle from an RCU perspective. The first major task + * is to sense whether nohz mode has been enabled or disabled via sysfs. + * The second major task is to check to see if a non-lazy callback has + * arrived at a CPU that previously had only lazy callbacks. The third + * major task is to accelerate (that is, assign grace-period numbers to) + * any recently arrived callbacks. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; + struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_node *rnp; + struct rcu_state *rsp; int tne; /* Handle nohz enablement switches conservatively. */ tne = ACCESS_ONCE(tick_nohz_enabled); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu)) + if (rcu_cpu_has_callbacks(cpu, NULL)) invoke_rcu_core(); /* force nohz to see update. */ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; - /* Adaptive-tick mode, where usermode execution is idle to RCU. 
*/ - if (!is_idle_task(current)) { - rdtp->dyntick_holdoff = jiffies - 1; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("User dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else if (rcu_cpu_has_callbacks(cpu)) { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("User dyntick with lazy callbacks"); - } else { - return; - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + /* If this is a no-CBs CPU, no callbacks, just return. */ + if (is_nocb_cpu(cpu)) return; - } /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the ->idle_gp_timer if this CPU has callbacks - * pending. + * If a non-lazy callback arrived at a CPU having only lazy + * callbacks, invoke RCU core for the side-effect of recalculating + * idle duration on re-entry to idle. */ - if (!rdtp->idle_first_pass && - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - } + if (rdtp->all_lazy && + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + invoke_rcu_core(); return; } - rdtp->idle_first_pass = 0; - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* - * If there are no callbacks on this CPU, enter dyntick-idle mode. - * Also reset state to avoid prejudicing later attempts. + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. 
*/ - if (!rcu_cpu_has_callbacks(cpu)) { - rdtp->dyntick_holdoff = jiffies - 1; - rdtp->dyntick_drain = 0; - trace_rcu_prep_idle("No callbacks"); + if (rdtp->last_accelerate == jiffies) return; + rdtp->last_accelerate = jiffies; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (!*rdp->nxttail[RCU_DONE_TAIL]) + continue; + rnp = rdp->mynode; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } +} - /* - * If in holdoff mode, just return. We will presumably have - * refrained from disabling the scheduling-clock tick. - */ - if (rdtp->dyntick_holdoff == jiffies) { - trace_rcu_prep_idle("In holdoff"); - return; - } +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + struct rcu_data *rdp; + struct rcu_state *rsp; - /* Check and update the ->dyntick_drain sequencing. */ - if (rdtp->dyntick_drain <= 0) { - /* First time through, initialize the counter. */ - rdtp->dyntick_drain = rcu_idle_flushes; - } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && - !rcu_pending(cpu) && - !local_softirq_pending()) { - /* Can we go dyntick-idle despite still having callbacks? */ - rdtp->dyntick_drain = 0; - rdtp->dyntick_holdoff = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("Dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("Dyntick with lazy callbacks"); - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - return; /* Nothing more to do immediately. 
*/ - } else if (--(rdtp->dyntick_drain) <= 0) { - /* We have hit the limit, so time to give up. */ - rdtp->dyntick_holdoff = jiffies; - trace_rcu_prep_idle("Begin holdoff"); - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ + if (is_nocb_cpu(cpu)) return; - } - - /* - * Do one step of pushing the remaining RCU callbacks through - * the RCU core state machine. - */ -#ifdef CONFIG_TREE_PREEMPT_RCU - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { - rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state); - } -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state); - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state); - } - - /* - * If RCU callbacks are still pending, RCU still needs this CPU. - * So try forcing the callbacks through the grace period. - */ - if (rcu_cpu_has_callbacks(cpu)) { - trace_rcu_prep_idle("More callbacks"); - invoke_rcu_core(); - } else { - trace_rcu_prep_idle("Callbacks drained"); + rcu_try_advance_all_cbs(); + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + invoke_rcu_core(); } } @@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - struct timer_list *tltp = &rdtp->idle_gp_timer; - char c; + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; - if (timer_pending(tltp)) - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, c, tltp->expires - jiffies); - else - sprintf(cp, "drain=%d %c timer not pending", - rdtp->dyntick_drain, c); + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, + ulong2long(nlpd), + rdtp->all_lazy ? 
'L' : '.', + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -- cgit v1.2.3 From bd9f0686fc8c9a01c6850b1c611d1c9ad80b86d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 21:51:20 -0800 Subject: rcu: Repurpose no-CBs event tracing to future-GP events Dyntick-idle CPUs need to be able to pre-announce their need for grace periods. This can be done using something similar to the mechanism used by no-CB CPUs to announce their need for grace periods. This commit moves in this direction by renaming the no-CBs grace-period event tracing to suit the new future-grace-period needs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 62 +++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index d318f9f18be5..df50502eca2c 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2034,9 +2034,9 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); rnp->n_nocb_gp_requests[c & 0x1] = 0; needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; - trace_rcu_nocb_grace_period(rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - needmore ? "CleanupMore" : "Cleanup"); + trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, + c, rnp->level, rnp->grplo, rnp->grphi, + needmore ? "CleanupMore" : "Cleanup"); return needmore; } @@ -2183,9 +2183,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) /* Count our request for a grace period. 
*/ rnp->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "Startleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "Startleaf"); if (rnp->gpnum != rnp->completed) { @@ -2194,10 +2194,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * is in progress, so we are done. When this grace * period ends, our request will be acted upon. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "Startedleaf"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleaf"); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { @@ -2209,11 +2209,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (rnp != rnp_root) raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleafroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedleafroot"); raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ } else { @@ -2229,11 +2230,12 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp_root->completed + 1; rnp->n_nocb_gp_requests[c & 0x1]++; rnp_root->n_nocb_gp_requests[c & 0x1]++; - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedroot"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, + rnp->completed, + c, rnp->level, + rnp->grplo, rnp->grphi, + "Startedroot"); local_save_flags(flags1); rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. 
*/ } @@ -2249,9 +2251,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "StartWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2259,14 +2261,14 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); - trace_rcu_nocb_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, rnp->grphi, - "ResumeWait"); + trace_rcu_future_grace_period(rdp->rsp->name, + rnp->gpnum, rnp->completed, c, + rnp->level, rnp->grplo, + rnp->grphi, "ResumeWait"); } - trace_rcu_nocb_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - "EndWait"); + trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, + rnp->completed, c, rnp->level, + rnp->grplo, rnp->grphi, "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. */ } -- cgit v1.2.3 From b8462084a2a88a6a0489f9bb7d8b1bb95bc455ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 29 Dec 2012 22:04:18 -0800 Subject: rcu: Push lock release to rcu_start_gp()'s callers If CPUs are to give prior notice of needed grace periods, it will be necessary to invoke rcu_start_gp() without dropping the root rcu_node structure's ->lock. This commit takes a second step in this direction by moving the release of this lock to rcu_start_gp()'s callers. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree_plugin.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index df50502eca2c..073ded26e259 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2174,7 +2174,6 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) unsigned long c; bool d; unsigned long flags; - unsigned long flags1; struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); @@ -2236,8 +2235,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c, rnp->level, rnp->grplo, rnp->grphi, "Startedroot"); - local_save_flags(flags1); - rcu_start_gp(rdp->rsp, flags1); /* Rlses ->lock. */ + rcu_start_gp(rdp->rsp); + raw_spin_unlock(&rnp->lock); } /* Clean up locking and irq state. */ -- cgit v1.2.3 From 8b425aa8f1acfe48aed919c7aadff2ed290fe969 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 30 Dec 2012 13:06:35 -0800 Subject: rcu: Rename n_nocb_gp_requests to need_future_gp CPUs going idle need to be able to indicate their need for future grace periods. A mechanism for doing this already exists for no-callbacks CPUs, so the idea is to re-use that mechanism. This commit therefore moves the ->n_nocb_gp_requests field of the rcu_node structure out from under the CONFIG_RCU_NOCB_CPU #ifdef and renames it to ->need_future_gp. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree_plugin.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 073ded26e259..f3f0020b5b58 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2018,7 +2018,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) { struct rcu_node *rnp = rcu_get_root(rsp); - return rnp->n_nocb_gp_requests[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; + return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1]; } /* @@ -2032,8 +2032,8 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) int needmore; wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); - rnp->n_nocb_gp_requests[c & 0x1] = 0; - needmore = rnp->n_nocb_gp_requests[(c + 1) & 0x1]; + rnp->need_future_gp[c & 0x1] = 0; + needmore = rnp->need_future_gp[(c + 1) & 0x1]; trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, c, rnp->level, rnp->grplo, rnp->grphi, needmore ? "CleanupMore" : "Cleanup"); @@ -2041,7 +2041,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* - * Set the root rcu_node structure's ->n_nocb_gp_requests field + * Set the root rcu_node structure's ->need_future_gp field * based on the sum of those of all rcu_node structures. This does * double-count the root rcu_node structure's requests, but this * is necessary to handle the possibility of a rcu_nocb_kthread() @@ -2050,7 +2050,7 @@ static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) */ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) { - rnp->n_nocb_gp_requests[(rnp->completed + 1) & 0x1] += nrq; + rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; } static void rcu_init_one_nocb(struct rcu_node *rnp) @@ -2181,7 +2181,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) c = rnp->completed + 2; /* Count our request for a grace period. 
*/ - rnp->n_nocb_gp_requests[c & 0x1]++; + rnp->need_future_gp[c & 0x1]++; trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, c, rnp->level, rnp->grplo, rnp->grphi, "Startleaf"); @@ -2225,10 +2225,10 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) * Adjust counters accordingly and start the * needed grace period. */ - rnp->n_nocb_gp_requests[c & 0x1]--; + rnp->need_future_gp[c & 0x1]--; c = rnp_root->completed + 1; - rnp->n_nocb_gp_requests[c & 0x1]++; - rnp_root->n_nocb_gp_requests[c & 0x1]++; + rnp->need_future_gp[c & 0x1]++; + rnp_root->need_future_gp[c & 0x1]++; trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, rnp->completed, -- cgit v1.2.3 From 0446be489795d8bb994125a916ef03211f539e54 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 30 Dec 2012 15:21:01 -0800 Subject: rcu: Abstract rcu_start_future_gp() from rcu_nocb_wait_gp() CPUs going idle will need to record the need for a future grace period, but won't actually need to block waiting on it. This commit therefore splits rcu_start_future_gp(), which does the recording, from rcu_nocb_wait_gp(), which now invokes rcu_start_future_gp() to do the recording, after which rcu_nocb_wait_gp() does the waiting. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree_plugin.h | 104 +++++------------------------------------------- 1 file changed, 10 insertions(+), 94 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f3f0020b5b58..723af5f707f0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -2022,22 +2022,12 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) } /* - * Clean up this rcu_node structure's no-CBs state at the end of - * a grace period, and also return whether any no-CBs CPU associated - * with this rcu_node structure needs another grace period. + * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended + * grace period. 
*/ -static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { - int c = rnp->completed; - int needmore; - - wake_up_all(&rnp->nocb_gp_wq[c & 0x1]); - rnp->need_future_gp[c & 0x1] = 0; - needmore = rnp->need_future_gp[(c + 1) & 0x1]; - trace_rcu_future_grace_period(rsp->name, rnp->gpnum, rnp->completed, - c, rnp->level, rnp->grplo, rnp->grphi, - needmore ? "CleanupMore" : "Cleanup"); - return needmore; + wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); } /* @@ -2175,84 +2165,16 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) bool d; unsigned long flags; struct rcu_node *rnp = rdp->mynode; - struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); raw_spin_lock_irqsave(&rnp->lock, flags); - c = rnp->completed + 2; - - /* Count our request for a grace period. */ - rnp->need_future_gp[c & 0x1]++; - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "Startleaf"); - - if (rnp->gpnum != rnp->completed) { - - /* - * This rcu_node structure believes that a grace period - * is in progress, so we are done. When this grace - * period ends, our request will be acted upon. - */ - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleaf"); - raw_spin_unlock_irqrestore(&rnp->lock, flags); - - } else { - - /* - * Might not be a grace period, check root rcu_node - * structure to see if we must start one. - */ - if (rnp != rnp_root) - raw_spin_lock(&rnp_root->lock); /* irqs disabled. */ - if (rnp_root->gpnum != rnp_root->completed) { - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, - rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedleafroot"); - raw_spin_unlock(&rnp_root->lock); /* irqs disabled. */ - } else { - - /* - * No grace period, so we need to start one. 
- * The good news is that we can wait for exactly - * one grace period instead of part of the current - * grace period and all of the next grace period. - * Adjust counters accordingly and start the - * needed grace period. - */ - rnp->need_future_gp[c & 0x1]--; - c = rnp_root->completed + 1; - rnp->need_future_gp[c & 0x1]++; - rnp_root->need_future_gp[c & 0x1]++; - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, - rnp->completed, - c, rnp->level, - rnp->grplo, rnp->grphi, - "Startedroot"); - rcu_start_gp(rdp->rsp); - raw_spin_unlock(&rnp->lock); - } - - /* Clean up locking and irq state. */ - if (rnp != rnp_root) - raw_spin_unlock_irqrestore(&rnp->lock, flags); - else - local_irq_restore(flags); - } + c = rcu_start_future_gp(rnp, rdp); + raw_spin_unlock_irqrestore(&rnp->lock, flags); /* * Wait for the grace period. Do so interruptibly to avoid messing * up the load average. */ - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "StartWait"); + trace_rcu_future_gp(rnp, rdp, c, "StartWait"); for (;;) { wait_event_interruptible( rnp->nocb_gp_wq[c & 0x1], @@ -2260,14 +2182,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) if (likely(d)) break; flush_signals(current); - trace_rcu_future_grace_period(rdp->rsp->name, - rnp->gpnum, rnp->completed, c, - rnp->level, rnp->grplo, - rnp->grphi, "ResumeWait"); + trace_rcu_future_gp(rnp, rdp, c, "ResumeWait"); } - trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, - rnp->completed, c, rnp->level, - rnp->grplo, rnp->grphi, "EndWait"); + trace_rcu_future_gp(rnp, rdp, c, "EndWait"); smp_mb(); /* Ensure that CB invocation happens after GP end. 
*/ } @@ -2375,9 +2292,8 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp) return 0; } -static int rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) +static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { - return 0; } static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) -- cgit v1.2.3