diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-10-01 10:26:24 -0700 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2015-12-04 12:26:52 -0800 |
commit | df5bd5144a80a9f6c3807383b11f735dae9caf9d (patch) | |
tree | fce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree.c | |
parent | 1307f2148719cc9e9d12f5fa7d5b3b61ec5aef72 (diff) | |
download | linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.gz linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.bz2 linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.zip |
rcu: Reduce expedited GP memory contention via per-CPU variables
Currently, the piggybacked-work checks carried out by sync_exp_work_done()
atomically increment a small set of variables (the ->expedited_workdone0,
->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3
fields in the rcu_state structure), which will form a memory-contention
bottleneck given a sufficiently large number of CPUs concurrently invoking
either synchronize_rcu_expedited() or synchronize_sched_expedited().
This commit therefore moves these for fields to the per-CPU rcu_data
structure, eliminating the memory contention. The show_rcuexp() function
also changes to sum up each field in the rcu_data structures.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 11 |
1 files changed, 5 insertions, 6 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 00f07d6436ce..33d7e2551165 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3585,7 +3585,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, */ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) { - struct rcu_data *rdp; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); struct rcu_node *rnp0; struct rcu_node *rnp1 = NULL; @@ -3599,7 +3599,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) { if (mutex_trylock(&rnp0->exp_funnel_mutex)) { if (sync_exp_work_done(rsp, rnp0, NULL, - &rsp->expedited_workdone0, s)) + &rdp->expedited_workdone0, s)) return NULL; return rnp0; } @@ -3613,14 +3613,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) * can be inexact, as it is just promoting locality and is not * strictly needed for correctness. */ - rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); - if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s)) + if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s)) return NULL; mutex_lock(&rdp->exp_funnel_mutex); rnp0 = rdp->mynode; for (; rnp0 != NULL; rnp0 = rnp0->parent) { if (sync_exp_work_done(rsp, rnp1, rdp, - &rsp->expedited_workdone2, s)) + &rdp->expedited_workdone2, s)) return NULL; mutex_lock(&rnp0->exp_funnel_mutex); if (rnp1) @@ -3630,7 +3629,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) rnp1 = rnp0; } if (sync_exp_work_done(rsp, rnp1, rdp, - &rsp->expedited_workdone3, s)) + &rdp->expedited_workdone3, s)) return NULL; return rnp1; } |