rcu: Reduce expedited GP memory contention via per-CPU variables

Currently, the piggybacked-work checks carried out by sync_exp_work_done() atomically increment a small set of variables (the ->expedited_workdone0, ->expedited_workdone1, ->expedited_workdone2, ->expedited_workdone3 fields in the rcu_state structure), which will form a memory-contention bottleneck given a sufficiently large number of CPUs concurrently invoking either synchronize_rcu_expedited() or synchronize_sched_expedited(). This commit therefore moves these four fields to the per-CPU rcu_data structure, eliminating the memory contention. The show_rcuexp() function also changes to sum up each field in the rcu_data structures.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2015-10-01 10:26:24 -0700
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2015-12-04 12:26:52 -0800
commit: df5bd5144a80a9f6c3807383b11f735dae9caf9d (patch)
tree: fce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree.c
parent: 1307f2148719cc9e9d12f5fa7d5b3b61ec5aef72 (diff)
download: linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.gz
linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.bz2
linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.zip
1 files changed, 5 insertions, 6 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 00f07d6436ce..33d7e2551165 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3585,7 +3585,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
  */
 static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
-	struct rcu_data *rdp;
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
 
@@ -3599,7 +3599,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
 		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
 			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rsp->expedited_workdone0, s))
+					       &rdp->expedited_workdone0, s))
 				return NULL;
 			return rnp0;
 		}
@@ -3613,14 +3613,13 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	 * can be inexact, as it is just promoting locality and is not
 	 * strictly needed for correctness.
 	 */
-	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
-	if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+	if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
 		return NULL;
 	mutex_lock(&rdp->exp_funnel_mutex);
 	rnp0 = rdp->mynode;
 	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
 		if (sync_exp_work_done(rsp, rnp1, rdp,
-				       &rsp->expedited_workdone2, s))
+				       &rdp->expedited_workdone2, s))
 			return NULL;
 		mutex_lock(&rnp0->exp_funnel_mutex);
 		if (rnp1)
@@ -3630,7 +3629,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 		rnp1 = rnp0;
 	}
 	if (sync_exp_work_done(rsp, rnp1, rdp,
-			       &rsp->expedited_workdone3, s))
+			       &rdp->expedited_workdone3, s))
 		return NULL;
 	return rnp1;
 }
author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-10-01 10:26:24 -0700
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-12-04 12:26:52 -0800
commit	df5bd5144a80a9f6c3807383b11f735dae9caf9d (patch)
tree	fce44d0970a70446c5fd6b50f2e764db0efb8e56 /kernel/rcu/tree.c
parent	1307f2148719cc9e9d12f5fa7d5b3b61ec5aef72 (diff)
download	linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.gz linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.tar.bz2 linux-df5bd5144a80a9f6c3807383b11f735dae9caf9d.zip