author		Frederic Weisbecker <frederic@kernel.org>	2023-11-15 14:11:28 -0500
committer	Boqun Feng <boqun.feng@gmail.com>	2024-02-14 07:50:45 -0800
commit		1e8e6951a5774c8dd9d1f14af9c5b7d66130d96f (patch)
tree		85e20d29695f6129b05af15c6bf73ef4d98d803a /kernel/rcu
parent		ca16265aaf9d357035000833636dcddbfafacac3 (diff)
rcu/nocb: Remove needless full barrier after callback advancing
A full barrier is issued from nocb_gp_wait() upon callback advancing to order
grace period completion with callback execution.

However, these two events are already ordered by the
smp_mb__after_unlock_lock() barrier within the call to
raw_spin_lock_rcu_node() that is necessary for callback advancing to happen.

The following litmus test shows the kind of guarantee that this barrier
provides:

    C smp_mb__after_unlock_lock

    {}

    // rcu_gp_cleanup()
    P0(spinlock_t *rnp_lock, int *gpnum)
    {
        // Grace period cleanup increases gp sequence number
        spin_lock(rnp_lock);
        WRITE_ONCE(*gpnum, 1);
        spin_unlock(rnp_lock);
    }

    // nocb_gp_wait()
    P1(spinlock_t *rnp_lock, spinlock_t *nocb_lock, int *gpnum, int *cb_ready)
    {
        int r1;

        // Call rcu_advance_cbs() from nocb_gp_wait()
        spin_lock(nocb_lock);
        spin_lock(rnp_lock);
        smp_mb__after_unlock_lock();
        r1 = READ_ONCE(*gpnum);
        WRITE_ONCE(*cb_ready, 1);
        spin_unlock(rnp_lock);
        spin_unlock(nocb_lock);
    }

    // nocb_cb_wait()
    P2(spinlock_t *nocb_lock, int *cb_ready, int *cb_executed)
    {
        int r2;

        // rcu_do_batch() -> rcu_segcblist_extract_done_cbs()
        spin_lock(nocb_lock);
        r2 = READ_ONCE(*cb_ready);
        spin_unlock(nocb_lock);

        // Actual callback execution
        WRITE_ONCE(*cb_executed, 1);
    }

    P3(int *cb_executed, int *gpnum)
    {
        int r3;

        WRITE_ONCE(*cb_executed, 2);
        smp_mb();
        r3 = READ_ONCE(*gpnum);
    }

    exists (1:r1=1 /\ 2:r2=1 /\ cb_executed=2 /\ 3:r3=0) (* Bad outcome. *)

Here the bad outcome only occurs if the smp_mb__after_unlock_lock() is
removed. This barrier orders the grace period completion against callback
advancing and even later callback invocation, thanks to the opportunistic
propagation via the ->nocb_lock to nocb_cb_wait().

Therefore the smp_mb() placed after callback advancing can be safely removed.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
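For reference, the implicit ordering relied on above comes from the rcu_node
locking wrapper. The sketch below paraphrases raw_spin_lock_rcu_node() as
found in kernel/rcu/rcu.h; take it as an illustration of where the
smp_mb__after_unlock_lock() is issued, not as the exact in-tree definition.

    /*
     * Sketch of the rcu_node lock acquisition wrapper, paraphrased from
     * kernel/rcu/rcu.h: taking the node lock is immediately followed by
     * smp_mb__after_unlock_lock(), which promotes the UNLOCK+LOCK sequence
     * to a full memory barrier. This is the barrier that orders the
     * rnp->gp_seq update in rcu_gp_cleanup() against callback advancing in
     * nocb_gp_wait(), which is what makes the removed smp_mb() redundant.
     */
    #define raw_spin_lock_rcu_node(p)				\
    do {							\
    	raw_spin_lock(&ACCESS_PRIVATE(p, lock));		\
    	smp_mb__after_unlock_lock();			\
    } while (0)

The litmus test can be checked against the Linux-kernel memory model shipped
in tools/memory-model, with a command along the lines of
"herd7 -conf linux-kernel.cfg smp_mb__after_unlock_lock.litmus" (the file name
here is only illustrative); with the barrier present, herd7 should report the
"Bad outcome" state as never reached.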
Diffstat (limited to 'kernel/rcu')
-rw-r--r--	kernel/rcu/tree.c	6
-rw-r--r--	kernel/rcu/tree_nocb.h	1
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b2bccfd37c38..d540d210e5c7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2145,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
* Extract the list of ready callbacks, disabling IRQs to prevent
* races with call_rcu() from interrupt handlers. Leave the
* callback counts, as rcu_barrier() needs to be conservative.
+ *
+ * Callbacks execution is fully ordered against preceding grace period
+ * completion (materialized by rnp->gp_seq update) thanks to the
+ * smp_mb__after_unlock_lock() upon node locking required for callbacks
+ * advancing. In NOCB mode this ordering is then further relayed through
+ * the nocb locking that protects both callbacks advancing and extraction.
*/
rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 785946834c6b..b2c3145c4c13 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -779,7 +779,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
needwake = rdp->nocb_cb_sleep;
WRITE_ONCE(rdp->nocb_cb_sleep, false);
- smp_mb(); /* CB invocation -after- GP end. */
} else {
needwake = false;
}